OpenStack 二次开发实战:Nova 核心改造与生产实践
开发环境搭建
DevStack(快速验证)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
# Fetch DevStack and work inside its checkout.
git clone https://opendev.org/openstack/devstack
cd devstack

# Write the DevStack configuration. The heredoc delimiter is quoted ('EOF')
# so the shell copies the body verbatim, without expanding anything.
cat > local.conf << 'EOF'
[[local|localrc]]
ADMIN_PASSWORD=secret
DATABASE_PASSWORD=secret
RABBIT_PASSWORD=secret
SERVICE_PASSWORD=secret

# Assigning ENABLED_SERVICES replaces DevStack's default service list, so
# the database and message-queue backends must be listed explicitly.
ENABLED_SERVICES=key,mysql,rabbit
# Nova
ENABLED_SERVICES+=,n-api,n-cpu,n-cond,n-sch,n-novnc
# Placement
ENABLED_SERVICES+=,placement-api
# Neutron
ENABLED_SERVICES+=,q-svc,q-agt,q-dhcp,q-l3,q-meta
# Cinder
ENABLED_SERVICES+=,c-api,c-vol,c-sch
# Glance
ENABLED_SERVICES+=,g-api,g-reg

# Build Nova from a local checkout and branch instead of upstream.
NOVA_REPO=/path/to/your/nova
NOVA_BRANCH=my-feature-branch
EOF

# Deploy the services configured above.
./stack.sh
|
源码调试(pdb/VSCode)
1 2 3 4 5 6 7
# Option 1: stop here and open the pdb prompt on the process's own terminal.
import pdb
pdb.set_trace()

# Option 2: stop here and serve the pdb session over TCP port 4444.
# NOTE(review): host 0.0.0.0 listens on every network interface, and a pdb
# session can run arbitrary code -- keep this to isolated development hosts.
import remote_pdb
remote_pdb.set_trace(host='0.0.0.0', port=4444)
|
1 2 3 4 5 6 7 8 9 10 11
{
  "name": "Nova Compute",
  "type": "python",
  "request": "attach",
  "connect": {
    "host": "devstack-host",
    "port": 5678
  },
  "pathMappings": [
    {
      "localRoot": "${workspaceFolder}",
      "remoteRoot": "/opt/stack/nova"
    }
  ]
}
|
常见改造场景
1. 自定义调度策略
场景:按业务标签调度,同一业务的 VM 尽量分散到不同宿主机。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
|
from nova.scheduler import filters
from nova import objects


class BusinessSpreadFilter(filters.BaseHostFilter):
    """Spread VMs that share a ``business_tag`` across different hosts.

    The tag and the per-host limit are both read from the flavor's
    extra specs (``business_tag`` and ``business_max_per_host``).
    """

    # NOTE(review): presumably meant as a filter option flag; confirm that
    # the base class actually reads an attribute with this name.
    RUN_ON_DB = False

    def host_passes(self, host_state, spec_obj):
        """Return True if the host may take another VM with this tag.

        :param host_state: candidate host; only ``host_state.host`` is read
        :param spec_obj: request spec; only ``flavor.extra_specs`` is read
        :returns: True when the flavor carries no ``business_tag``, or when
            the host currently runs fewer tagged VMs than
            ``business_max_per_host`` (3 when the extra spec is absent)
        """
        extra_specs = spec_obj.flavor.extra_specs
        tag = extra_specs.get('business_tag')
        if not tag:
            # Untagged flavors are never restricted by this filter.
            return True

        running = self._get_business_vm_count(host_state.host, tag)
        limit = int(extra_specs.get('business_max_per_host', 3))
        return running < limit

    def _get_business_vm_count(self, host, tag):
        """Return how many VMs with ``tag`` the DB layer reports on ``host``."""
        # Imported at call time rather than at module load.
        # NOTE(review): count_instances_by_host_and_tag is presumably a
        # helper added to the DB API as part of this change -- confirm.
        from nova.db.main import api as db_api
        return db_api.count_instances_by_host_and_tag(host, tag)
|
1 2 3
# "..." stands for the filters that are already enabled; append the new
# filter to that existing list.
# NOTE(review): presumably the filter class must also be discoverable via
# [filter_scheduler] available_filters -- confirm for this deployment.
[filter_scheduler]
enabled_filters = ...,BusinessSpreadFilter
|
2. 虚拟机创建前后 Hook
场景:VM 创建后自动注册到 CMDB,删除后自动注销。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
def _build_and_run_instance(self, context, instance, ...): try: self.driver.spawn(context, instance, ...)
self._post_create_hook(context, instance)
except Exception as e: self._create_failed_hook(context, instance, e) raise
def _post_create_hook(self, context, instance):
    """Register a newly created VM with the CMDB, best effort.

    Any exception raised while reading the instance fields or calling the
    CMDB client is logged as a warning and swallowed, so a CMDB problem
    never propagates to the caller.

    :param context: request context; not used by this hook
    :param instance: the instance whose details are sent to the CMDB
    """
    try:
        record = {
            'vm_id': instance.uuid,
            'vm_name': instance.display_name,
            'host': instance.host,
            'project_id': instance.project_id,
            # NOTE(review): assumes access_ip_v4 is populated at this
            # point -- confirm, otherwise the CMDB receives an empty IP.
            'ip': instance.access_ip_v4,
            'created_at': instance.created_at.isoformat(),
        }
        cmdb_client.register_vm(**record)
        LOG.info(f"VM {instance.uuid} 已注册到 CMDB")
    except Exception as exc:
        LOG.warning(f"CMDB 注册失败: {exc}")
|
3. 扩展虚拟机 API
场景:添加自定义 API,获取 VM 的实时 CPU 使用率。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
from nova.api.openstack import extensions
from nova.api.openstack import wsgi


class VmMetricsController(wsgi.Controller):
    """Server action that returns live metrics for one instance.

    NOTE(review): ``self.compute_api`` and ``self.compute_rpcapi`` are not
    assigned anywhere in this class; presumably they are set up elsewhere
    (e.g. in an ``__init__`` not shown here) -- confirm.
    """

    @wsgi.action('get_metrics')
    def get_metrics(self, req, id, body):
        """Handle the ``get_metrics`` server action.

        Invoked as ``POST /servers/{id}/action`` with ``get_metrics`` as
        the action key in the request body.

        :param req: request; the context is read from
            ``req.environ['nova.context']``
        :param id: identifier of the server to inspect
        :param body: request body of the action (not inspected here)
        :returns: ``{'metrics': <value returned by the compute RPC call>}``
        """
        context = req.environ['nova.context']
        instance = self.compute_api.get(context, id)
        metrics = self._get_vm_metrics(context, instance)
        return {'metrics': metrics}

    def _get_vm_metrics(self, context, instance):
        """Fetch the instance's metrics through the compute RPC API.

        The request context is passed in explicitly: it is a local of the
        action handler and is not otherwise visible in this method.
        """
        return self.compute_rpcapi.get_instance_metrics(context, instance)
|
4. 自定义虚拟机规格校验
场景:限制某些 Project 只能创建特定规格的 VM。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
def create(self, req, body):
    """Create a server after checking the project may use the flavor.

    :param req: request; the context is read from
        ``req.environ['nova.context']``
    :param body: request body; the flavor comes from
        ``body['server']['flavorRef']``
    :returns: whatever ``self._create(req, body)`` returns
    """
    ctxt = req.environ['nova.context']
    # Custom check: raises before any creation work starts.
    self._validate_flavor_for_project(ctxt, body['server']['flavorRef'])
    return self._create(req, body)
def _validate_flavor_for_project(self, context, flavor_id):
    """Raise if the request's project is not allowed to use the flavor.

    A project with an empty (or missing) allow-list is unrestricted.

    :param context: request context; supplies ``project_id``
    :param flavor_id: the ``flavorRef`` value taken from the request body
    :raises exception.FlavorNotAllowed: the project has an allow-list and
        the flavor's name is not on it
    """
    project_id = context.project_id
    # flavorRef carries the public flavor ID, so resolve it with
    # get_by_flavor_id(); get_by_id() expects the internal database
    # primary key and would fetch the wrong flavor or none at all.
    flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)

    allowed_flavors = self._get_allowed_flavors(project_id)
    if allowed_flavors and flavor.name not in allowed_flavors:
        # NOTE(review): FlavorNotAllowed is presumably a custom exception
        # added alongside this change -- confirm it exists.
        raise exception.FlavorNotAllowed(
            flavor=flavor.name,
            project=project_id
        )
|
数据库 Schema 变更
OpenStack 使用 Alembic 管理数据库迁移:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
|
from alembic import op
import sqlalchemy as sa


def upgrade():
    """Add a nullable, indexed ``business_tag`` column to ``instances``."""
    tag_column = sa.Column('business_tag', sa.String(255), nullable=True)
    op.add_column('instances', tag_column)
    op.create_index(
        'ix_instances_business_tag',
        'instances',
        ['business_tag'],
    )
def downgrade():
    """Drop the ``business_tag`` index, then the column, from ``instances``."""
    op.drop_index('ix_instances_business_tag', table_name='instances')
    op.drop_column('instances', 'business_tag')
|
1 2 3 4 5
# Apply pending schema migrations to the Nova database.
nova-manage db sync

# Print the database schema version now in effect.
nova-manage db version
|
单元测试
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
|
from unittest import mock

from nova.compute import manager
from nova import exception
from nova import test


class TestBuildInstance(test.TestCase):
    """Unit tests for ``ComputeManager._build_and_run_instance``.

    ``...`` marks arguments elided in this excerpt.

    NOTE(review): ``self.context`` and ``self._create_fake_instance`` are
    used but not defined in this class; presumably they come from a
    fixture or helper not shown here -- confirm.
    """

    def setUp(self):
        super().setUp()
        self.compute = manager.ComputeManager()

    # Decorators apply bottom-up, so the network mock is the first
    # injected argument and the spawn mock the second.
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.spawn')
    @mock.patch('nova.network.neutron.API.allocate_for_instance')
    def test_build_instance_success(self, mock_network, mock_spawn):
        """A successful build allocates networking and spawns once."""
        instance = self._create_fake_instance()

        self.compute._build_and_run_instance(
            self.context, instance, ...
        )

        mock_spawn.assert_called_once()
        mock_network.assert_called_once_with(
            self.context, instance, ...
        )

    def test_build_instance_network_failure(self):
        """A network allocation failure aborts the build and marks an error."""
        # The instance must exist before it can be passed to the call under
        # test and inspected afterwards.
        instance = self._create_fake_instance()

        with mock.patch(
            'nova.network.neutron.API.allocate_for_instance',
            side_effect=Exception("Network error")
        ):
            self.assertRaises(
                exception.BuildAbortException,
                self.compute._build_and_run_instance,
                self.context, instance, ...
            )
            self.assertEqual('error', instance.vm_state)
|
版本管理与发布
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Clone upstream Nova and work inside the checkout; every command below
# must run from the repository root.
git clone https://github.com/openstack/nova.git
cd nova

# Branch the in-house line off the upstream stable branch.
git checkout stable/2024.1
git checkout -b mycompany/2024.1

# Tag the in-house release.
git tag mycompany-2024.1.1-patch1

# Build an RPM package.
python setup.py bdist_rpm

# Build wheels into dist/.
pip wheel . -w dist/

# Install the built Nova wheel and restart the compute service.
pip install dist/nova-*.whl --upgrade
systemctl restart openstack-nova-compute
|
生产改造经验总结
| 改造点 | 风险 | 建议 |
| --- | --- | --- |
| 修改 nova/compute/manager.py | 高(核心文件) | 最小化改动,充分测试 |
| 添加自定义 Filter | 低 | 推荐,影响范围小 |
| 修改数据库 Schema | 中 | 只加列不删列,保持向后兼容 |
| 修改 API 响应格式 | 高 | 避免,会破坏客户端兼容性 |
| 添加新 API Endpoint | 中 | 通过 Extension 机制添加 |
| 修改调度算法 | 中 | 充分压测,关注调度延迟 |
核心原则:最小化侵入。尽量通过配置、插件、Extension 实现需求,避免直接修改核心代码,降低升级成本。