In Part 1 of this OpenStack Nova instance-creation walkthrough, the analysis ended at self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, shown below:
def schedule_and_build_instances(self, context, build_requests,
                                 request_specs, image,
                                 admin_password, injected_files,
                                 requested_networks, block_device_mapping,
                                 tags=None):
    version = '1.17'
    kw = {'build_requests': build_requests,
          'request_specs': request_specs,
          'image': jsonutils.to_primitive(image),
          'admin_password': admin_password,
          'injected_files': injected_files,
          'requested_networks': requested_networks,
          'block_device_mapping': block_device_mapping,
          'tags': tags}
    if not self.client.can_send_version(version):
        version = '1.16'
        del kw['tags']
    cctxt = self.client.prepare(version=version)
    # Send the request as an asynchronous RPC message (cast)
    cctxt.cast(context, 'schedule_and_build_instances', **kw)
It puts the message on the message bus with cast and leaves it for nova-conductor to process.
Note: RPC.call() sends a message to the queue and waits for the result (blocking); RPC.cast() sends a message to the queue and continues without waiting for a result (non-blocking).
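For reference, here is a minimal, self-contained sketch (not Nova's code) of the two patterns with oslo.messaging; the topic, version and empty context dict are illustrative assumptions, and a transport_url must already be present in the loaded configuration:

from oslo_config import cfg
import oslo_messaging as messaging

# Assumes [DEFAULT]/transport_url is set in the loaded config (e.g. rabbit://...).
transport = messaging.get_rpc_transport(cfg.CONF)
target = messaging.Target(topic='conductor', version='1.17')
client = messaging.RPCClient(transport, target)
cctxt = client.prepare(version='1.17')
ctxt = {}  # normally a serialized nova RequestContext

# cast: drop the message on the queue and return immediately (non-blocking).
cctxt.cast(ctxt, 'schedule_and_build_instances', build_requests=[])

# call: drop the message on the queue and block until the server replies.
# host_lists = cctxt.call(ctxt, 'select_destinations', spec_obj=None)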
nova-conductor subscribes to the schedule_and_build_instances message and invokes the function of the same name in /nova/conductor/manager.py, shown below:
def schedule_and_build_instances(self, context, build_requests,
request_specs, image,
admin_password, injected_files,
requested_networks, block_device_mapping,
tags=None):
    # Add all the UUIDs for the instances
    instance_uuids = [spec.instance_uuid for spec in request_specs]
    try:
        # Returns the list of hosts suitable for building the instances.
        # Call chain:
        # /nova/conductor/manager.py:_schedule_instances() --> /nova/scheduler/client/__init__.py:select_destinations() --> /nova/scheduler/client/query.py:select_destinations()
        # --> /nova/scheduler/rpcapi.py:select_destinations(), which sends a
        # select_destinations request to the message queue with call() and
        # waits for nova-scheduler to return the suitable hosts
        host_lists = self._schedule_instances(context, request_specs[0],
                instance_uuids, return_alternates=True)
    except Exception as exc:
        LOG.exception('Failed to schedule instances')
        self._bury_in_cell0(context, request_specs[0], exc,
                            build_requests=build_requests)
        return

    host_mapping_cache = {}
    cell_mapping_cache = {}
    instances = []

    for (build_request, request_spec, host_list) in six.moves.zip(
            build_requests, request_specs, host_lists):
        instance = build_request.get_new_instance(context)
        # host_list is a list of one or more Selection objects, the first
        # of which has been selected and its resources claimed.
        host = host_list[0]
        # Convert host from the scheduler into a cell record
        if host.service_host not in host_mapping_cache:
            try:
                host_mapping = objects.HostMapping.get_by_host(
                    context, host.service_host)
                host_mapping_cache[host.service_host] = host_mapping
            except exception.HostMappingNotFound as exc:
                LOG.error('No host-to-cell mapping found for selected '
                          'host %(host)s. Setup is incomplete.',
                          {'host': host.service_host})
                self._bury_in_cell0(context, request_spec, exc,
                                    build_requests=[build_request],
                                    instances=[instance])
                # This is a placeholder in case the quota recheck fails.
                instances.append(None)
                continue
        else:
            host_mapping = host_mapping_cache[host.service_host]

        cell = host_mapping.cell_mapping

        # Before we create the instance, let's make one final check that
        # the build request is still around and wasn't deleted by the user
        # already.
        try:
            objects.BuildRequest.get_by_instance_uuid(
                context, instance.uuid)
        except exception.BuildRequestNotFound:
            # the build request is gone so we're done for this instance
            LOG.debug('While scheduling instance, the build request '
                      'was already deleted.', instance=instance)
            # This is a placeholder in case the quota recheck fails.
            instances.append(None)
            rc = self.scheduler_client.reportclient
            rc.delete_allocation_for_instance(context, instance.uuid)
            continue
        else:
            instance.availability_zone = (
                availability_zones.get_host_availability_zone(
                    context, host.service_host))
            with obj_target_cell(instance, cell):
                instance.create()
                instances.append(instance)
                cell_mapping_cache[instance.uuid] = cell

    # NOTE(melwitt): We recheck the quota after creating the
    # objects to prevent users from allocating more resources
    # than their allowed quota in the event of a race. This is
    # configurable because it can be expensive if strict quota
    # limits are not required in a deployment.
    if CONF.quota.recheck_quota:
        try:
            compute_utils.check_num_instances_quota(
                context, instance.flavor, 0, 0,
                orig_num_req=len(build_requests))
        except exception.TooManyInstances as exc:
            with excutils.save_and_reraise_exception():
                self._cleanup_build_artifacts(context, exc, instances,
                                              build_requests,
                                              request_specs,
                                              cell_mapping_cache)

    zipped = six.moves.zip(build_requests, request_specs, host_lists,
                           instances)
    for (build_request, request_spec, host_list, instance) in zipped:
        if instance is None:
            # Skip placeholders that were buried in cell0 or had their
            # build requests deleted by the user before instance create.
            continue
        cell = cell_mapping_cache[instance.uuid]
        # host_list is a list of one or more Selection objects, the first
        # of which has been selected and its resources claimed.
        host = host_list.pop(0)
        alts = [(alt.service_host, alt.nodename) for alt in host_list]
        LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
                  host.service_host, host.nodename, alts, instance=instance)
        filter_props = request_spec.to_legacy_filter_properties_dict()
        scheduler_utils.populate_retry(filter_props, instance.uuid)
        scheduler_utils.populate_filter_properties(filter_props,
                                                   host)

        # TODO(melwitt): Maybe we should set_target_cell on the contexts
        # once we map to a cell, and remove these separate with statements.
        with obj_target_cell(instance, cell) as cctxt:
            # send a state update notification for the initial create to
            # show it going from non-existent to BUILDING
            # This can lazy-load attributes on instance.
            notifications.send_update_with_states(cctxt, instance, None,
                    vm_states.BUILDING, None, None, service="conductor")
            objects.InstanceAction.action_start(
                cctxt, instance.uuid, instance_actions.CREATE,
                want_result=False)
            instance_bdms = self._create_block_device_mapping(
                cell, instance.flavor, instance.uuid, block_device_mapping)
            instance_tags = self._create_tags(cctxt, instance.uuid, tags)

        # TODO(Kevin Zheng): clean this up once instance.create() handles
        # tags; we do this so the instance.create notification in
        # build_and_run_instance in nova-compute doesn't lazy-load tags
        instance.tags = instance_tags if instance_tags \
            else objects.TagList()

        # Update mapping for instance. Normally this check is guarded by
        # a try/except but if we're here we know that a newer nova-api
        # handled the build process and would have created the mapping
        inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
            context, instance.uuid)
        inst_mapping.cell_mapping = cell
        inst_mapping.save()

        if not self._delete_build_request(
                context, build_request, instance, cell, instance_bdms,
                instance_tags):
            # The build request was deleted before/during scheduling so
            # the instance is gone and we don't have anything to build for
            # this one.
            continue

        # NOTE(danms): Compute RPC expects security group names or ids
        # not objects, so convert this to a list of names until we can
        # pass the objects.
        legacy_secgroups = [s.identifier for s in request_spec.security_groups]

        with obj_target_cell(instance, cell) as cctxt:
            self.compute_rpcapi.build_and_run_instance(
                cctxt, instance=instance, image=image,
                request_spec=request_spec,
                filter_properties=filter_props,
                admin_password=admin_password,
                injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=legacy_secgroups,
                block_device_mapping=instance_bdms,
                host=host.service_host, node=host.nodename,
                limits=host.limits, host_list=host_list)
Then, in /nova/compute/rpcapi.py, a non-blocking message is cast to the queue asking the target compute host to execute build_and_run_instance():
def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
        filter_properties, admin_password=None, injected_files=None,
        requested_networks=None, security_groups=None,
        block_device_mapping=None, node=None, limits=None,
        host_list=None):
    # NOTE(edleafe): compute nodes can only use the dict form of limits.
    if isinstance(limits, objects.SchedulerLimits):
        limits = limits.to_dict()
    kwargs = {"instance": instance,
              "image": image,
              "request_spec": request_spec,
              "filter_properties": filter_properties,
              "admin_password": admin_password,
              "injected_files": injected_files,
              "requested_networks": requested_networks,
              "security_groups": security_groups,
              "block_device_mapping": block_device_mapping,
              "node": node,
              "limits": limits,
              "host_list": host_list,
             }
    client = self.router.client(ctxt)
    version = '4.19'
    if not client.can_send_version(version):
        version = '4.0'
        kwargs.pop("host_list")
    cctxt = client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'build_and_run_instance', **kwargs)
nova-compute subscribes to the build_and_run_instance() message and executes that function in /nova/compute/manager.py, shown below:
def build_and_run_instance(self, context, instance, image, request_spec,
                           filter_properties, admin_password=None,
                           injected_files=None, requested_networks=None,
                           security_groups=None, block_device_mapping=None,
                           node=None, limits=None, host_list=None):

    @utils.synchronized(instance.uuid)
    def _locked_do_build_and_run_instance(*args, **kwargs):
        # NOTE(danms): We grab the semaphore with the instance uuid
        # locked because we could wait in line to build this instance
        # for a while and we want to make sure that nothing else tries
        # to do anything with this instance while we wait.
        with self._build_semaphore:
            try:
                result = self._do_build_and_run_instance(*args, **kwargs)
            except Exception:
                # NOTE(mriedem): This should really only happen if
                # _decode_files in _do_build_and_run_instance fails, and
                # that's before a guest is spawned so it's OK to remove
                # allocations for the instance for this node from Placement
                # below as there is no guest consuming resources anyway.
                # The _decode_files case could be handled more specifically
                # but that's left for another day.
                result = build_results.FAILED
                raise
            finally:
                if result == build_results.FAILED:
                    # Remove the allocation records from Placement for the
                    # instance if the build failed. The instance.host is
                    # likely set to None in _do_build_and_run_instance
                    # which means if the user deletes the instance, it
                    # will be deleted in the API, not the compute service.
                    # Setting the instance.host to None in
                    # _do_build_and_run_instance means that the
                    # ResourceTracker will no longer consider this instance
                    # to be claiming resources against it, so we want to
                    # reflect that same thing in Placement. No need to
                    # call this for a reschedule, as the allocations will
                    # have already been removed in
                    # self._do_build_and_run_instance().
                    self._delete_allocation_for_instance(context,
                                                         instance.uuid)

                if result in (build_results.FAILED,
                              build_results.RESCHEDULED):
                    self._build_failed()
                else:
                    self._failed_builds = 0

    # NOTE(danms): We spawn here to return the RPC worker thread back to
    # the pool. Since what follows could take a really long time, we don't
    # want to tie up RPC workers.
    utils.spawn_n(_locked_do_build_and_run_instance,
                  context, instance, image, request_spec,
                  filter_properties, admin_password, injected_files,
                  requested_networks, security_groups,
                  block_device_mapping, node, limits, host_list)
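The pattern above is what lets the earlier cast return quickly: the RPC worker hands the long build to a green thread, and a per-instance-UUID lock keeps concurrent operations on the same instance from interleaving. A minimal standalone sketch of that idea (not Nova's code; the UUID and the fake build body are made up):

import eventlet
eventlet.monkey_patch()

from oslo_concurrency import lockutils


def build_and_run(instance_uuid):
    @lockutils.synchronized(instance_uuid)
    def _locked_build():
        # Only one build for this UUID runs at a time in this process.
        print('building %s' % instance_uuid)
        eventlet.sleep(1)
        print('done %s' % instance_uuid)

    # Hand the long-running work to a green thread and return immediately,
    # which is what frees the RPC worker back to its pool.
    eventlet.spawn_n(_locked_build)


build_and_run('11111111-2222-3333-4444-555555555555')
eventlet.sleep(2)  # keep the demo process alive until the green thread finishes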
It then calls _do_build_and_run_instance() in /nova/compute/manager.py:
def _do_build_and_run_instance(self, context, instance, image,
        request_spec, filter_properties, admin_password, injected_files,
        requested_networks, security_groups, block_device_mapping,
        node=None, limits=None, host_list=None):

    try:
        LOG.debug('Starting instance...', instance=instance)
        instance.vm_state = vm_states.BUILDING
        instance.task_state = None
        instance.save(expected_task_state=
                (task_states.SCHEDULING, None))
    except exception.InstanceNotFound:
        msg = 'Instance disappeared before build.'
        LOG.debug(msg, instance=instance)
        return build_results.FAILED
    except exception.UnexpectedTaskStateError as e:
        LOG.debug(e.format_message(), instance=instance)
        return build_results.FAILED

    # b64 decode the files to inject:
    decoded_files = self._decode_files(injected_files)

    if limits is None:
        limits = {}

    if node is None:
        node = self._get_nodename(instance, refresh=True)

    try:
        with timeutils.StopWatch() as timer:
            self._build_and_run_instance(context, instance, image,
                    decoded_files, admin_password, requested_networks,
                    security_groups, block_device_mapping, node, limits,
                    filter_properties, request_spec)
        LOG.info('Took %0.2f seconds to build instance.',
                 timer.elapsed(), instance=instance)
        return build_results.ACTIVE
    except exception.RescheduledException as e:
        retry = filter_properties.get('retry')
        if not retry:
            # no retry information, do not reschedule.
            LOG.debug("Retry info not present, will not reschedule",
                      instance=instance)
            self._cleanup_allocated_networks(context, instance,
                                             requested_networks)
            self._cleanup_volumes(context, instance.uuid,
                                  block_device_mapping, raise_exc=False)
            compute_utils.add_instance_fault_from_exc(context,
                    instance, e, sys.exc_info(),
                    fault_message=e.kwargs['reason'])
            self._nil_out_instance_obj_host_and_node(instance)
            self._set_instance_obj_error_state(context, instance,
                                               clean_task_state=True)
            return build_results.FAILED
        LOG.debug(e.format_message(), instance=instance)
        # This will be used for logging the exception
        retry['exc'] = traceback.format_exception(*sys.exc_info())
        # This will be used for setting the instance fault message
        retry['exc_reason'] = e.kwargs['reason']
        # NOTE(comstud): Deallocate networks if the driver wants
        # us to do so.
        # NOTE(vladikr): SR-IOV ports should be deallocated to
        # allow new sriov pci devices to be allocated on a new host.
        # Otherwise, if devices with pci addresses are already allocated
        # on the destination host, the instance will fail to spawn.
        # info_cache.network_info should be present at this stage.
        if (self.driver.deallocate_networks_on_reschedule(instance) or
                self.deallocate_sriov_ports_on_reschedule(instance)):
            self._cleanup_allocated_networks(context, instance,
                                             requested_networks)
        else:
            # NOTE(alex_xu): Network already allocated and we don't
            # want to deallocate them before rescheduling. But we need
            # to cleanup those network resources setup on this host before
            # rescheduling.
            self.network_api.cleanup_instance_network_on_host(
                context, instance, self.host)

        self._nil_out_instance_obj_host_and_node(instance)
        instance.task_state = task_states.SCHEDULING
        instance.save()
        # The instance will have already claimed resources from this host
        # before this build was attempted. Now that it has failed, we need
        # to unclaim those resources before casting to the conductor, so
        # that if there are alternate hosts available for a retry, it can
        # claim resources on that new host for the instance.
        self._delete_allocation_for_instance(context, instance.uuid)

        self.compute_task_api.build_instances(context, [instance],
                image, filter_properties, admin_password,
                injected_files, requested_networks, security_groups,
                block_device_mapping, request_spec=request_spec,
                host_lists=[host_list])
        return build_results.RESCHEDULED
    except (exception.InstanceNotFound,
            exception.UnexpectedDeletingTaskStateError):
        msg = 'Instance disappeared during build.'
        LOG.debug(msg, instance=instance)
        self._cleanup_allocated_networks(context, instance,
                                         requested_networks)
        return build_results.FAILED
    except exception.BuildAbortException as e:
        LOG.exception(e.format_message(), instance=instance)
        self._cleanup_allocated_networks(context, instance,
                                         requested_networks)
        self._cleanup_volumes(context, instance.uuid,
                              block_device_mapping, raise_exc=False)
        compute_utils.add_instance_fault_from_exc(context, instance, e,
                                                  sys.exc_info())
        self._nil_out_instance_obj_host_and_node(instance)
        self._set_instance_obj_error_state(context, instance,
                                           clean_task_state=True)
        return build_results.FAILED
    except Exception as e:
        # Should not reach here.
        LOG.exception('Unexpected build failure, not rescheduling build.',
                      instance=instance)
        self._cleanup_allocated_networks(context, instance,
                                         requested_networks)
        self._cleanup_volumes(context, instance.uuid,
                              block_device_mapping, raise_exc=False)
        compute_utils.add_instance_fault_from_exc(context, instance, e,
                                                  sys.exc_info())
        self._nil_out_instance_obj_host_and_node(instance)
        self._set_instance_obj_error_state(context, instance,
                                           clean_task_state=True)
        return build_results.FAILED
Execution continues in the same file with _build_and_run_instance(), which starts spawning the instance on the hypervisor:
def _build_and_run_instance(self, context, instance, image, injected_files,
admin_password, requested_networks, security_groups,
block_device_mapping, node, limits, filter_properties,
request_spec=None):
image_name = image.get('name')
self._notify_about_instance_usage(context, instance, 'create.start',
extra_usage_info={'image_name': image_name})
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.START,
bdms=block_device_mapping)
# NOTE(mikal): cache the keystone roles associated with the instance
# at boot time for later reference
instance.system_metadata.update(
{'boot_roles': ','.join(context.roles)})
    self._check_device_tagging(requested_networks, block_device_mapping)

    try:
scheduler_hints = self._get_scheduler_hints(filter_properties,
request_spec)
rt = self._get_resource_tracker()
with rt.instance_claim(context, instance, node, limits):
# NOTE(russellb) It's important that this validation be done
# *after* the resource tracker instance claim, as that is where
# the host is set on the instance.
self._validate_instance_group_policy(context, instance,
scheduler_hints)
image_meta = objects.ImageMeta.from_dict(image)
with self._build_resources(context, instance,
requested_networks, security_groups, image_meta,
block_device_mapping) as resources:
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.SPAWNING
# NOTE(JoshNang) This also saves the changes to the
# instance from _allocate_network_async, as they aren't
# saved in that function to prevent races.
instance.save(expected_task_state=
task_states.BLOCK_DEVICE_MAPPING)
block_device_info = resources['block_device_info']
network_info = resources['network_info']
allocs = resources['allocations']
LOG.debug('Start spawning the instance on the hypervisor.',
instance=instance)
with timeutils.StopWatch() as timer:
                    # Start spawning the instance on the hypervisor
self.driver.spawn(context, instance, image_meta,
injected_files, admin_password,
allocs, network_info=network_info,
block_device_info=block_device_info)
LOG.info('Took %0.2f seconds to spawn the instance on '
'the hypervisor.', timer.elapsed(),
instance=instance)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
except exception.ComputeResourcesUnavailable as e:
LOG.debug(e.format_message(), instance=instance)
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.RescheduledException(
instance_uuid=instance.uuid, reason=e.format_message())
except exception.BuildAbortException as e:
with excutils.save_and_reraise_exception():
LOG.debug(e.format_message(), instance=instance)
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
except (exception.FixedIpLimitExceeded,
exception.NoMoreNetworks, exception.NoMoreFixedIps) as e:
LOG.warning('No more network or fixed IP to be allocated',
instance=instance)
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
msg = _('Failed to allocate the network(s) with error %s, '
'not rescheduling.') % e.format_message()
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)
except (exception.VirtualInterfaceCreateException,
exception.VirtualInterfaceMacAddressException,
exception.FixedIpInvalidOnHost,
exception.UnableToAutoAllocateNetwork) as e:
LOG.exception('Failed to allocate network(s)',
instance=instance)
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
msg = _('Failed to allocate the network(s), not rescheduling.')
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)
except (exception.FlavorDiskTooSmall,
exception.FlavorMemoryTooSmall,
exception.ImageNotActive,
exception.ImageUnacceptable,
exception.InvalidDiskInfo,
exception.InvalidDiskFormat,
cursive_exception.SignatureVerificationError,
exception.VolumeEncryptionNotSupported,
exception.InvalidInput) as e:
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=e.format_message())
except Exception as e:
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.RescheduledException(
instance_uuid=instance.uuid, reason=six.text_type(e))
# NOTE(alaski): This is only useful during reschedules, remove it now.
instance.system_metadata.pop('network_allocated', None)
# If CONF.default_access_ip_network_name is set, grab the
# corresponding network and set the access ip values accordingly.
    network_name = CONF.default_access_ip_network_name
    if (network_name and not instance.access_ip_v4 and
            not instance.access_ip_v6):
        # Note that when there are multiple ips to choose from, an
        # arbitrary one will be chosen.
        for vif in network_info:
            if vif['network']['label'] == network_name:
                for ip in vif.fixed_ips():
                    if not instance.access_ip_v4 and ip['version'] == 4:
                        instance.access_ip_v4 = ip['address']
                    if not instance.access_ip_v6 and ip['version'] == 6:
                        instance.access_ip_v6 = ip['address']
                break

    self._update_instance_after_spawn(context, instance)

    try:
instance.save(expected_task_state=task_states.SPAWNING)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance, 'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
self._update_scheduler_instance_info(context, instance)
self._notify_about_instance_usage(context, instance, 'create.end',
extra_usage_info={'message': _('Success')},
network_info=network_info)
compute_utils.notify_about_instance_create(context, instance,
self.host, phase=fields.NotificationPhase.END,
bdms=block_device_mapping)
Spawning the instance on the hypervisor goes through the matching Python virt driver; several drivers can be found under /nova/virt/:
These include libvirt, powervm, vmwareapi, and xenapi. Which one nova-compute loads is controlled by the compute_driver option in nova.conf; since we use libvirt, self.driver.spawn() resolves to spawn() in /nova/virt/libvirt/driver.py (see the short sketch of the driver selection below, followed by the spawn() code).
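A simplified illustration of that selection (not Nova's actual loader, which lives in nova/virt/driver.py; the class path shown is how compute_driver = libvirt.LibvirtDriver resolves in the real tree):

from oslo_utils import importutils

# Value normally read from CONF.compute_driver in nova.conf [DEFAULT].
compute_driver = 'libvirt.LibvirtDriver'

# Nova resolves the class under the nova.virt namespace, i.e.
# nova.virt.libvirt.LibvirtDriver, and instantiates it once per compute host.
driver_cls = importutils.import_class('nova.virt.%s' % compute_driver)
print(driver_cls)

The libvirt spawn() function itself looks like this: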
def spawn(self, context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
    # Gather the disk configuration info
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
image_meta,
block_device_info)
injection_info = InjectionInfo(network_info=network_info,
files=injected_files,
admin_pass=admin_password)
gen_confdrive = functools.partial(self._create_configdrive,
context, instance,
injection_info)
    # Prepare the instance images
self._create_image(context, instance, disk_info['mapping'],
injection_info=injection_info,
block_device_info=block_device_info)
# Required by Quobyte CI
self._ensure_console_log_for_instance(instance)
# Does the guest need to be assigned some vGPU mediated devices ?
mdevs = self._allocate_mdevs(allocations)
    # Convert the current configuration into the guest XML used to create the VM
xml = self._get_guest_xml(context, instance, network_info,
disk_info, image_meta,
block_device_info=block_device_info,
mdevs=mdevs)
    # Set up the network and create the domain
self._create_domain_and_network(
context, xml, instance, network_info,
block_device_info=block_device_info,
post_xml_callback=gen_confdrive,
destroy_disks_on_failure=True)
LOG.debug("Instance is running", instance=instance)
    def _wait_for_boot():
        """Called at an interval until the VM is running."""
        state = self.get_info(instance).state

        if state == power_state.RUNNING:
LOG.info("Instance spawned successfully.", instance=instance)
raise loopingcall.LoopingCallDone()
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
timer.start(interval=0.5).wait()
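The _wait_for_boot helper above uses oslo.service's FixedIntervalLoopingCall: the callback runs every interval until it raises LoopingCallDone, which stops the loop and unblocks wait(). A minimal standalone sketch of that pattern (get_state() here is just a stand-in for self.get_info(instance).state):

from oslo_service import loopingcall


def get_state():
    # Stand-in for asking the hypervisor for the guest's power state.
    return 'running'


def _wait_for_boot():
    if get_state() == 'running':
        # Raising LoopingCallDone stops the periodic call and unblocks wait().
        raise loopingcall.LoopingCallDone()


timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
timer.start(interval=0.5).wait()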
_create_domain_and_network(), called in the same file, is shown below:
def _create_domain_and_network(self, context, xml, instance, network_info,
                               block_device_info=None, power_on=True,
                               vifs_already_plugged=False,
                               post_xml_callback=None,
                               destroy_disks_on_failure=False):
    """Do required network setup and create domain."""
    timeout = CONF.vif_plugging_timeout
    if (self._conn_supports_start_paused and
            utils.is_neutron() and
            not vifs_already_plugged and
            power_on and timeout):
        events = self._get_neutron_events(network_info)
    else:
        events = []

    pause = bool(events)
    guest = None
    try:
        with self.virtapi.wait_for_instance_event(
                instance, events, deadline=timeout,
                error_callback=self._neutron_failed_callback):
            self.plug_vifs(instance, network_info)
            self.firewall_driver.setup_basic_filtering(instance,
                                                       network_info)
            self.firewall_driver.prepare_instance_filter(instance,
                                                         network_info)
            with self._lxc_disk_handler(context, instance,
                                        instance.image_meta,
                                        block_device_info):
                guest = self._create_domain(
                    xml, pause=pause, power_on=power_on,
                    post_xml_callback=post_xml_callback)

            self.firewall_driver.apply_instance_filter(instance,
                                                       network_info)
    except exception.VirtualInterfaceCreateException:
        # Neutron reported failure and we didn't swallow it, so
        # bail here
        with excutils.save_and_reraise_exception():
            self._cleanup_failed_start(context, instance, network_info,
                                       block_device_info, guest,
                                       destroy_disks_on_failure)
    except eventlet.timeout.Timeout:
        # We never heard from Neutron
        LOG.warning('Timeout waiting for %(events)s for '
                    'instance with vm_state %(vm_state)s and '
                    'task_state %(task_state)s.',
                    {'events': events,
                     'vm_state': instance.vm_state,
                     'task_state': instance.task_state},
                    instance=instance)
        if CONF.vif_plugging_is_fatal:
            self._cleanup_failed_start(context, instance, network_info,
                                       block_device_info, guest,
                                       destroy_disks_on_failure)
            raise exception.VirtualInterfaceCreateException()
    except Exception:
        # Any other error, be sure to clean up
        LOG.error('Failed to start libvirt guest', instance=instance)
        with excutils.save_and_reraise_exception():
            self._cleanup_failed_start(context, instance, network_info,
                                       block_device_info, guest,
                                       destroy_disks_on_failure)

    # Resume only if domain has been paused
    if pause:
        guest.resume()
    return guest
_create_domain(), also in this file, is as follows:
def _create_domain(self, xml=None, domain=None,
                   power_on=True, pause=False, post_xml_callback=None):
    """Create a domain.

    Either domain or xml must be passed in. If both are passed, then
    the domain definition is overwritten from the xml.

    :returns guest.Guest: Guest just created
    """
    if xml:
        # Define the domain via libvirt
        guest = libvirt_guest.Guest.create(xml, self._host)
        if post_xml_callback is not None:
            post_xml_callback()
    else:
        guest = libvirt_guest.Guest(domain)

    if power_on or pause:
        # Start the domain via libvirt
        guest.launch(pause=pause)

    if not utils.is_neutron():
        guest.enable_hairpin()

    return guest
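For reference, Guest.create() and guest.launch() in nova/virt/libvirt/guest.py are thin wrappers over the libvirt-python bindings: define a domain from the generated XML, then start it (optionally paused). A rough standalone sketch of what that boils down to, with a made-up XML file path standing in for the output of _get_guest_xml():

import libvirt

# Made-up input: in Nova this XML is produced by _get_guest_xml().
with open('instance.xml') as f:
    xml = f.read()
pause = False

conn = libvirt.open('qemu:///system')   # Nova keeps this connection in its Host object
dom = conn.defineXML(xml)               # roughly what Guest.create() does
flags = libvirt.VIR_DOMAIN_START_PAUSED if pause else 0
dom.createWithFlags(flags)              # roughly what guest.launch(pause=...) does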
Summary: at this point the basic code path for creating a virtual machine in OpenStack is mapped out: the OpenStack CLI sends the RESTful request to create a server --->>> nova-api receives the request and routes it to the controller method (def create()) in /nova/api/openstack/compute/servers.py, which starts the creation flow and eventually reaches self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, casting a "schedule_and_build_instances" request onto the RabbitMQ message queue --->>> nova-conductor receives the RPC request and runs schedule_and_build_instances() in /nova/conductor/manager.py; along the way it issues an RPC call to nova-scheduler to obtain host_lists (the hosts suitable for the new instances), and after further processing casts a "build_and_run_instance" request onto the queue --->>> nova-compute picks up that RPC request, runs build_and_run_instance() in /nova/compute/manager.py, and finally calls self.driver.spawn(), handing off to the hypervisor driver to define and boot the virtual machine.