xref: /freebsd/usr.sbin/bhyve/bhyverun.c (revision 730cecb05aaf016ac52ef7cfc691ccec3a0408cd)
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>

#include <machine/segments.h>

#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <pthread.h>
#include <pthread_np.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "acpi.h"
#include "inout.h"
#include "dbgport.h"
#include "mem.h"
#include "mevent.h"
#include "mptbl.h"
#include "pci_emul.h"
#include "xmsr.h"
#include "ioapic.h"
#include "spinup_ap.h"

#define	DEFAULT_GUEST_HZ	100
#define	DEFAULT_GUEST_TSLICE	200

#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */

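/*
 * Return values from the vmexit handlers below; vm_loop() uses these to
 * decide how a vcpu proceeds after an exit.
 */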
#define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
#define	VMEXIT_CONTINUE		1	/* continue from next instruction */
#define	VMEXIT_RESTART		2	/* restart current instruction */
#define	VMEXIT_ABORT		3	/* abort the vm run loop */
#define	VMEXIT_RESET		4	/* guest machine has reset */

#define MB		(1024UL * 1024)
#define GB		(1024UL * MB)

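/*
 * An exit handler is passed a pointer to the current vcpu number and, when
 * returning VMEXIT_SWITCH in muxed mode, may change it to request that a
 * different vcpu (or -1 for "rotate to the next one") be run next.
 */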
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);

int guest_tslice = DEFAULT_GUEST_TSLICE;
int guest_hz = DEFAULT_GUEST_HZ;
char *vmname;

int guest_ncpus;

static int pincpu = -1;
static int guest_vcpu_mux;
static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;

static int foundcpus;

static int strictio;

static int acpi;

static char *progname;
static const int BSP = 0;

static int cpumask;

static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);

struct vm_exit vmexit[VM_MAXCPU];

struct fbsdstats {
        uint64_t        vmexit_bogus;
        uint64_t        vmexit_bogus_switch;
        uint64_t        vmexit_hlt;
        uint64_t        vmexit_pause;
        uint64_t        vmexit_mtrap;
        uint64_t        vmexit_paging;
        uint64_t        cpu_switch_rotate;
        uint64_t        cpu_switch_direct;
        int             io_reset;
} stats;

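/*
 * Per-vcpu bookkeeping for the thread that runs vm_loop() on that vcpu.
 */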
struct mt_vmm_info {
	pthread_t	mt_thr;
	struct vmctx	*mt_ctx;
	int		mt_vcpu;
} mt_vmm_info[VM_MAXCPU];

static void
usage(int code)
{

	fprintf(stderr,
		"Usage: %s [-aehxABHIP][-g <gdb port>][-c <cpus>][-z <hz>]"
		"[-t <tslice hz>][-p <pincpu>][-s <pci>][-S <pci>][-m <memsize>]"
		" <vmname>\n"
		"       -a: local apic is in XAPIC mode (default is X2APIC)\n"
		"       -A: create an ACPI table\n"
		"       -g: gdb port (default is %d and 0 means don't open)\n"
		"       -c: # cpus (default 1)\n"
		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
		"       -B: inject breakpoint exception on vm entry\n"
		"       -H: vmexit from the guest on hlt\n"
		"       -I: present an ioapic to the guest\n"
		"       -P: vmexit from the guest on pause\n"
		"       -e: exit on unhandled i/o access\n"
		"       -h: help\n"
		"       -z: guest hz (default is %d)\n"
		"       -s: <slot,driver,configinfo> PCI slot config\n"
		"       -S: <slot,driver,configinfo> legacy PCI slot config\n"
		"       -m: memory size in MB\n"
		"       -x: mux vcpus to 1 hcpu\n"
		"       -t: mux vcpu timeslice hz (default %d)\n",
		progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
		DEFAULT_GUEST_TSLICE);
	exit(code);
}
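
/*
 * Example invocation (illustrative only; the PCI slot string assumes a
 * virtio-net device emulation is available and is not something defined
 * in this file):
 *
 *	bhyve -c 2 -m 512 -A -I -g 0 -s 1,virtio-net,tap0 myvm
 */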

void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{

	return (vm_map_gpa(ctx, gaddr, len));
}

int
fbsdrun_disable_x2apic(void)
{

	return (disable_x2apic);
}

int
fbsdrun_vmexit_on_pause(void)
{

	return (guest_vmexit_on_pause);
}

int
fbsdrun_vmexit_on_hlt(void)
{

	return (guest_vmexit_on_hlt);
}

int
fbsdrun_muxed(void)
{

	return (guest_vcpu_mux);
}

static void *
fbsdrun_start_thread(void *param)
{
	char tname[MAXCOMLEN + 1];
	struct mt_vmm_info *mtp;
	int vcpu;

	mtp = param;
	vcpu = mtp->mt_vcpu;

	snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu);
	pthread_set_name_np(mtp->mt_thr, tname);

	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);

	/* not reached */
	exit(1);
	return (NULL);
}

void
fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
{
	int error;

	if (cpumask & (1 << vcpu)) {
		fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
		    vcpu);
		exit(1);
	}

	cpumask |= 1 << vcpu;
	foundcpus++;

	/*
	 * Set up the vmexit struct to allow execution to start
	 * at the given RIP
	 */
	vmexit[vcpu].rip = rip;
	vmexit[vcpu].inst_length = 0;

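	/*
	 * When vcpus are muxed onto a single host cpu, only the BSP gets a
	 * thread of its own; other vcpus are run by switching within that
	 * thread's vm_loop().
	 */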
	if (vcpu == BSP || !guest_vcpu_mux) {
		mt_vmm_info[vcpu].mt_ctx = ctx;
		mt_vmm_info[vcpu].mt_vcpu = vcpu;

		error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
				fbsdrun_start_thread, &mt_vmm_info[vcpu]);
		assert(error == 0);
	}
}

static int
fbsdrun_get_next_cpu(int curcpu)
{

	/*
	 * Get the next available CPU. Assumes they arrive
	 * in ascending order with no gaps.
	 */
	return ((curcpu + 1) % foundcpus);
}

static int
vmexit_catch_reset(void)
{
        stats.io_reset++;
        return (VMEXIT_RESET);
}

static int
vmexit_catch_inout(void)
{
	return (VMEXIT_ABORT);
}

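/*
 * Guest writes to GUEST_NIO_PORT end up here.  When built with PG_DEBUG,
 * the value written selects debug behaviour (toggling pause switching or
 * enabling MTRAP exits); otherwise the notification is simply ignored.
 */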
static int
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
		     uint32_t eax)
{
#if PG_DEBUG /* put all types of debug here */
        if (eax == 0) {
		pause_noswitch = 1;
	} else if (eax == 1) {
		pause_noswitch = 0;
	} else {
		pause_noswitch = 0;
		if (eax == 5) {
			vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
		}
	}
#endif
        return (VMEXIT_CONTINUE);
}

static int
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int error;
	int bytes, port, in, out;
	uint32_t eax;
	int vcpu;

	vcpu = *pvcpu;

	port = vme->u.inout.port;
	bytes = vme->u.inout.bytes;
	eax = vme->u.inout.eax;
	in = vme->u.inout.in;
	out = !in;

	/* We don't deal with these */
	if (vme->u.inout.string || vme->u.inout.rep)
		return (VMEXIT_ABORT);

	/* Special case of guest reset */
	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
		return (vmexit_catch_reset());

        /* Extra-special case of host notifications */
        if (out && port == GUEST_NIO_PORT)
                return (vmexit_handle_notify(ctx, vme, pvcpu, eax));

	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
	if (error == 0 && in)
		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);

	if (error == 0)
		return (VMEXIT_CONTINUE);
	else {
		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
			in ? "in" : "out",
			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
		return (vmexit_catch_inout());
	}
}

static int
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code,
	    *pvcpu);
	return (VMEXIT_ABORT);
}

static int
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int newcpu;
	int retval = VMEXIT_CONTINUE;

	newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);

	if (guest_vcpu_mux && *pvcpu != newcpu) {
                retval = VMEXIT_SWITCH;
                *pvcpu = newcpu;
        }

        return (retval);
}

static int
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int newcpu;
	int retval = VMEXIT_CONTINUE;

	newcpu = spinup_ap(ctx, *pvcpu,
			   vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);

	if (guest_vcpu_mux && *pvcpu != newcpu) {
		retval = VMEXIT_SWITCH;
		*pvcpu = newcpu;
	}

	return (retval);
}

static int
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
	fprintf(stderr, "\treason\t\tVMX\n");
	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
	fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error);
	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
	fprintf(stderr, "\tqualification\t0x%016lx\n",
	    vmexit->u.vmx.exit_qualification);

	return (VMEXIT_ABORT);
}

static int bogus_noswitch = 1;

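/*
 * A "bogus" exit is one where the vcpu left guest mode without a real exit
 * event (typically because the host had work pending for the thread); by
 * default the current instruction is simply restarted.
 */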
static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	stats.vmexit_bogus++;

	if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
		return (VMEXIT_RESTART);
	} else {
		stats.vmexit_bogus_switch++;
		vmexit->inst_length = 0;
		*pvcpu = -1;
		return (VMEXIT_SWITCH);
	}
}

static int
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	stats.vmexit_hlt++;
	if (fbsdrun_muxed()) {
		*pvcpu = -1;
		return (VMEXIT_SWITCH);
	} else {
		/*
		 * Just continue execution with the next instruction. We use
		 * the HLT VM exit as a way to be friendly with the host
		 * scheduler.
		 */
		return (VMEXIT_CONTINUE);
	}
}

static int pause_noswitch;

static int
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	stats.vmexit_pause++;

	if (fbsdrun_muxed() && !pause_noswitch) {
		*pvcpu = -1;
		return (VMEXIT_SWITCH);
        } else {
		return (VMEXIT_CONTINUE);
	}
}

static int
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	stats.vmexit_mtrap++;

	return (VMEXIT_RESTART);
}

static int
vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	int err;
	stats.vmexit_paging++;

	err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa,
			  &vmexit->u.paging.vie);

	if (err) {
		if (err == EINVAL) {
			fprintf(stderr,
			    "Failed to emulate instruction at 0x%lx\n",
			    vmexit->rip);
		} else if (err == ESRCH) {
			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
			    vmexit->u.paging.gpa);
		}

		return (VMEXIT_ABORT);
	}

	return (VMEXIT_CONTINUE);
}

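/*
 * The handler body is intentionally empty: the periodic SIGALRM set up in
 * setup_timeslice() only serves to interrupt a muxed vcpu so that vm_loop()
 * gets a chance to rotate to the next vcpu.
 */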
static void
sigalrm(int sig)
{
	return;
}

static void
setup_timeslice(void)
{
	struct sigaction sa;
	struct itimerval itv;
	int error;

	/*
	 * Setup a realtime timer to generate a SIGALRM at a
	 * frequency of 'guest_tslice' ticks per second.
	 */
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;
	sa.sa_handler = sigalrm;

	error = sigaction(SIGALRM, &sa, NULL);
	assert(error == 0);

	itv.it_interval.tv_sec = 0;
	itv.it_interval.tv_usec = 1000000 / guest_tslice;
	itv.it_value.tv_sec = 0;
	itv.it_value.tv_usec = 1000000 / guest_tslice;

	error = setitimer(ITIMER_REAL, &itv, NULL);
	assert(error == 0);
}

static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT]  = vmexit_inout,
	[VM_EXITCODE_VMX]    = vmexit_vmx,
	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
	[VM_EXITCODE_PAGING] = vmexit_paging,
	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
};

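/*
 * Per-vcpu run loop: vm_run() is called repeatedly and every exit is
 * dispatched through the handler[] table above.  The HLT and PAUSE entries
 * are only installed by main() when the corresponding capability has been
 * enabled.  In muxed mode a handler may return VMEXIT_SWITCH, in which case
 * the loop continues with a different vcpu on the same thread.
 */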
static void
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
{
	cpuset_t mask;
	int error, rc, prevcpu;

	if (guest_vcpu_mux)
		setup_timeslice();

	if (pincpu >= 0) {
		CPU_ZERO(&mask);
		CPU_SET(pincpu + vcpu, &mask);
		error = pthread_setaffinity_np(pthread_self(),
					       sizeof(mask), &mask);
		assert(error == 0);
	}

	while (1) {
		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
		if (error != 0) {
			/*
			 * It is possible that 'vmmctl' or some other process
			 * has transitioned the vcpu to CANNOT_RUN state right
			 * before we tried to transition it to RUNNING.
			 *
			 * This is expected to be temporary so just retry.
			 */
			if (errno == EBUSY)
				continue;
			else
				break;
		}

		prevcpu = vcpu;
                rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
                                                       &vcpu);
		switch (rc) {
                case VMEXIT_SWITCH:
			assert(guest_vcpu_mux);
			if (vcpu == -1) {
				stats.cpu_switch_rotate++;
				vcpu = fbsdrun_get_next_cpu(prevcpu);
			} else {
				stats.cpu_switch_direct++;
			}
			/* fall through */
		case VMEXIT_CONTINUE:
                        rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
			break;
		case VMEXIT_RESTART:
                        rip = vmexit[vcpu].rip;
			break;
		case VMEXIT_RESET:
			exit(0);
		default:
			exit(1);
		}
	}
	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
}

static int
num_vcpus_allowed(struct vmctx *ctx)
{
	int tmp, error;

	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);

	/*
	 * The guest is allowed to spinup more than one processor only if the
	 * UNRESTRICTED_GUEST capability is available.
	 */
	if (error == 0)
		return (VM_MAXCPU);
	else
		return (1);
}

int
main(int argc, char *argv[])
{
	int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons;
	int max_vcpus;
	struct vmctx *ctx;
	uint64_t rip;
	size_t memsize;

	bvmcons = 0;
	inject_bkpt = 0;
	progname = basename(argv[0]);
	gdb_port = DEFAULT_GDB_PORT;
	guest_ncpus = 1;
	ioapic = 0;
	memsize = 256 * MB;

	while ((c = getopt(argc, argv, "abehABHIPxp:g:c:t:z:s:S:n:m:")) != -1) {
		switch (c) {
		case 'a':
			disable_x2apic = 1;
			break;
		case 'A':
			acpi = 1;
			break;
		case 'b':
			bvmcons = 1;
			break;
		case 'B':
			inject_bkpt = 1;
			break;
		case 'x':
			guest_vcpu_mux = 1;
			break;
		case 'p':
			pincpu = atoi(optarg);
			break;
                case 'c':
			guest_ncpus = atoi(optarg);
			break;
		case 'g':
			gdb_port = atoi(optarg);
			break;
		case 'z':
			guest_hz = atoi(optarg);
			break;
		case 't':
			guest_tslice = atoi(optarg);
			break;
		case 's':
			pci_parse_slot(optarg, 0);
			break;
		case 'S':
			pci_parse_slot(optarg, 1);
			break;
                case 'm':
			memsize = strtoul(optarg, NULL, 0) * MB;
			break;
		case 'H':
			guest_vmexit_on_hlt = 1;
			break;
		case 'I':
			ioapic = 1;
			break;
		case 'P':
			guest_vmexit_on_pause = 1;
			break;
		case 'e':
			strictio = 1;
			break;
		case 'h':
			usage(0);
		default:
			usage(1);
		}
	}
	argc -= optind;
	argv += optind;

	if (argc != 1)
		usage(1);

	/* No need to mux if guest is uni-processor */
	if (guest_ncpus <= 1)
		guest_vcpu_mux = 0;

	/* vmexit on hlt if guest is muxed */
	if (guest_vcpu_mux) {
		guest_vmexit_on_hlt = 1;
		guest_vmexit_on_pause = 1;
	}

	vmname = argv[0];

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(1);
	}

	max_vcpus = num_vcpus_allowed(ctx);
	if (guest_ncpus > max_vcpus) {
		fprintf(stderr, "%d vCPUs requested but only %d available\n",
			guest_ncpus, max_vcpus);
		exit(1);
	}

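	/*
	 * Enable optional exit-on-HLT and exit-on-PAUSE behaviour before the
	 * guest starts; the matching exit handlers are only installed when
	 * the capability is actually available.
	 */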
	if (fbsdrun_vmexit_on_hlt()) {
		err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
		if (err < 0) {
			fprintf(stderr, "VM exit on HLT not supported\n");
			exit(1);
		}
		vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
		handler[VM_EXITCODE_HLT] = vmexit_hlt;
	}

        if (fbsdrun_vmexit_on_pause()) {
		/*
		 * pause exit support required for this mode
		 */
		err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
		if (err < 0) {
			fprintf(stderr,
			    "SMP mux requested, no pause support\n");
			exit(1);
		}
		vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
		handler[VM_EXITCODE_PAUSE] = vmexit_pause;
        }

	if (fbsdrun_disable_x2apic())
		err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED);
	else
		err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED);

	if (err) {
		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
		exit(1);
	}

	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
	if (err) {
		fprintf(stderr, "Unable to setup memory (%d)\n", err);
		exit(1);
	}

	init_inout();
	init_pci(ctx);
	if (ioapic)
		ioapic_init(0);

	if (gdb_port != 0)
		init_dbgport(gdb_port);

	if (bvmcons)
		init_bvmcons();

	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
	assert(error == 0);

	if (inject_bkpt) {
		error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
		assert(error == 0);
	}

	/*
	 * build the guest tables, MP etc.
	 */
	mptable_build(ctx, guest_ncpus, ioapic);

	if (acpi) {
		error = acpi_build(ctx, guest_ncpus, ioapic);
		assert(error == 0);
	}

	/*
	 * Add CPU 0
	 */
	fbsdrun_addcpu(ctx, BSP, rip);

	/*
	 * Head off to the main event dispatch loop
	 */
	mevent_dispatch();

	exit(1);
}