xref: /illumos-gate/usr/src/uts/intel/sys/vmm_dev.h (revision 6520eed593b6f28dfe76a9ad27bf2030d9af0a43)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * This file and its contents are supplied under the terms of the
30  * Common Development and Distribution License ("CDDL"), version 1.0.
31  * You may only use this file in accordance with the terms of version
32  * 1.0 of the CDDL.
33  *
34  * A full copy of the text of the CDDL should have accompanied this
35  * source.  A copy of the CDDL is also available via the Internet at
36  * http://www.illumos.org/license/CDDL.
37  *
38  * Copyright 2015 Pluribus Networks Inc.
39  * Copyright 2019 Joyent, Inc.
40  * Copyright 2023 Oxide Computer Company
41  */
42 
43 #ifndef	_VMM_DEV_H_
44 #define	_VMM_DEV_H_
45 
46 #include <machine/vmm.h>
47 
48 #include <sys/param.h>
49 #include <sys/cpuset.h>
50 #include <sys/vmm_data.h>
51 
52 struct vm_create_req {	/* presumably the VMM_CREATE_VM argument; confirm */
53 	char		name[VM_MAX_NAMELEN];
54 	uint64_t	flags;	/* flag values are not defined in this header */
55 };
56 
57 
58 struct vm_destroy_req {	/* presumably the VMM_DESTROY_VM argument; confirm */
59 	char		name[VM_MAX_NAMELEN];
60 };
61 
62 struct vm_memmap {	/* describes one mapping of a memory segment range */
63 	vm_paddr_t	gpa;	/* presumably guest-physical address; confirm */
64 	int		segid;		/* memory segment */
65 	vm_ooffset_t	segoff;		/* offset into memory segment */
66 	size_t		len;		/* mmap length */
67 	int		prot;		/* RWX */
68 	int		flags;	/* presumably VM_MEMMAP_F_* bits; confirm */
69 };
70 #define	VM_MEMMAP_F_WIRED	0x01	/* bit 0 */
71 #define	VM_MEMMAP_F_IOMMU	0x02	/* bit 1 */
72 
73 struct vm_munmap {	/* presumably the VM_MUNMAP_MEMSEG argument; confirm */
74 	vm_paddr_t	gpa;	/* start of the range */
75 	size_t		len;	/* length of the range */
76 };
77 
78 #define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL) /* evaluates (m) twice */
79 struct vm_memseg {
80 	int		segid;
81 	size_t		len;
82 	char		name[VM_MAX_SEG_NAMELEN]; /* empty => VM_MEMSEG_NAME() is NULL */
83 };
84 
85 struct vm_register {	/* presumably for VM_{SET,GET}_REGISTER; confirm */
86 	int		cpuid;	/* presumably the target vCPU id */
87 	int		regnum;		/* enum vm_reg_name */
88 	uint64_t	regval;	/* value of the register named by regnum */
89 };
90 
91 struct vm_seg_desc {			/* data or code segment */
92 	int		cpuid;	/* presumably the target vCPU id */
93 	int		regnum;		/* enum vm_reg_name */
94 	struct seg_desc desc;	/* struct seg_desc is declared outside this header */
95 };
96 
97 struct vm_register_set {	/* presumably for VM_{SET,GET}_REGISTER_SET */
98 	int		cpuid;	/* presumably the target vCPU id */
99 	unsigned int	count;	/* presumably entries in each array; confirm */
100 	const int	*regnums;	/* enum vm_reg_name */
101 	uint64_t	*regvals;	/* presumably parallel to regnums; confirm */
102 };
103 
104 struct vm_exception {	/* presumably the VM_INJECT_EXCEPTION argument */
105 	int		cpuid;	/* presumably the target vCPU id */
106 	int		vector;
107 	uint32_t	error_code;
108 	int		error_code_valid; /* presumably gates use of error_code */
109 	int		restart_instruction;
110 };
111 
112 struct vm_lapic_msi {	/* presumably the VM_LAPIC_MSI argument; confirm */
113 	uint64_t	msg;
114 	uint64_t	addr;
115 };
116 
117 struct vm_lapic_irq {	/* presumably for VM_LAPIC_IRQ/VM_LAPIC_LOCAL_IRQ */
118 	int		cpuid;	/* presumably the target vCPU id */
119 	int		vector;
120 };
121 
122 struct vm_ioapic_irq {	/* presumably for the VM_IOAPIC_*_IRQ ioctls */
123 	int		irq;
124 };
125 
126 struct vm_isa_irq {	/* presumably for the VM_ISA_*_IRQ ioctls; confirm */
127 	int		atpic_irq;
128 	int		ioapic_irq;
129 };
130 
131 struct vm_isa_irq_trigger {	/* presumably for VM_ISA_SET_IRQ_TRIGGER */
132 	int		atpic_irq;
133 	enum vm_intr_trigger trigger;	/* enum is declared outside this header */
134 };
135 
136 struct vm_capability {	/* presumably for VM_{SET,GET}_CAPABILITY */
137 	int		cpuid;	/* presumably the target vCPU id */
138 	enum vm_cap_type captype;	/* enum is declared outside this header */
139 	int		capval;
140 	int		allcpus;
141 };
142 
143 struct vm_pptdev {	/* presumably for VM_BIND_PPTDEV/VM_UNBIND_PPTDEV */
144 	int		pptfd;	/* presumably an open fd for the ppt device */
145 };
146 
147 struct vm_pptdev_mmio {	/* presumably for VM_{MAP,UNMAP}_PPTDEV_MMIO */
148 	int		pptfd;
149 	vm_paddr_t	gpa;	/* presumably guest-physical address */
150 	vm_paddr_t	hpa;	/* presumably host-physical address */
151 	size_t		len;
152 };
153 
154 struct vm_pptdev_msi {	/* presumably the VM_PPTDEV_MSI argument */
155 	int		vcpu;
156 	int		pptfd;
157 	int		numvec;		/* 0 means disabled */
158 	uint64_t	msg;
159 	uint64_t	addr;
160 };
161 
162 struct vm_pptdev_msix {	/* presumably the VM_PPTDEV_MSIX argument */
163 	int		vcpu;
164 	int		pptfd;
165 	int		idx;	/* presumably the MSI-X table index; confirm */
166 	uint64_t	msg;
167 	uint32_t	vector_control;
168 	uint64_t	addr;
169 };
170 
171 struct vm_pptdev_limits {	/* presumably for VM_GET_PPTDEV_LIMITS */
172 	int		pptfd;
173 	int		msi_limit;
174 	int		msix_limit;
175 };
176 
177 struct vm_nmi {	/* presumably the VM_INJECT_NMI argument; confirm */
178 	int		cpuid;	/* presumably the target vCPU id */
179 };
180 
181 #define	MAX_VM_STATS	64	/* element count of vm_stats.statbuf */
182 
183 struct vm_stats {
184 	int		cpuid;				/* in */
185 	int		index;				/* in */
186 	int		num_entries;			/* out */
187 	struct timeval	tv;
188 	uint64_t	statbuf[MAX_VM_STATS];	/* presumably out; confirm */
189 };
190 
191 struct vm_stat_desc {	/* presumably the VM_STAT_DESC argument; confirm */
192 	int		index;				/* in */
193 	char		desc[128];			/* out */
194 };
195 
196 struct vm_x2apic {	/* presumably for VM_{SET,GET}_X2APIC_STATE */
197 	int			cpuid;	/* presumably the target vCPU id */
198 	enum x2apic_state	state;	/* enum is declared outside this header */
199 };
200 
201 struct vm_gpa_pte {	/* presumably the VM_GET_GPA_PMAP argument; confirm */
202 	uint64_t	gpa;				/* in */
203 	uint64_t	pte[4];				/* out */
204 	int		ptenum;	/* presumably count of valid pte[] slots */
205 };
206 
207 struct vm_hpet_cap {	/* presumably for VM_GET_HPET_CAPABILITIES */
208 	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
209 };
210 
211 struct vm_suspend {	/* presumably the VM_SUSPEND argument; confirm */
212 	enum vm_suspend_how how;	/* enum is declared outside this header */
213 	int source;
214 };
215 
216 /*
217  * Deprecated flags for vm_reinit`flags:
218  *
219  * Suspend (by force) VM as part of reinit.  Effectively a no-op since
220  * suspension requirements during reinit have been lifted.
221  *
222  * #define VM_REINIT_F_FORCE_SUSPEND	(1 << 0)
223  */
224 
225 struct vm_reinit {	/* presumably the VM_REINIT argument; confirm */
226 	uint64_t	flags;	/* only a deprecated flag is described above */
227 };
228 
229 struct vm_gla2gpa {	/* presumably for VM_GLA2GPA/VM_GLA2GPA_NOFAULT */
230 	int		vcpuid;		/* inputs */
231 	int		prot;		/* PROT_READ or PROT_WRITE */
232 	uint64_t	gla;	/* presumably guest linear address */
233 	struct vm_guest_paging paging;	/* last of the input fields */
234 	int		fault;		/* outputs */
235 	uint64_t	gpa;	/* presumably guest physical address */
236 };
237 
238 struct vm_activate_cpu {	/* presumably the VM_ACTIVATE_CPU argument */
239 	int		vcpuid;
240 };
241 
242 struct vm_cpuset {	/* presumably the VM_GET_CPUS argument; confirm */
243 	int		which;	/* presumably one of the VM_*_CPUS values below */
244 	int		cpusetsize;	/* presumably size of *cpus; confirm */
245 #ifndef _KERNEL
246 	cpuset_t	*cpus;	/* typed pointer when _KERNEL is not defined */
247 #else
248 	void		*cpus;	/* untyped pointer when _KERNEL is defined */
249 #endif
250 };
251 #define	VM_ACTIVE_CPUS		0
252 /*
253  * Deprecated:
254  * #define VM_SUSPENDED_CPUS	1
255  */
256 #define	VM_DEBUG_CPUS		2	/* value 1 is the deprecated selector */
257 
258 struct vm_intinfo {	/* presumably for VM_{SET,GET}_INTINFO; confirm */
259 	int		vcpuid;
260 	uint64_t	info1;
261 	uint64_t	info2;
262 };
263 
264 struct vm_rtc_data {	/* presumably for VM_RTC_WRITE/VM_RTC_READ; confirm */
265 	int		offset;
266 	uint8_t		value;	/* a single byte */
267 };
268 
269 struct vm_devmem_offset {	/* presumably for VM_DEVMEM_GETOFFSET */
270 	int		segid;
271 	off_t		offset;
272 };
273 
274 struct vm_cpu_topology {	/* presumably for VM_{SET,GET}_TOPOLOGY */
275 	uint16_t	sockets;
276 	uint16_t	cores;
277 	uint16_t	threads;
278 	uint16_t	maxcpus;
279 };
280 
281 struct vm_readwrite_kernemu_device {	/* presumably VM_{SET,GET}_KERNEMU_DEV */
282 	int		vcpuid;
283 	unsigned	access_width : 3;	/* 3-bit field */
284 	unsigned	_unused : 29;	/* 3 + 29 = 32 bits of bit-fields */
285 	uint64_t	gpa;
286 	uint64_t	value;
287 };
288 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); /* size is fixed by ABI */
289 
290 enum vcpu_reset_kind {	/* values stored in vm_vcpu_reset.kind */
291 	VRK_RESET = 0,	/* "full" reset, as if freshly powered on */
292 	/*
293 	 * The reset performed by an INIT IPI clears much of the CPU state, but
294 	 * some portions are left untouched, unlike VRK_RESET, which represents
295 	 * a "full" reset as if the system was freshly powered on.
296 	 */
297 	VRK_INIT = 1,
298 };
299 
300 struct vm_vcpu_reset {	/* presumably the VM_RESET_CPU argument; confirm */
301 	int		vcpuid;
302 	uint32_t	kind;	/* contains: enum vcpu_reset_kind */
303 };
304 
305 struct vm_run_state {	/* presumably for VM_{GET,SET}_RUN_STATE; confirm */
306 	int		vcpuid;
307 	uint32_t	state;	/* of enum cpu_init_status type */
308 	uint8_t		sipi_vector;	/* vector of SIPI, if any */
309 	uint8_t		_pad[3];	/* explicit padding after sipi_vector */
310 };
311 
312 /* Transfer data for VM_GET_FPU and VM_SET_FPU */
313 struct vm_fpu_state {
314 	int		vcpuid;
315 	void		*buf;	/* presumably the caller's FPU data buffer */
316 	size_t		len;	/* presumably the size of buf in bytes; confirm */
317 };
318 
319 struct vm_fpu_desc_entry {	/* element type of vm_fpu_desc.vfd_entry_data */
320 	uint64_t	vfde_feature;
321 	uint32_t	vfde_size;
322 	uint32_t	vfde_off;
323 };
324 
325 struct vm_fpu_desc {	/* presumably the VM_DESC_FPU_AREA argument; confirm */
326 	struct vm_fpu_desc_entry	*vfd_entry_data; /* array of entries */
327 	size_t				vfd_req_size;
328 	uint32_t			vfd_num_entries; /* presumably entry count */
329 };
330 
331 struct vmm_resv_query {	/* presumably the VMM_RESV_QUERY result; confirm */
332 	size_t	vrq_free_sz;
333 	size_t	vrq_alloc_sz;
334 	size_t	vrq_alloc_transient_sz;
335 	size_t	vrq_limit;
336 };
337 
338 struct vmm_resv_target {	/* presumably the VMM_RESV_SET_TARGET argument */
339 	/* Target size for VMM reservoir */
340 	size_t	vrt_target_sz;
341 
342 	/*
343 	 * Change of reservoir size to meet target will be done in multiple
344 	 * steps of chunk size (or smaller)
345 	 */
346 	size_t	vrt_chunk_sz;
347 
348 	/*
349 	 * Resultant size of reservoir after operation.  Should match target
350 	 * size, except when interrupted.
351 	 */
352 	size_t	vrt_result_sz;	/* presumably written by the kernel; confirm */
353 };
354 
355 /*
356  * struct vmm_dirty_tracker is used for tracking dirty guest pages during
357  * e.g. live migration.
358  *
359  * - The `vdt_start_gpa` field specifies the offset from the beginning of
360  *   guest physical memory to track;
361  * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the
362  *   given start address.  Each bit indicates whether the given guest page
363  *   is dirty or not.
364  * - `vdt_len` specifies the length of the guest physical memory
365  *   region in bytes.  It also de facto bounds the range of guest addresses
366  *   we will examine on any one `VM_TRACK_DIRTY_PAGES` ioctl().  If the
367  *   range of the bit vector spans an unallocated region (or extends beyond
368  *   the end of the guest physical address space) the corresponding bits in
369  *   `vdt_pfns` will be zeroed.
370  */
371 struct vmm_dirty_tracker {	/* used with the VM_TRACK_DIRTY_PAGES ioctl */
372 	uint64_t	vdt_start_gpa;	/* start of the tracked region */
373 	size_t		vdt_len;	/* length of region */
374 	void		*vdt_pfns;	/* bit vector of dirty bits */
375 };
376 
377 /* Current (arbitrary) max length for vm_data_xfer */
378 #define VM_DATA_XFER_LIMIT	8192
379 
380 #define	VDX_FLAG_READ_COPYIN	(1 << 0)	/* bit 0 */
381 #define	VDX_FLAG_WRITE_COPYOUT	(1 << 1)	/* bit 1 */
382 
383 #define	VDX_FLAGS_VALID		(VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT) /* both VDX_FLAG_* bits */
384 
385 struct vm_data_xfer {	/* presumably for VM_DATA_READ/VM_DATA_WRITE; confirm */
386 	int		vdx_vcpuid;
387 	uint16_t	vdx_class;
388 	uint16_t	vdx_version;
389 	uint32_t	vdx_flags;	/* presumably VDX_FLAG_* bits; confirm */
390 	uint32_t	vdx_len;	/* presumably capped by VM_DATA_XFER_LIMIT */
391 	uint32_t	vdx_result_len;
392 	void		*vdx_data;
393 };
394 
395 struct vm_vcpu_cpuid_config {	/* presumably for VM_{GET,SET}_CPUID; confirm */
396 	int		vvcc_vcpuid;
397 	uint32_t	vvcc_flags;
398 	uint32_t	vvcc_nent;	/* presumably entry count of vvcc_entries */
399 	uint32_t	_pad;	/* explicit padding ahead of the pointer */
400 	void		*vvcc_entries;
401 };
402 
403 /* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */
404 struct vm_legacy_cpuid {
405 	int		vlc_vcpuid;
406 	uint32_t	vlc_eax;	/* in/out direction is not annotated here */
407 	uint32_t	vlc_ebx;
408 	uint32_t	vlc_ecx;
409 	uint32_t	vlc_edx;
410 };
411 
412 /*
413  * VMM Interface Version
414  *
415  * Despite the fact that the kernel interface to bhyve is explicitly considered
416  * Private, there are out-of-gate consumers which utilize it.  While they assume
417  * the risk of any breakage incurred by changes to bhyve, we can at least try to
418  * make it easier to detect changes by exposing a "version" of the interface.
419  * It can also be used by the in-gate userland to detect if packaging updates
420  * somehow result in the userland and kernel falling out of sync.
421  *
422  * There are no established criteria for the magnitude of change which requires
423  * this version to be incremented, and maintenance of it is considered a
424  * best-effort activity.  Nothing is to be inferred about the magnitude of a
425  * change when the version is modified.  It follows no rules like semver.
426  */
427 #define	VMM_CURRENT_INTERFACE_VERSION	16	/* presumably reported by VMM_INTERFACE_VERSION */
428 
429 
430 #define	VMMCTL_IOC_BASE		(('V' << 16) | ('M' << 8))	/* low byte: command */
431 #define	VMM_IOC_BASE		(('v' << 16) | ('m' << 8))	/* low byte: command */
432 #define	VMM_LOCK_IOC_BASE	(('v' << 16) | ('l' << 8))	/* low byte: command */
433 #define	VMM_CPU_IOC_BASE	(('v' << 16) | ('p' << 8))	/* low byte: command */
434 
435 /* Operations performed on the vmmctl device (commands use VMMCTL_IOC_BASE) */
436 #define	VMM_CREATE_VM		(VMMCTL_IOC_BASE | 0x01)
437 #define	VMM_DESTROY_VM		(VMMCTL_IOC_BASE | 0x02)
438 #define	VMM_VM_SUPPORTED	(VMMCTL_IOC_BASE | 0x03)
439 #define	VMM_INTERFACE_VERSION	(VMMCTL_IOC_BASE | 0x04)
440 #define	VMM_CHECK_IOMMU		(VMMCTL_IOC_BASE | 0x05)
441 
442 #define	VMM_RESV_QUERY		(VMMCTL_IOC_BASE | 0x10)	/* VMM_RESV_* start at 0x10 */
443 #define	VMM_RESV_SET_TARGET	(VMMCTL_IOC_BASE | 0x11)
444 
445 /* Operations performed in the context of a given vCPU (VMM_CPU_IOC_BASE) */
446 #define	VM_RUN				(VMM_CPU_IOC_BASE | 0x01)
447 #define	VM_SET_REGISTER			(VMM_CPU_IOC_BASE | 0x02)
448 #define	VM_GET_REGISTER			(VMM_CPU_IOC_BASE | 0x03)
449 #define	VM_SET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x04)
450 #define	VM_GET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x05)
451 #define	VM_SET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x06)
452 #define	VM_GET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x07)
453 #define	VM_INJECT_EXCEPTION		(VMM_CPU_IOC_BASE | 0x08)
454 #define	VM_SET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x09)
455 #define	VM_GET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x0a)
456 #define	VM_PPTDEV_MSI			(VMM_CPU_IOC_BASE | 0x0b)
457 #define	VM_PPTDEV_MSIX			(VMM_CPU_IOC_BASE | 0x0c)
458 #define	VM_SET_X2APIC_STATE		(VMM_CPU_IOC_BASE | 0x0d)
459 #define	VM_GLA2GPA			(VMM_CPU_IOC_BASE | 0x0e)
460 #define	VM_GLA2GPA_NOFAULT		(VMM_CPU_IOC_BASE | 0x0f)
461 #define	VM_ACTIVATE_CPU			(VMM_CPU_IOC_BASE | 0x10)
462 #define	VM_SET_INTINFO			(VMM_CPU_IOC_BASE | 0x11)
463 #define	VM_GET_INTINFO			(VMM_CPU_IOC_BASE | 0x12)
464 #define	VM_RESTART_INSTRUCTION		(VMM_CPU_IOC_BASE | 0x13)
465 #define	VM_SET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x14)
466 #define	VM_GET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x15)
467 #define	VM_RESET_CPU			(VMM_CPU_IOC_BASE | 0x16)
468 #define	VM_GET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x17)
469 #define	VM_SET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x18)
470 #define	VM_GET_FPU			(VMM_CPU_IOC_BASE | 0x19)
471 #define	VM_SET_FPU			(VMM_CPU_IOC_BASE | 0x1a)
472 #define	VM_GET_CPUID			(VMM_CPU_IOC_BASE | 0x1b)
473 #define	VM_SET_CPUID			(VMM_CPU_IOC_BASE | 0x1c)
474 #define	VM_LEGACY_CPUID			(VMM_CPU_IOC_BASE | 0x1d)
475 
476 /* Operations requiring write-locking the VM (VMM_LOCK_IOC_BASE) */
477 #define	VM_REINIT		(VMM_LOCK_IOC_BASE | 0x01)
478 #define	VM_BIND_PPTDEV		(VMM_LOCK_IOC_BASE | 0x02)
479 #define	VM_UNBIND_PPTDEV	(VMM_LOCK_IOC_BASE | 0x03)
480 #define	VM_MAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x04)
481 #define	VM_ALLOC_MEMSEG		(VMM_LOCK_IOC_BASE | 0x05)
482 #define	VM_MMAP_MEMSEG		(VMM_LOCK_IOC_BASE | 0x06)
483 #define	VM_PMTMR_LOCATE		(VMM_LOCK_IOC_BASE | 0x07)
484 #define	VM_MUNMAP_MEMSEG	(VMM_LOCK_IOC_BASE | 0x08)
485 #define	VM_UNMAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x09)
486 #define	VM_PAUSE		(VMM_LOCK_IOC_BASE | 0x0a)
487 #define	VM_RESUME		(VMM_LOCK_IOC_BASE | 0x0b)
488 
489 #define	VM_WRLOCK_CYCLE		(VMM_LOCK_IOC_BASE | 0xff)	/* 0xff: apart from the sequential numbers */
490 
491 /* All other ioctls (VMM_IOC_BASE) */
492 #define	VM_GET_GPA_PMAP			(VMM_IOC_BASE | 0x01)
493 #define	VM_GET_MEMSEG			(VMM_IOC_BASE | 0x02)
494 #define	VM_MMAP_GETNEXT			(VMM_IOC_BASE | 0x03)
495 
496 #define	VM_LAPIC_IRQ			(VMM_IOC_BASE | 0x04)
497 #define	VM_LAPIC_LOCAL_IRQ		(VMM_IOC_BASE | 0x05)
498 #define	VM_LAPIC_MSI			(VMM_IOC_BASE | 0x06)
499 
500 #define	VM_IOAPIC_ASSERT_IRQ		(VMM_IOC_BASE | 0x07)
501 #define	VM_IOAPIC_DEASSERT_IRQ		(VMM_IOC_BASE | 0x08)
502 #define	VM_IOAPIC_PULSE_IRQ		(VMM_IOC_BASE | 0x09)
503 
504 #define	VM_ISA_ASSERT_IRQ		(VMM_IOC_BASE | 0x0a)
505 #define	VM_ISA_DEASSERT_IRQ		(VMM_IOC_BASE | 0x0b)
506 #define	VM_ISA_PULSE_IRQ		(VMM_IOC_BASE | 0x0c)
507 #define	VM_ISA_SET_IRQ_TRIGGER		(VMM_IOC_BASE | 0x0d)
508 
509 #define	VM_RTC_WRITE			(VMM_IOC_BASE | 0x0e)
510 #define	VM_RTC_READ			(VMM_IOC_BASE | 0x0f)
511 #define	VM_RTC_SETTIME			(VMM_IOC_BASE | 0x10)
512 #define	VM_RTC_GETTIME			(VMM_IOC_BASE | 0x11)
513 
514 #define	VM_SUSPEND			(VMM_IOC_BASE | 0x12)
515 
516 #define	VM_IOAPIC_PINCOUNT		(VMM_IOC_BASE | 0x13)
517 #define	VM_GET_PPTDEV_LIMITS		(VMM_IOC_BASE | 0x14)
518 #define	VM_GET_HPET_CAPABILITIES	(VMM_IOC_BASE | 0x15)
519 
520 #define	VM_STATS_IOC			(VMM_IOC_BASE | 0x16)
521 #define	VM_STAT_DESC			(VMM_IOC_BASE | 0x17)
522 
523 #define	VM_INJECT_NMI			(VMM_IOC_BASE | 0x18)
524 #define	VM_GET_X2APIC_STATE		(VMM_IOC_BASE | 0x19)
525 #define	VM_SET_TOPOLOGY			(VMM_IOC_BASE | 0x1a)
526 #define	VM_GET_TOPOLOGY			(VMM_IOC_BASE | 0x1b)
527 #define	VM_GET_CPUS			(VMM_IOC_BASE | 0x1c)
528 #define	VM_SUSPEND_CPU			(VMM_IOC_BASE | 0x1d)
529 #define	VM_RESUME_CPU			(VMM_IOC_BASE | 0x1e)
530 
531 #define	VM_PPTDEV_DISABLE_MSIX		(VMM_IOC_BASE | 0x1f)
532 
533 /* Note: forces a barrier on a flush operation before returning. */
534 #define	VM_TRACK_DIRTY_PAGES		(VMM_IOC_BASE | 0x20)
535 #define	VM_DESC_FPU_AREA		(VMM_IOC_BASE | 0x21)
536 
537 #define	VM_DATA_READ			(VMM_IOC_BASE | 0x22)
538 #define	VM_DATA_WRITE			(VMM_IOC_BASE | 0x23)
539 
540 #define	VM_SET_AUTODESTRUCT		(VMM_IOC_BASE | 0x24)
541 #define	VM_DESTROY_SELF			(VMM_IOC_BASE | 0x25)
542 #define	VM_DESTROY_PENDING		(VMM_IOC_BASE | 0x26)
543 
544 #define	VM_VCPU_BARRIER			(VMM_IOC_BASE | 0x27)
545 
546 #define	VM_DEVMEM_GETOFFSET		(VMM_IOC_BASE | 0xff)	/* 0xff: apart from the sequential numbers */
547 
548 #define	VMM_CTL_DEV		"/dev/vmmctl"	/* path of the vmmctl device */
549 
550 #endif	/* _VMM_DEV_H_ */
551