xref: /illumos-gate/usr/src/uts/intel/sys/vmm_dev.h (revision 5b0d53307d70a828ad7aef4dc6d8a3ad7d5c231b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2021 Oxide Computer Company
43  */
44 
45 #ifndef	_VMM_DEV_H_
46 #define	_VMM_DEV_H_
47 
48 #include <machine/vmm.h>
49 
50 #include <sys/param.h>
51 #include <sys/cpuset.h>
52 
53 struct vm_create_req {
54 	char		name[VM_MAX_NAMELEN];
55 	uint64_t	flags;
56 };
57 
58 
59 struct vm_destroy_req {
60 	char		name[VM_MAX_NAMELEN];
61 };
62 
63 struct vm_memmap {
64 	vm_paddr_t	gpa;
65 	int		segid;		/* memory segment */
66 	vm_ooffset_t	segoff;		/* offset into memory segment */
67 	size_t		len;		/* mmap length */
68 	int		prot;		/* RWX */
69 	int		flags;
70 };
71 #define	VM_MEMMAP_F_WIRED	0x01
72 #define	VM_MEMMAP_F_IOMMU	0x02
73 
74 struct vm_munmap {
75 	vm_paddr_t	gpa;
76 	size_t		len;
77 };
78 
79 #define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
80 struct vm_memseg {
81 	int		segid;
82 	size_t		len;
83 	char		name[VM_MAX_SEG_NAMELEN];
84 };
85 
/*
 * A single register value for the vCPU identified by `cpuid`.  Presumably
 * the argument of VM_SET_REGISTER/VM_GET_REGISTER -- confirm against the
 * ioctl handler.
 */
struct vm_register {
	int		cpuid;
	int		regnum;		/* enum vm_reg_name */
	uint64_t	regval;
};
91 
92 struct vm_seg_desc {			/* data or code segment */
93 	int		cpuid;
94 	int		regnum;		/* enum vm_reg_name */
95 	struct seg_desc desc;
96 };
97 
/*
 * A batch of registers for one vCPU.  `regnums` and `regvals` are pointers
 * to caller-provided arrays; `count` is presumably the number of elements
 * in each -- confirm against the ioctl handler.
 */
struct vm_register_set {
	int		cpuid;
	unsigned int	count;
	const int	*regnums;	/* enum vm_reg_name */
	uint64_t	*regvals;
};
104 
/*
 * An exception for the vCPU identified by `cpuid`: the vector, an optional
 * error code (`error_code_valid` says whether `error_code` is meaningful),
 * and a `restart_instruction` flag.  Presumably the argument of
 * VM_INJECT_EXCEPTION -- confirm against the ioctl handler.
 */
struct vm_exception {
	int		cpuid;
	int		vector;
	uint32_t	error_code;
	int		error_code_valid;
	int		restart_instruction;
};
112 
/*
 * An MSI expressed as a message/address pair.  Presumably the argument of
 * VM_LAPIC_MSI -- confirm against the ioctl handler.
 */
struct vm_lapic_msi {
	uint64_t	msg;
	uint64_t	addr;
};
117 
/*
 * An interrupt vector paired with the vCPU it targets.  Presumably the
 * argument of VM_LAPIC_IRQ/VM_LAPIC_LOCAL_IRQ -- confirm against the ioctl
 * handler.
 */
struct vm_lapic_irq {
	int		cpuid;
	int		vector;
};
122 
/*
 * A single IRQ number.  Presumably the argument of the VM_IOAPIC_*_IRQ
 * operations -- confirm against the ioctl handler.
 */
struct vm_ioapic_irq {
	int		irq;
};
126 
/*
 * An ISA interrupt expressed as a pair of IRQ numbers, one for the ATPIC
 * and one for the IOAPIC.  Presumably the argument of the VM_ISA_*_IRQ
 * operations -- confirm against the ioctl handler.
 */
struct vm_isa_irq {
	int		atpic_irq;
	int		ioapic_irq;
};
131 
132 struct vm_isa_irq_trigger {
133 	int		atpic_irq;
134 	enum vm_intr_trigger trigger;
135 };
136 
137 struct vm_capability {
138 	int		cpuid;
139 	enum vm_cap_type captype;
140 	int		capval;
141 	int		allcpus;
142 };
143 
/*
 * Identifies a passthrough (ppt) device by file descriptor.  Presumably
 * the argument of VM_BIND_PPTDEV/VM_UNBIND_PPTDEV -- confirm against the
 * ioctl handler.
 */
struct vm_pptdev {
	int		pptfd;
};
147 
148 struct vm_pptdev_mmio {
149 	int		pptfd;
150 	vm_paddr_t	gpa;
151 	vm_paddr_t	hpa;
152 	size_t		len;
153 };
154 
/*
 * MSI configuration for the passthrough device `pptfd`.  A `numvec` of 0
 * means MSI is disabled.  Presumably the argument of VM_PPTDEV_MSI --
 * confirm against the ioctl handler.
 */
struct vm_pptdev_msi {
	int		vcpu;
	int		pptfd;
	int		numvec;		/* 0 means disabled */
	uint64_t	msg;
	uint64_t	addr;
};
162 
/*
 * One MSI-X table entry (`idx`) for the passthrough device `pptfd`.
 * Presumably the argument of VM_PPTDEV_MSIX -- confirm against the ioctl
 * handler.  The member order is part of the ioctl ABI and must not change.
 */
struct vm_pptdev_msix {
	int		vcpu;
	int		pptfd;
	int		idx;
	uint64_t	msg;
	uint32_t	vector_control;
	uint64_t	addr;
};
171 
/*
 * MSI and MSI-X limits for the passthrough device `pptfd`.  Presumably the
 * argument of VM_GET_PPTDEV_LIMITS -- confirm against the ioctl handler.
 */
struct vm_pptdev_limits {
	int		pptfd;
	int		msi_limit;
	int		msix_limit;
};
177 
/*
 * Identifies a vCPU by id.  Presumably the argument of VM_INJECT_NMI --
 * confirm against the ioctl handler.
 */
struct vm_nmi {
	int		cpuid;
};
181 
/* Number of 64-bit statistic slots carried by one struct vm_stats. */
#define	MAX_VM_STATS	64

/*
 * A window of per-vCPU statistics.  The caller supplies `cpuid` and
 * `index`; `num_entries` is an output.  `statbuf` holds MAX_VM_STATS
 * values.  Presumably the argument of VM_STATS_IOC -- confirm against the
 * ioctl handler.
 */
struct vm_stats {
	int		cpuid;				/* in */
	int		index;				/* in */
	int		num_entries;			/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};
191 
/*
 * Description of one statistic: the caller supplies `index` and receives
 * up to 128 bytes of text in `desc`.  Presumably the argument of
 * VM_STAT_DESC -- confirm against the ioctl handler.
 */
struct vm_stat_desc {
	int		index;				/* in */
	char		desc[128];			/* out */
};
196 
197 struct vm_x2apic {
198 	int			cpuid;
199 	enum x2apic_state	state;
200 };
201 
/*
 * Page table entries for `gpa`: the caller supplies `gpa` and up to four
 * entries are returned in `pte`.  `ptenum` is presumably the number of
 * valid entries -- confirm against the ioctl handler.
 */
struct vm_gpa_pte {
	uint64_t	gpa;				/* in */
	uint64_t	pte[4];				/* out */
	int		ptenum;
};
207 
/*
 * The HPET capabilities, truncated to the low 32 bits.  Presumably the
 * argument of VM_GET_HPET_CAPABILITIES -- confirm against the ioctl
 * handler.
 */
struct vm_hpet_cap {
	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
};
211 
212 struct vm_suspend {
213 	enum vm_suspend_how how;
214 };
215 
/*
 * Flag bit 0; presumably accepted in vm_reinit`flags -- confirm against
 * the ioctl handler.
 */
#define	VM_REINIT_F_FORCE_SUSPEND	(1 << 0)

/*
 * A 64-bit flags word.  Presumably the argument of VM_REINIT -- confirm
 * against the ioctl handler.
 */
struct vm_reinit {
	uint64_t	flags;
};
221 
222 struct vm_gla2gpa {
223 	int		vcpuid;		/* inputs */
224 	int		prot;		/* PROT_READ or PROT_WRITE */
225 	uint64_t	gla;
226 	struct vm_guest_paging paging;
227 	int		fault;		/* outputs */
228 	uint64_t	gpa;
229 };
230 
/*
 * Identifies a vCPU by id.  Presumably the argument of VM_ACTIVATE_CPU
 * (and possibly VM_SUSPEND_CPU/VM_RESUME_CPU) -- confirm against the ioctl
 * handler.
 */
struct vm_activate_cpu {
	int		vcpuid;
};
234 
235 struct vm_cpuset {
236 	int		which;
237 	int		cpusetsize;
238 #ifndef _KERNEL
239 	cpuset_t	*cpus;
240 #else
241 	void		*cpus;
242 #endif
243 };
244 #define	VM_ACTIVE_CPUS		0
245 #define	VM_SUSPENDED_CPUS	1
246 #define	VM_DEBUG_CPUS		2
247 
/*
 * Two 64-bit info words for the vCPU identified by `vcpuid`.  Presumably
 * the argument of VM_SET_INTINFO/VM_GET_INTINFO -- confirm against the
 * ioctl handler.
 */
struct vm_intinfo {
	int		vcpuid;
	uint64_t	info1;
	uint64_t	info2;
};
253 
/*
 * An RTC time in seconds.  Presumably the argument of VM_RTC_SETTIME and
 * VM_RTC_GETTIME -- confirm against the ioctl handler.
 */
struct vm_rtc_time {
	time_t		secs;
};
257 
/*
 * One byte of RTC data at `offset`.  Presumably the argument of
 * VM_RTC_WRITE/VM_RTC_READ -- confirm against the ioctl handler.
 */
struct vm_rtc_data {
	int		offset;
	uint8_t		value;
};
262 
/*
 * Pairs a memory segment id with a file offset.  Presumably the argument of
 * VM_DEVMEM_GETOFFSET -- confirm against the ioctl handler.
 */
struct vm_devmem_offset {
	int		segid;
	off_t		offset;
};
267 
/*
 * CPU topology expressed as socket, core and thread counts plus a maximum
 * CPU count.  Presumably the argument of VM_SET_TOPOLOGY/VM_GET_TOPOLOGY --
 * confirm against the ioctl handler.
 */
struct vm_cpu_topology {
	uint16_t	sockets;
	uint16_t	cores;
	uint16_t	threads;
	uint16_t	maxcpus;
};
274 
/*
 * Access to a kernel-emulated device register at `gpa` on behalf of vCPU
 * `vcpuid`.  `access_width` is a 3-bit field; the remaining 29 bits of that
 * word are unused.  Presumably the argument of VM_SET_KERNEMU_DEV and
 * VM_GET_KERNEMU_DEV -- confirm against the ioctl handler.
 *
 * The size is pinned at 24 bytes by the static assertion below because the
 * layout is part of the ioctl ABI.
 */
struct vm_readwrite_kernemu_device {
	int		vcpuid;
	unsigned	access_width : 3;
	unsigned	_unused : 29;
	uint64_t	gpa;
	uint64_t	value;
};
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
283 
/* The kinds of vCPU reset which may be requested via struct vm_vcpu_reset. */
enum vcpu_reset_kind {
	/* A "full" reset, as if the system was freshly powered on. */
	VRK_RESET = 0,
	/*
	 * The reset performed by an INIT IPI clears much of the CPU state, but
	 * some portions are left untouched, unlike VRK_RESET, which represents
	 * a "full" reset as if the system was freshly powered on.
	 */
	VRK_INIT = 1,
};
293 
/*
 * Reset request for the vCPU identified by `vcpuid`.  `kind` is carried as
 * a fixed-width integer rather than as the enum type itself.  Presumably
 * the argument of VM_RESET_CPU -- confirm against the ioctl handler.
 */
struct vm_vcpu_reset {
	int		vcpuid;
	uint32_t	kind;	/* contains: enum vcpu_reset_kind */
};
298 
/*
 * Run state of the vCPU identified by `vcpuid`, with explicit trailing
 * padding.  Presumably the argument of VM_GET_RUN_STATE/VM_SET_RUN_STATE --
 * confirm against the ioctl handler.
 */
struct vm_run_state {
	int		vcpuid;
	uint32_t	state;	/* of enum cpu_init_status type */
	uint8_t		sipi_vector;	/* vector of SIPI, if any */
	uint8_t		_pad[3];
};
305 
/*
 * Transfer data for VM_GET_FPU and VM_SET_FPU: the vCPU in question and a
 * caller-provided buffer described by `buf` and `len`.
 */
struct vm_fpu_state {
	int		vcpuid;
	void		*buf;
	size_t		len;
};
312 
/*
 * One entry of an FPU area description: a feature identifier together with
 * a size and an offset.  NOTE(review): units and the base of `vfde_off`
 * are not visible in this header -- confirm against the producer.
 */
struct vm_fpu_desc_entry {
	uint64_t	vfde_feature;
	uint32_t	vfde_size;
	uint32_t	vfde_off;
};
318 
/*
 * Description of the FPU area as an array of vm_fpu_desc_entry records
 * referenced by `vfd_entry_data`, with `vfd_num_entries` giving an entry
 * count and `vfd_req_size` a size.  Presumably the argument of
 * VM_DESC_FPU_AREA -- confirm against the ioctl handler.
 */
struct vm_fpu_desc {
	struct vm_fpu_desc_entry	*vfd_entry_data;
	size_t				vfd_req_size;
	uint32_t			vfd_num_entries;
};
324 
/*
 * Sizes describing the VMM memory reservoir: free, allocated, transiently
 * allocated, and the limit.  Presumably the argument of VMM_RESV_QUERY --
 * confirm against the vmmctl ioctl handler.
 */
struct vmm_resv_query {
	size_t	vrq_free_sz;
	size_t	vrq_alloc_sz;
	size_t	vrq_alloc_transient_sz;
	size_t	vrq_limit;
};
331 
/*
 * struct vmm_dirty_tracker is used for tracking dirty guest pages during
 * e.g. live migration.
 *
 * - The `vdt_start_gpa` field specifies the offset from the beginning of
 *   guest physical memory to track;
 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the
 *   given start address.  Each bit indicates whether the given guest page
 *   is dirty or not.
 * - `vdt_len` specifies the length of the guest physical memory region in
 *   bytes.  It also de facto bounds the range of guest addresses we will
 *   examine on any one `VM_TRACK_DIRTY_PAGES` ioctl().  If the range of the
 *   bit vector spans an unallocated region (or extends beyond the end of
 *   the guest physical address space) the corresponding bits in `vdt_pfns`
 *   will be zeroed.
 */
struct vmm_dirty_tracker {
	uint64_t	vdt_start_gpa;
	size_t		vdt_len;	/* length of region */
	void		*vdt_pfns;	/* bit vector of dirty bits */
};
353 
/*
 * VMM Interface Version
 *
 * Despite the fact that the kernel interface to bhyve is explicitly considered
 * Private, there are out-of-gate consumers which utilize it.  While they assume
 * the risk of any breakage incurred by changes to bhyve, we can at least try to
 * make it easier to detect changes by exposing a "version" of the interface.
 * It can also be used by the in-gate userland to detect if packaging updates
 * somehow result in the userland and kernel falling out of sync.
 *
 * There are no established criteria for the magnitude of change which requires
 * this version to be incremented, and maintenance of it is considered a
 * best-effort activity.  Nothing is to be inferred about the magnitude of a
 * change when the version is modified.  It follows no rules like semver.
 */
#define	VMM_CURRENT_INTERFACE_VERSION	1
370 
371 
/*
 * ioctl command numbers.
 *
 * Each command is one of the four bases below OR-ed with an 8-bit command
 * index.  A base packs two ASCII characters into bits 8-23, leaving the low
 * byte free for the index.
 */
#define	VMMCTL_IOC_BASE		(('V' << 16) | ('M' << 8))
#define	VMM_IOC_BASE		(('v' << 16) | ('m' << 8))
#define	VMM_LOCK_IOC_BASE	(('v' << 16) | ('l' << 8))
#define	VMM_CPU_IOC_BASE	(('v' << 16) | ('p' << 8))

/* Operations performed on the vmmctl device */
#define	VMM_CREATE_VM		(VMMCTL_IOC_BASE | 0x01)
#define	VMM_DESTROY_VM		(VMMCTL_IOC_BASE | 0x02)
#define	VMM_VM_SUPPORTED	(VMMCTL_IOC_BASE | 0x03)
#define	VMM_INTERFACE_VERSION	(VMMCTL_IOC_BASE | 0x04)

#define	VMM_RESV_QUERY		(VMMCTL_IOC_BASE | 0x10)
#define	VMM_RESV_ADD		(VMMCTL_IOC_BASE | 0x11)
#define	VMM_RESV_REMOVE		(VMMCTL_IOC_BASE | 0x12)

/* Operations performed in the context of a given vCPU */
#define	VM_RUN				(VMM_CPU_IOC_BASE | 0x01)
#define	VM_SET_REGISTER			(VMM_CPU_IOC_BASE | 0x02)
#define	VM_GET_REGISTER			(VMM_CPU_IOC_BASE | 0x03)
#define	VM_SET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x04)
#define	VM_GET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x05)
#define	VM_SET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x06)
#define	VM_GET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x07)
#define	VM_INJECT_EXCEPTION		(VMM_CPU_IOC_BASE | 0x08)
#define	VM_SET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x09)
#define	VM_GET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x0a)
#define	VM_PPTDEV_MSI			(VMM_CPU_IOC_BASE | 0x0b)
#define	VM_PPTDEV_MSIX			(VMM_CPU_IOC_BASE | 0x0c)
#define	VM_SET_X2APIC_STATE		(VMM_CPU_IOC_BASE | 0x0d)
#define	VM_GLA2GPA			(VMM_CPU_IOC_BASE | 0x0e)
#define	VM_GLA2GPA_NOFAULT		(VMM_CPU_IOC_BASE | 0x0f)
#define	VM_ACTIVATE_CPU			(VMM_CPU_IOC_BASE | 0x10)
#define	VM_SET_INTINFO			(VMM_CPU_IOC_BASE | 0x11)
#define	VM_GET_INTINFO			(VMM_CPU_IOC_BASE | 0x12)
#define	VM_RESTART_INSTRUCTION		(VMM_CPU_IOC_BASE | 0x13)
#define	VM_SET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x14)
#define	VM_GET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x15)
#define	VM_RESET_CPU			(VMM_CPU_IOC_BASE | 0x16)
#define	VM_GET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x17)
#define	VM_SET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x18)
#define	VM_GET_FPU			(VMM_CPU_IOC_BASE | 0x19)
#define	VM_SET_FPU			(VMM_CPU_IOC_BASE | 0x1a)

/* Operations requiring write-locking the VM */
#define	VM_REINIT		(VMM_LOCK_IOC_BASE | 0x01)
#define	VM_BIND_PPTDEV		(VMM_LOCK_IOC_BASE | 0x02)
#define	VM_UNBIND_PPTDEV	(VMM_LOCK_IOC_BASE | 0x03)
#define	VM_MAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x04)
#define	VM_ALLOC_MEMSEG		(VMM_LOCK_IOC_BASE | 0x05)
#define	VM_MMAP_MEMSEG		(VMM_LOCK_IOC_BASE | 0x06)
#define	VM_PMTMR_LOCATE		(VMM_LOCK_IOC_BASE | 0x07)
#define	VM_MUNMAP_MEMSEG	(VMM_LOCK_IOC_BASE | 0x08)
#define	VM_UNMAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x09)

#define	VM_WRLOCK_CYCLE		(VMM_LOCK_IOC_BASE | 0xff)

/* All other ioctls */
#define	VM_GET_GPA_PMAP			(VMM_IOC_BASE | 0x01)
#define	VM_GET_MEMSEG			(VMM_IOC_BASE | 0x02)
#define	VM_MMAP_GETNEXT			(VMM_IOC_BASE | 0x03)

#define	VM_LAPIC_IRQ			(VMM_IOC_BASE | 0x04)
#define	VM_LAPIC_LOCAL_IRQ		(VMM_IOC_BASE | 0x05)
#define	VM_LAPIC_MSI			(VMM_IOC_BASE | 0x06)

#define	VM_IOAPIC_ASSERT_IRQ		(VMM_IOC_BASE | 0x07)
#define	VM_IOAPIC_DEASSERT_IRQ		(VMM_IOC_BASE | 0x08)
#define	VM_IOAPIC_PULSE_IRQ		(VMM_IOC_BASE | 0x09)

#define	VM_ISA_ASSERT_IRQ		(VMM_IOC_BASE | 0x0a)
#define	VM_ISA_DEASSERT_IRQ		(VMM_IOC_BASE | 0x0b)
#define	VM_ISA_PULSE_IRQ		(VMM_IOC_BASE | 0x0c)
#define	VM_ISA_SET_IRQ_TRIGGER		(VMM_IOC_BASE | 0x0d)

#define	VM_RTC_WRITE			(VMM_IOC_BASE | 0x0e)
#define	VM_RTC_READ			(VMM_IOC_BASE | 0x0f)
#define	VM_RTC_SETTIME			(VMM_IOC_BASE | 0x10)
#define	VM_RTC_GETTIME			(VMM_IOC_BASE | 0x11)

#define	VM_SUSPEND			(VMM_IOC_BASE | 0x12)

#define	VM_IOAPIC_PINCOUNT		(VMM_IOC_BASE | 0x13)
#define	VM_GET_PPTDEV_LIMITS		(VMM_IOC_BASE | 0x14)
#define	VM_GET_HPET_CAPABILITIES	(VMM_IOC_BASE | 0x15)

#define	VM_STATS_IOC			(VMM_IOC_BASE | 0x16)
#define	VM_STAT_DESC			(VMM_IOC_BASE | 0x17)

#define	VM_INJECT_NMI			(VMM_IOC_BASE | 0x18)
#define	VM_GET_X2APIC_STATE		(VMM_IOC_BASE | 0x19)
#define	VM_SET_TOPOLOGY			(VMM_IOC_BASE | 0x1a)
#define	VM_GET_TOPOLOGY			(VMM_IOC_BASE | 0x1b)
#define	VM_GET_CPUS			(VMM_IOC_BASE | 0x1c)
#define	VM_SUSPEND_CPU			(VMM_IOC_BASE | 0x1d)
#define	VM_RESUME_CPU			(VMM_IOC_BASE | 0x1e)

#define	VM_PPTDEV_DISABLE_MSIX		(VMM_IOC_BASE | 0x1f)

/* Note: forces a barrier on a flush operation before returning. */
#define	VM_TRACK_DIRTY_PAGES		(VMM_IOC_BASE | 0x20)
#define	VM_DESC_FPU_AREA		(VMM_IOC_BASE | 0x21)

#define	VM_DEVMEM_GETOFFSET		(VMM_IOC_BASE | 0xff)

/* Path of the vmmctl device node. */
#define	VMM_CTL_DEV		"/dev/vmmctl"
477 
478 #endif
479