/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/_iovec.h>
#include <sys/mman.h>

#include <x86/psl.h>
#include <x86/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include <vmmapi.h>

#include "bhyverun.h"
#include "debug.h"

/*
 * Using 'struct i386tss' is tempting but causes myriad sign extension
 * issues because all of its fields are defined as signed integers.
 */
struct tss32 {
	uint16_t	tss_link;
	uint16_t	rsvd1;
	uint32_t	tss_esp0;
	uint16_t	tss_ss0;
	uint16_t	rsvd2;
	uint32_t	tss_esp1;
	uint16_t	tss_ss1;
	uint16_t	rsvd3;
	uint32_t	tss_esp2;
	uint16_t	tss_ss2;
	uint16_t	rsvd4;
	uint32_t	tss_cr3;
	uint32_t	tss_eip;
	uint32_t	tss_eflags;
	uint32_t	tss_eax;
	uint32_t	tss_ecx;
	uint32_t	tss_edx;
	uint32_t	tss_ebx;
	uint32_t	tss_esp;
	uint32_t	tss_ebp;
	uint32_t	tss_esi;
	uint32_t	tss_edi;
	uint16_t	tss_es;
	uint16_t	rsvd5;
	uint16_t	tss_cs;
	uint16_t	rsvd6;
	uint16_t	tss_ss;
	uint16_t	rsvd7;
	uint16_t	tss_ds;
	uint16_t	rsvd8;
	uint16_t	tss_fs;
	uint16_t	rsvd9;
	uint16_t	tss_gs;
	uint16_t	rsvd10;
	uint16_t	tss_ldt;
	uint16_t	rsvd11;
	uint16_t	tss_trap;
	uint16_t	tss_iomap;
};
static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");

#define	SEL_START(sel)	(((sel) & ~0x7))
#define	SEL_LIMIT(sel)	(((sel) | 0x7))
#define	TSS_BUSY(type)	(((type) & 0x2) != 0)
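
/*
 * Illustrative example: segment descriptors are 8 bytes, so for
 * sel = 0x001b (index 3, TI = 0, RPL = 3), SEL_START(sel) = 0x18 is the
 * offset of the first byte of the descriptor within its table and
 * SEL_LIMIT(sel) = 0x1f the offset of its last byte; the table limit
 * must be at least 0x1f for the descriptor to be accessible.
 */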

static uint64_t
GETREG(struct vcpu *vcpu, int reg)
{
	uint64_t val;
	int error;

	error = vm_get_register(vcpu, reg, &val);
	assert(error == 0);
	return (val);
}

static void
SETREG(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	error = vm_set_register(vcpu, reg, val);
	assert(error == 0);
}

static struct seg_desc
usd_to_seg_desc(struct user_segment_descriptor *usd)
{
	struct seg_desc seg_desc;

	seg_desc.base = (u_int)USD_GETBASE(usd);
	if (usd->sd_gran)
		seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
	else
		seg_desc.limit = (u_int)USD_GETLIMIT(usd);
	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
	seg_desc.access |= usd->sd_xx << 12;
	seg_desc.access |= usd->sd_def32 << 14;
	seg_desc.access |= usd->sd_gran << 15;

	return (seg_desc);
}

/*
 * Inject an exception with an error code that is a segment selector.
 * The format of the error code is described in section 6.13, "Error Code",
 * Intel SDM volume 3.
 *
 * Bit 0 (EXT) denotes whether the exception occurred during delivery
 * of an external event like an interrupt.
 *
 * Bit 1 (IDT) indicates whether the selector points to a gate descriptor
 * in the IDT.
 *
 * Bit 2 (GDT/LDT) has the usual interpretation of Table Indicator (TI).
 */
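/*
 * For example: a faulting GDT selector 0x0028 yields the error code
 * 0x0028 when the fault arises from program execution (ext = 0), and
 * 0x0029 when it arises during delivery of an external event (ext = 1).
 */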
static void
sel_exception(struct vcpu *vcpu, int vector, uint16_t sel, int ext)
{
	/*
	 * Bit 2 from the selector is retained as-is in the error code.
	 *
	 * Bit 1 can be safely cleared because none of the selectors
	 * encountered during task switch emulation refer to a task
	 * gate in the IDT.
	 *
	 * Bit 0 is set depending on the value of 'ext'.
	 */
	sel &= ~0x3;
	if (ext)
		sel |= 0x1;
	vm_inject_fault(vcpu, vector, 1, sel);
}

/*
 * Return 0 if the selector 'sel' is within the limits of the GDT/LDT
 * and non-zero otherwise.
 */
static int
desc_table_limit_check(struct vcpu *vcpu, uint16_t sel)
{
	uint64_t base;
	uint32_t limit, access;
	int error, reg;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(vcpu, reg, &base, &limit, &access);
	assert(error == 0);

	if (reg == VM_REG_GUEST_LDTR) {
		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
			return (-1);
	}

	if (limit < SEL_LIMIT(sel))
		return (-1);
	else
		return (0);
}
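
/*
 * Illustrative example: a GDT with limit 0x17 covers descriptor indices
 * 0-2, so the check passes for sel = 0x0010 (SEL_LIMIT = 0x17) and
 * fails for sel = 0x0018 (SEL_LIMIT = 0x1f > 0x17).
 */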

/*
 * Read the segment descriptor 'desc' from, or write it to, the GDT/LDT
 * slot referenced by the selector 'sel'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
desc_table_rw(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, bool doread,
    int *faultptr)
{
	struct iovec iov[2];
	uint64_t base;
	uint32_t limit, access;
	int error, reg;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(vcpu, reg, &base, &limit, &access);
	assert(error == 0);
	assert(limit >= SEL_LIMIT(sel));

	error = vm_copy_setup(vcpu, paging, base + SEL_START(sel),
	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
	    faultptr);
	if (error || *faultptr)
		return (error);

	if (doread)
		vm_copyin(iov, desc, sizeof(*desc));
	else
		vm_copyout(desc, iov, sizeof(*desc));
	return (0);
}

static int
desc_table_read(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, true, faultptr));
}

static int
desc_table_write(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, false, faultptr));
}

/*
 * Read the TSS descriptor referenced by 'sel' into 'desc'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
read_tss_descriptor(struct vcpu *vcpu, struct vm_task_switch *ts,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	struct vm_guest_paging sup_paging;
	int error;

	assert(!ISLDT(sel));
	assert(IDXSEL(sel) != 0);

	/* Fetch the new TSS descriptor */
	if (desc_table_limit_check(vcpu, sel)) {
		if (ts->reason == TSR_IRET)
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
		else
			sel_exception(vcpu, IDT_GP, sel, ts->ext);
		return (1);
	}

	sup_paging = ts->paging;
	sup_paging.cpl = 0;		/* implicit supervisor mode */
	error = desc_table_read(vcpu, &sup_paging, sel, desc, faultptr);
	return (error);
}

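/*
 * The predicates below decode the 5-bit 'sd_type' field of a descriptor
 * (Intel SDM Vol 3, "Segment Descriptor Types"): bit 4 is set for
 * code/data descriptors, bit 3 distinguishes code (1) from data (0),
 * bit 2 is conforming (code) or expand-down (data), and bit 1 is
 * readable for code segments or writable for data segments.
 */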
static bool
code_desc(int sd_type)
{
	/* code descriptor */
	return ((sd_type & 0x18) == 0x18);
}

static bool
stack_desc(int sd_type)
{
	/* writable data descriptor */
	return ((sd_type & 0x1A) == 0x12);
}

static bool
data_desc(int sd_type)
{
	/* data descriptor or a readable code descriptor */
	return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A);
}

static bool
ldt_desc(int sd_type)
{

	return (sd_type == SDT_SYSLDT);
}

/*
 * Validate the descriptor 'seg_desc' associated with 'segment'.
 */
static int
validate_seg_desc(struct vcpu *vcpu, struct vm_task_switch *ts,
    int segment, struct seg_desc *seg_desc, int *faultptr)
{
	struct vm_guest_paging sup_paging;
	struct user_segment_descriptor usd;
	int error, idtvec;
	int cpl, dpl, rpl;
	uint16_t sel, cs;
	bool ldtseg, codeseg, stackseg, dataseg, conforming;

	ldtseg = codeseg = stackseg = dataseg = false;
	switch (segment) {
	case VM_REG_GUEST_LDTR:
		ldtseg = true;
		break;
	case VM_REG_GUEST_CS:
		codeseg = true;
		break;
	case VM_REG_GUEST_SS:
		stackseg = true;
		break;
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
		dataseg = true;
		break;
	default:
		assert(0);
	}

	/* Get the segment selector */
	sel = GETREG(vcpu, segment);

	/* LDT selector must point into the GDT */
	if (ldtseg && ISLDT(sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* Descriptor table limit check */
	if (desc_table_limit_check(vcpu, sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* NULL selector */
	if (IDXSEL(sel) == 0) {
		/* Code and stack segment selectors cannot be NULL */
		if (codeseg || stackseg) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
		seg_desc->base = 0;
		seg_desc->limit = 0;
		seg_desc->access = 0x10000;	/* unusable */
		return (0);
	}

	/* Read the descriptor from the GDT/LDT */
	sup_paging = ts->paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	error = desc_table_read(vcpu, &sup_paging, sel, &usd, faultptr);
	if (error || *faultptr)
		return (error);

	/* Verify that the descriptor type is compatible with the segment */
	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
	    (codeseg && !code_desc(usd.sd_type)) ||
	    (dataseg && !data_desc(usd.sd_type)) ||
	    (stackseg && !stack_desc(usd.sd_type))) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* Segment must be marked present */
	if (!usd.sd_p) {
		if (ldtseg)
			idtvec = IDT_TS;
		else if (stackseg)
			idtvec = IDT_SS;
		else
			idtvec = IDT_NP;
		sel_exception(vcpu, idtvec, sel, ts->ext);
		return (1);
	}

	cs = GETREG(vcpu, VM_REG_GUEST_CS);
	cpl = cs & SEL_RPL_MASK;
	rpl = sel & SEL_RPL_MASK;
	dpl = usd.sd_dpl;

	if (stackseg && (rpl != cpl || dpl != cpl)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	if (codeseg) {
		conforming = (usd.sd_type & 0x4) ? true : false;
		if ((conforming && (cpl < dpl)) ||
		    (!conforming && (cpl != dpl))) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
	}

	if (dataseg) {
		/*
		 * A data segment is always non-conforming except when its
		 * descriptor is a readable, conforming code segment.
		 */
		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
			conforming = true;
		else
			conforming = false;

		if (!conforming && (rpl > dpl || cpl > dpl)) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
	}
	*seg_desc = usd_to_seg_desc(&usd);
	return (0);
}

static void
tss32_save(struct vcpu *vcpu, struct vm_task_switch *task_switch,
    uint32_t eip, struct tss32 *tss, struct iovec *iov)
{

	/* General purpose registers */
	tss->tss_eax = GETREG(vcpu, VM_REG_GUEST_RAX);
	tss->tss_ecx = GETREG(vcpu, VM_REG_GUEST_RCX);
	tss->tss_edx = GETREG(vcpu, VM_REG_GUEST_RDX);
	tss->tss_ebx = GETREG(vcpu, VM_REG_GUEST_RBX);
	tss->tss_esp = GETREG(vcpu, VM_REG_GUEST_RSP);
	tss->tss_ebp = GETREG(vcpu, VM_REG_GUEST_RBP);
	tss->tss_esi = GETREG(vcpu, VM_REG_GUEST_RSI);
	tss->tss_edi = GETREG(vcpu, VM_REG_GUEST_RDI);

	/* Segment selectors */
	tss->tss_es = GETREG(vcpu, VM_REG_GUEST_ES);
	tss->tss_cs = GETREG(vcpu, VM_REG_GUEST_CS);
	tss->tss_ss = GETREG(vcpu, VM_REG_GUEST_SS);
	tss->tss_ds = GETREG(vcpu, VM_REG_GUEST_DS);
	tss->tss_fs = GETREG(vcpu, VM_REG_GUEST_FS);
	tss->tss_gs = GETREG(vcpu, VM_REG_GUEST_GS);

	/* eflags and eip */
	tss->tss_eflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	if (task_switch->reason == TSR_IRET)
		tss->tss_eflags &= ~PSL_NT;
	tss->tss_eip = eip;

	/* Copy updated old TSS into guest memory */
	vm_copyout(tss, iov, sizeof(struct tss32));
}

static void
update_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *sd)
{
	int error;

	error = vm_set_desc(vcpu, reg, sd->base, sd->limit, sd->access);
	assert(error == 0);
}

/*
 * Update the vcpu registers to reflect the state of the new task.
 */
static int
tss32_restore(struct vmctx *ctx, struct vcpu *vcpu, struct vm_task_switch *ts,
    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
{
	struct seg_desc seg_desc, seg_desc2;
	uint64_t *pdpte, maxphyaddr, reserved;
	uint32_t eflags;
	int error, i;
	bool nested;

	nested = false;
	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
		tss->tss_link = ot_sel;
		nested = true;
	}

	eflags = tss->tss_eflags;
	if (nested)
		eflags |= PSL_NT;

	/* LDTR */
	SETREG(vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);

	/* PDBR */
	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
			/*
			 * XXX Assuming 36-bit MAXPHYADDR.
			 */
			maxphyaddr = (1UL << 36) - 1;
			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
			for (i = 0; i < 4; i++) {
				/* Check reserved bits if the PDPTE is valid */
				if (!(pdpte[i] & 0x1))
					continue;
				/*
				 * Bits 2:1, 8:5 and bits above the processor's
				 * maximum physical address are reserved.
				 */
				reserved = ~maxphyaddr | 0x1E6;
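				/*
				 * 0x1E6 == 0b111100110, i.e. bits 8:5
				 * and 2:1; ~maxphyaddr covers bits 63:36
				 * under the MAXPHYADDR assumption above.
				 */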
				if (pdpte[i] & reserved) {
					vm_inject_gp(vcpu);
					return (1);
				}
			}
			SETREG(vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
		}
		SETREG(vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
		ts->paging.cr3 = tss->tss_cr3;
	}

	/* eflags and eip */
	SETREG(vcpu, VM_REG_GUEST_RFLAGS, eflags);
	SETREG(vcpu, VM_REG_GUEST_RIP, tss->tss_eip);

	/* General purpose registers */
	SETREG(vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
	SETREG(vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
	SETREG(vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
	SETREG(vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
	SETREG(vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
	SETREG(vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
	SETREG(vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
	SETREG(vcpu, VM_REG_GUEST_RDI, tss->tss_edi);

	/* Segment selectors */
	SETREG(vcpu, VM_REG_GUEST_ES, tss->tss_es);
	SETREG(vcpu, VM_REG_GUEST_CS, tss->tss_cs);
	SETREG(vcpu, VM_REG_GUEST_SS, tss->tss_ss);
	SETREG(vcpu, VM_REG_GUEST_DS, tss->tss_ds);
	SETREG(vcpu, VM_REG_GUEST_FS, tss->tss_fs);
	SETREG(vcpu, VM_REG_GUEST_GS, tss->tss_gs);

	/*
	 * If this is a nested task then write out the new TSS to update
	 * the previous link field.
	 */
	if (nested)
		vm_copyout(tss, iov, sizeof(*tss));

	/* Validate segment descriptors */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_LDTR, &seg_desc);

	/*
	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
	 *
	 * The SS and CS attribute checks on VM-entry are inter-dependent so
	 * we need to make sure that both segments are valid before updating
	 * either of them. This ensures that the VMCS state can pass the
	 * VM-entry checks so the guest can handle any exception injected
	 * during task switch emulation.
	 */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_CS, &seg_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_SS, &seg_desc2);
	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_DS, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_ES, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_FS, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_GS, &seg_desc);

	return (0);
}

/*
 * Push an error code on the stack of the new task. This is needed if the
 * task switch was triggered by a hardware exception that causes an error
 * code to be saved (e.g. #PF).
 */
static int
push_errcode(struct vcpu *vcpu, struct vm_guest_paging *paging,
    int task_type, uint32_t errcode, int *faultptr)
{
	struct iovec iov[2];
	struct seg_desc seg_desc;
	int stacksize, bytes, error;
	uint64_t gla, cr0, rflags;
	uint32_t esp;
	uint16_t stacksel;

	*faultptr = 0;

	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	rflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	stacksel = GETREG(vcpu, VM_REG_GUEST_SS);

	error = vm_get_desc(vcpu, VM_REG_GUEST_SS, &seg_desc.base,
	    &seg_desc.limit, &seg_desc.access);
	assert(error == 0);

	/*
	 * Section "Error Code" in the Intel SDM vol 3: the error code is
	 * pushed on the stack as a doubleword or word (depending on the
	 * default interrupt, trap or task gate size).
	 */
	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
		bytes = 4;
	else
		bytes = 2;

	/*
	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
	 * stack-segment descriptor determines the size of the stack
	 * pointer outside of 64-bit mode.
	 */
	if (SEG_DESC_DEF32(seg_desc.access))
		stacksize = 4;
	else
		stacksize = 2;

	esp = GETREG(vcpu, VM_REG_GUEST_RSP);
	esp -= bytes;

	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
		sel_exception(vcpu, IDT_SS, stacksel, 1);
		*faultptr = 1;
		return (0);
	}

	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
		vm_inject_ac(vcpu, 1);
		*faultptr = 1;
		return (0);
	}

	error = vm_copy_setup(vcpu, paging, gla, bytes, PROT_WRITE,
	    iov, nitems(iov), faultptr);
	if (error || *faultptr)
		return (error);

	vm_copyout(&errcode, iov, bytes);
	SETREG(vcpu, VM_REG_GUEST_RSP, esp);
	return (0);
}

/*
 * Evaluate return value from helper functions and potentially return to
 * the VM run loop.
 */
#define	CHKERR(error,fault)						\
	do {								\
		assert((error == 0) || (error == EFAULT));		\
		if (error)						\
			return (VMEXIT_ABORT);				\
		else if (fault)						\
			return (VMEXIT_CONTINUE);			\
	} while (0)

int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);

int
vmexit_task_switch(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct seg_desc nt;
	struct tss32 oldtss, newtss;
	struct vm_task_switch *task_switch;
	struct vm_guest_paging *paging, sup_paging;
	struct user_segment_descriptor nt_desc, ot_desc;
	struct iovec nt_iov[2], ot_iov[2];
	struct vm_exit *vmexit;
	uint64_t cr0, ot_base;
	uint32_t eip, ot_lim, access;
	int error, ext, fault, minlimit, nt_type, ot_type;
	enum task_switch_reason reason;
	uint16_t nt_sel, ot_sel;

	vmexit = vmrun->vm_exit;
	task_switch = &vmexit->u.task_switch;
	nt_sel = task_switch->tsssel;
	ext = vmexit->u.task_switch.ext;
	reason = vmexit->u.task_switch.reason;
	paging = &vmexit->u.task_switch.paging;

	assert(paging->cpu_mode == CPU_MODE_PROTECTED);

	/*
	 * Calculate the instruction pointer to store in the old TSS.
	 */
	eip = vmexit->rip + vmexit->inst_length;

	/*
	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
	 * The following page table accesses are implicitly supervisor mode:
	 * - accesses to GDT or LDT to load segment descriptors
	 * - accesses to the task state segment during task switch
	 */
	sup_paging = *paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */

	/* Fetch the new TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, nt_sel, &nt_desc,
	    &fault);
	CHKERR(error, fault);

	nt = usd_to_seg_desc(&nt_desc);

	/* Verify the type of the new TSS */
	nt_type = SEG_DESC_TYPE(nt.access);
	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/* TSS descriptor must have present bit set */
	if (!SEG_DESC_PRESENT(nt.access)) {
		sel_exception(vcpu, IDT_NP, nt_sel, ext);
		goto done;
	}

	/*
	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
	 * 44 bytes for a 16-bit TSS.
	 */
	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
		minlimit = 104 - 1;
	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
		minlimit = 44 - 1;
	else
		minlimit = 0;

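	/*
	 * nt_type was validated above, so one of the first two branches is
	 * always taken; the assertion documents that the 'minlimit = 0'
	 * case is unreachable.
	 */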
	assert(minlimit > 0);
	if (nt.limit < (unsigned int)minlimit) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/* TSS must be busy if task switch is due to IRET */
	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}

	/*
	 * TSS must be available (not busy) if task switch reason is
	 * CALL, JMP, exception or interrupt.
	 */
	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_GP, nt_sel, ext);
		goto done;
	}

	/* Fetch the new TSS */
	error = vm_copy_setup(vcpu, &sup_paging, nt.base, minlimit + 1,
	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
	CHKERR(error, fault);
	vm_copyin(nt_iov, &newtss, minlimit + 1);

	/* Get the old TSS selector from the guest's task register */
	ot_sel = GETREG(vcpu, VM_REG_GUEST_TR);
	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
		/*
		 * This might happen if a task switch was attempted without
		 * ever loading the task register with LTR. In this case the
		 * TR would contain the values from power-on:
		 * (sel = 0, base = 0, limit = 0xffff).
		 */
		sel_exception(vcpu, IDT_TS, ot_sel, task_switch->ext);
		goto done;
	}

	/* Get the old TSS base and limit from the guest's task register */
	error = vm_get_desc(vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
	    &access);
	assert(error == 0);
	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
	ot_type = SEG_DESC_TYPE(access);
	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);

	/* Fetch the old TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, ot_sel, &ot_desc,
	    &fault);
	CHKERR(error, fault);

	/* Get the old TSS */
	error = vm_copy_setup(vcpu, &sup_paging, ot_base, minlimit + 1,
	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
	CHKERR(error, fault);
	vm_copyin(ot_iov, &oldtss, minlimit + 1);

	/*
	 * Clear the busy bit in the old TSS descriptor if the task switch
	 * is due to an IRET or JMP instruction.
	 */
	if (reason == TSR_IRET || reason == TSR_JMP) {
		ot_desc.sd_type &= ~0x2;
		error = desc_table_write(vcpu, &sup_paging, ot_sel,
		    &ot_desc, &fault);
		CHKERR(error, fault);
	}

	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
		EPRINTLN("Task switch to 16-bit TSS not supported");
		return (VMEXIT_ABORT);
	}

	/* Save processor state in old TSS */
	tss32_save(vcpu, task_switch, eip, &oldtss, ot_iov);

	/*
	 * If the task switch was triggered for any reason other than IRET
	 * then set the busy bit in the new TSS descriptor.
	 */
	if (reason != TSR_IRET) {
		nt_desc.sd_type |= 0x2;
		error = desc_table_write(vcpu, &sup_paging, nt_sel,
		    &nt_desc, &fault);
		CHKERR(error, fault);
	}

	/* Update task register to point at the new TSS */
	SETREG(vcpu, VM_REG_GUEST_TR, nt_sel);

	/* Update the hidden descriptor state of the task register */
	nt = usd_to_seg_desc(&nt_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_TR, &nt);

	/* Set CR0.TS */
	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	SETREG(vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);

	/*
	 * We are now committed to the task switch. Any exceptions encountered
	 * after this point will be handled in the context of the new task and
	 * the saved instruction pointer will belong to the new task.
	 */
	error = vm_set_register(vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
	assert(error == 0);

	/* Load processor state from new TSS */
	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
	    &fault);
	CHKERR(error, fault);

	/*
	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
	 * caused an error code to be generated, this error code is copied
	 * to the stack of the new task.
	 */
	if (task_switch->errcode_valid) {
		assert(task_switch->ext);
		assert(task_switch->reason == TSR_IDT_GATE);
		error = push_errcode(vcpu, &task_switch->paging, nt_type,
		    task_switch->errcode, &fault);
		CHKERR(error, fault);
	}

	/*
	 * Treatment of virtual-NMI blocking if NMI is delivered through
	 * a task gate.
	 *
	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol 3:
	 * If the virtual NMIs VM-execution control is 1, VM entry injects
	 * an NMI, and delivery of the NMI causes a task switch that causes
	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
	 * commences.
	 *
	 * Thus, virtual-NMI blocking is in effect at the time of the task
	 * switch VM exit.
	 */

	/*
	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
	 *
	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation",
	 * Intel SDM, Vol 3: if the "virtual NMIs" control is 1, IRET removes
	 * any virtual-NMI blocking. This unblocking occurs even if IRET
	 * causes a fault.
	 *
	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
	 * VM exit.
	 */

	/*
	 * If the task switch was triggered by an event delivered through
	 * the IDT then extinguish the pending event from the vcpu's
	 * exitintinfo.
	 */
	if (task_switch->reason == TSR_IDT_GATE) {
		error = vm_set_intinfo(vcpu, 0);
		assert(error == 0);
	}

	/*
	 * XXX should inject debug exception if 'T' bit is 1
	 */
done:
	return (VMEXIT_CONTINUE);
}