xref: /illumos-gate/usr/src/uts/i86xpv/sys/machprivregs.h (revision e511d54dfc1c7eb3aea1a9125b54791fc2f23d42)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_MACHPRIVREGS_H
28 #define	_SYS_MACHPRIVREGS_H
29 
30 #include <sys/hypervisor.h>
31 
32 /*
33  * Platform dependent instruction sequences for manipulating
34  * privileged state
35  */
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 /*
42  * CLI and STI are quite complex to virtualize!
43  */
44 
/*
 * CURVCPU(r) and CURTHREAD(r) load the values stored at %gs:CPU_VCPU_INFO
 * and %gs:CPU_THREAD, respectively, into register r.  The two
 * architectures differ only in operand width (movq vs. movl).
 */
#if defined(__amd64)

#define	CURVCPU(r)	movq	%gs:CPU_VCPU_INFO, r
#define	CURTHREAD(r)	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

#define	CURVCPU(r)	movl	%gs:CPU_VCPU_INFO, r
#define	CURTHREAD(r)	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */
62 
/*
 * Accessors for the upcall-pending and upcall-mask bytes of the structure
 * addressed by register r (the callers in this file obtain r via CURVCPU).
 */

/* Test every bit of the upcall-pending byte; ZF is set when none is set. */
#define	XEN_TEST_EVENT_PENDING(r)	\
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

/* Store 1 into the upcall-mask byte. */
#define	XEN_SET_UPCALL_MASK(r)	\
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

/* Copy the upcall-mask byte into the byte-sized operand 'mask'. */
#define	XEN_GET_UPCALL_MASK(r, mask)	\
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

/* Test bit 0 of the upcall-mask byte; ZF is set when the bit is clear. */
#define	XEN_TEST_UPCALL_MASK(r)	\
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

/*
 * Store 0 into the upcall-mask byte.  ASSERT_UPCALL_MASK_IS_SET runs
 * first (it expands to nothing unless DEBUG is defined).
 */
#define	XEN_CLEAR_UPCALL_MASK(r)	\
	ASSERT_UPCALL_MASK_IS_SET;	\
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)
78 
79 #ifdef DEBUG
80 
81 /*
82  * Much logic depends on the upcall mask being set at
83  * various points in the code; use this macro to validate.
84  *
85  * Need to use CURVCPU(r) to establish the vcpu pointer.
86  */
87 #if defined(__amd64)
88 
/*
 * DEBUG check (amd64) that the upcall mask bit is set for the current
 * vcpu.  When the bit is clear and stistipanic is positive, stistipanic
 * is set to -1 and panic() is called with stistimsg as its argument.
 * On every path that does not panic, the current code address is then
 * stored into the laststi entry selected by %gs:CPU_ID (8 bytes per
 * entry).
 *
 * The registers used on the non-panic path are saved and restored; the
 * condition codes are not preserved.  Uses local label 6.
 */
#define	ASSERT_UPCALL_MASK_IS_SET					\
	pushq	%r11;							\
	CURVCPU(%r11);							\
	XEN_TEST_UPCALL_MASK(%r11);					\
	jne	6f;		/* mask bit set: skip the panic */	\
	cmpl	$0, stistipanic(%rip);					\
	jle	6f;		/* stistipanic <= 0: do not panic */	\
	movl	$-1, stistipanic(%rip);					\
	movq	stistimsg(%rip), %rdi;					\
	xorl	%eax, %eax;						\
	call	panic;							\
6:									\
	pushq	%rax;							\
	pushq	%rbx;							\
	movl	%gs:CPU_ID, %eax;					\
	leaq	.+0(%rip), %r11;	/* current location */		\
	leaq	laststi(%rip), %rbx;					\
	movq	%r11, (%rbx, %rax, 8);	/* laststi[cpu id] = location */ \
	popq	%rbx;							\
	popq	%rax;							\
	popq	%r11
109 
/*
 * DEBUG aid (amd64): store the current code address into the lastcli
 * entry selected by %gs:CPU_ID (8 bytes per entry).  All registers used
 * are saved and restored.
 *
 * lastcli is addressed %rip-relative, matching the way laststi is
 * addressed in ASSERT_UPCALL_MASK_IS_SET.  The final line deliberately
 * carries no continuation backslash, so the macro body ends here no
 * matter what follows the definition.
 */
#define	SAVE_CLI_LOCATION				\
	pushq	%rax;					\
	pushq	%rbx;					\
	pushq	%rcx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %rcx;			\
	leaq	lastcli(%rip), %rbx;			\
	movq	%rcx, (%rbx, %rax, 8);			\
	popq	%rcx;					\
	popq	%rbx;					\
	popq	%rax;
121 
122 #elif defined(__i386)
123 
/*
 * DEBUG check (i386) that the upcall mask bit is set for the current
 * vcpu.  When the bit is clear and stistipanic is positive, stistipanic
 * is set to -1 and panic() is called with stistimsg pushed as its
 * argument.  On every path that does not panic, the current code address
 * is then stored into the laststi entry selected by %gs:CPU_ID (4 bytes
 * per entry).
 *
 * The registers used on the non-panic path are saved and restored; the
 * condition codes are not preserved.  Uses local label 6.
 */
#define	ASSERT_UPCALL_MASK_IS_SET					\
	pushl	%ecx;							\
	CURVCPU(%ecx);							\
	XEN_TEST_UPCALL_MASK(%ecx);					\
	jne	6f;		/* mask bit set: skip the panic */	\
	cmpl	$0, stistipanic;					\
	jle	6f;		/* stistipanic <= 0: do not panic */	\
	movl	$-1, stistipanic;					\
	movl	stistimsg, %ecx;					\
	pushl	%ecx;							\
	call	panic;							\
6:									\
	pushl	%eax;							\
	pushl	%ebx;							\
	movl	%gs:CPU_ID, %eax;					\
	leal	.+0, %ecx;		/* current location */		\
	leal	laststi, %ebx;						\
	movl	%ecx, (%ebx, %eax, 4);	/* laststi[cpu id] = location */ \
	popl	%ebx;							\
	popl	%eax;							\
	popl	%ecx
144 
/*
 * DEBUG aid (i386): store the current code address into the lastcli
 * entry selected by %gs:CPU_ID (4 bytes per entry).  All registers used
 * are saved and restored.
 *
 * The final line deliberately carries no continuation backslash, so the
 * macro body ends here no matter what follows the definition.
 */
#define	SAVE_CLI_LOCATION				\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	lastcli, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax;
156 
157 #endif	/* __i386 */
158 
159 #else	/* DEBUG */
160 
/* Without DEBUG both checking macros expand to nothing. */
#define	ASSERT_UPCALL_MASK_IS_SET
#define	SAVE_CLI_LOCATION
163 
164 #endif	/* DEBUG */
165 
/*
 * Adjust the byte at T_PREEMPT(t), where register t holds a thread
 * pointer (see CURTHREAD): KPREEMPT_DISABLE adds one to it and
 * KPREEMPT_ENABLE_NOKP subtracts one.  Neither does anything beyond
 * the arithmetic.
 */
#define	KPREEMPT_DISABLE(t)		addb	$1, T_PREEMPT(t)
#define	KPREEMPT_ENABLE_NOKP(t)		subb	$1, T_PREEMPT(t)
171 
/*
 * Virtualized "cli": set the upcall mask of the current vcpu.
 *
 * The T_PREEMPT count of the current thread is raised while the vcpu
 * pointer is fetched and the mask is written, and lowered again
 * afterwards.  r is a scratch register; on exit it holds the current
 * thread pointer.
 */
#define	CLI(r)				\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)
180 
/*
 * Same as CLI(r), but the previous value of the upcall-mask byte is
 * first copied into the byte-sized operand 'ret'.  r is a scratch
 * register; on exit it holds the current thread pointer.
 */
#define	CLIRET(r, ret)			\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_GET_UPCALL_MASK(r, ret);	\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)
190 
191 /*
192  * We use the fact that HYPERVISOR_block will clear the upcall mask
193  * for us and then give us an upcall if there is a pending event
194  * to achieve getting a callback on this cpu without the danger of
195  * being preempted and migrating to another cpu between the upcall
196  * enable and the callback delivery.
197  */
198 #if defined(__amd64)
199 
/*
 * Virtualized "sti" for amd64.  Clobbers %rax, %rdi and %r11; STI is the
 * wrapper that preserves them.
 *
 * A locked 16-bit compare-and-exchange on the pending/mask pair replaces
 * the value 0x100 (mask set, nothing pending) with zero.  If the pair
 * held any other value, the SCHEDOP_block hypercall is made instead; it
 * clears the upcall mask and forces the upcall.  Uses local label 7.
 */
#define	STI_CLOBBER							\
	CURVCPU(%r11);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* expected: mask set, none pending */	\
	movw	$0, %di;	/* new value: mask and pending clear */	\
	lock;								\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);		\
	jz	7f;		/* exchange succeeded, all done */	\
	movl	$__HYPERVISOR_sched_op, %eax; /* an upcall is pending */ \
	movl	$SCHEDOP_block, %edi;					\
	pushq	%rsi;	/* the hypercall clobbers the C parameter */	\
	pushq	%rcx;	/* registers and %r10, so save them */		\
	pushq	%rdx;							\
	pushq	%r8;							\
	pushq	%r9;							\
	pushq	%r10;							\
	TRAP_INSTR;	/* clears the upcall mask, forces an upcall */	\
	popq	%r10;							\
	popq	%r9;							\
	popq	%r8;							\
	popq	%rdx;							\
	popq	%rcx;							\
	popq	%rsi;							\
7:
224 
/*
 * Register-preserving "sti" for amd64: saves %r11, %rdi and %rax (the
 * registers STI_CLOBBER destroys), runs STI_CLOBBER, and restores them
 * in the reverse order.
 */
#define	STI				\
	pushq	%r11;			\
	pushq	%rdi;			\
	pushq	%rax;			\
	STI_CLOBBER;			\
	popq	%rax;			\
	popq	%rdi;			\
	popq	%r11
233 
234 #elif defined(__i386)
235 
/*
 * Virtualized "sti" for i386.  Clobbers %eax, %ebx and %ecx; STI is the
 * wrapper that preserves them.
 *
 * A locked 16-bit compare-and-exchange on the pending/mask pair replaces
 * the value 0x100 (mask set, nothing pending) with zero.  If the pair
 * held any other value, the SCHEDOP_block hypercall is made instead; it
 * clears the upcall mask and forces the upcall.  Uses local label 7.
 */
#define	STI_CLOBBER							\
	CURVCPU(%ecx);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* expected: mask set, none pending */	\
	movw	$0, %bx;	/* new value: mask and pending clear */	\
	lock;								\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);		\
	jz	7f;		/* exchange succeeded, all done */	\
	movl	$__HYPERVISOR_sched_op, %eax; /* an upcall is pending */ \
	movl	$SCHEDOP_block, %ebx;					\
	TRAP_INSTR;	/* clears the upcall mask, forces an upcall */	\
7:
248 
/*
 * Register-preserving "sti" for i386: saves %eax, %ebx and %ecx (the
 * registers STI_CLOBBER destroys), runs STI_CLOBBER, and restores them
 * in the reverse order.
 */
#define	STI				\
	pushl	%eax;			\
	pushl	%ebx;			\
	pushl	%ecx;			\
	STI_CLOBBER;			\
	popl	%ecx;			\
	popl	%ebx;			\
	popl	%eax
257 
258 #endif	/* __i386 */
259 
260 /*
261  * Map the PS_IE bit to the hypervisor's event mask bit
262  * To -set- the event mask, we have to do a CLI
263  * To -clear- the event mask, we have to do a STI
264  * (with all the accompanying pre-emption and callbacks, ick)
265  *
266  * And vice versa.
267  */
268 
269 #if defined(__amd64)
270 
/*
 * Apply the PS_IE bit of the flags value 'rfl' to the event mask (amd64):
 * PS_IE set runs STI, PS_IE clear runs CLI(rtmp).  rtmp is scratch for
 * the CLI path.  Uses local labels 4 and 5.
 */
#define	IE_TO_EVENT_MASK(rtmp, rfl)				\
	testq	$PS_IE, rfl;					\
	jnz	4f;		/* PS_IE set: enable events */	\
	CLI(rtmp);						\
	jmp	5f;						\
4:								\
	STI;							\
5:
278 
/*
 * Reflect the current upcall mask into the PS_IE bit of 'rfl' (amd64):
 * PS_IE is cleared first and set again only if the mask bit is clear.
 * rtmp is overwritten with the vcpu pointer.  Uses local label 1.
 */
#define	EVENT_MASK_TO_IE(rtmp, rfl)				\
	andq	$_BITNOT(PS_IE), rfl;				\
	CURVCPU(rtmp);						\
	XEN_TEST_UPCALL_MASK(rtmp);				\
	jnz	1f;		/* masked: leave PS_IE clear */	\
	orq	$PS_IE, rfl;					\
1:
286 
287 #elif defined(__i386)
288 
/*
 * Apply the PS_IE bit of the flags value 'rfl' to the event mask (i386):
 * PS_IE set runs STI, PS_IE clear runs CLI(rtmp).  rtmp is scratch for
 * the CLI path.  Uses local labels 4 and 5.
 */
#define	IE_TO_EVENT_MASK(rtmp, rfl)				\
	testl	$PS_IE, rfl;					\
	jnz	4f;		/* PS_IE set: enable events */	\
	CLI(rtmp);						\
	jmp	5f;						\
4:								\
	STI;							\
5:
296 
/*
 * Reflect the current upcall mask into the PS_IE bit of 'rfl' (i386):
 * PS_IE is cleared first and set again only if the mask bit is clear.
 * rtmp is overwritten with the vcpu pointer.  Uses local label 1.
 */
#define	EVENT_MASK_TO_IE(rtmp, rfl)				\
	andl	$_BITNOT(PS_IE), rfl;				\
	CURVCPU(rtmp);						\
	XEN_TEST_UPCALL_MASK(rtmp);				\
	jnz	1f;		/* masked: leave PS_IE clear */	\
	orl	$PS_IE, rfl;					\
1:
304 
305 #endif	/* __i386 */
306 
307 /*
308  * Used to re-enable interrupts in the body of exception handlers
309  */
310 
/*
 * ENABLE_INTR_FLAGS loads the flags register with F_ON (by pushing the
 * constant and popping it into the flags) and then runs STI.
 */
#if defined(__amd64)

#define	ENABLE_INTR_FLAGS	\
	pushq	$F_ON;		\
	popfq;			\
	STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS	\
	pushl	$F_ON;		\
	popfl;			\
	STI

#endif	/* __i386 */
326 
327 /*
328  * Virtualize IRET and SYSRET
329  */
330 
331 #if defined(__amd64)
332 
#if defined(DEBUG)

/*
 * Raise a #ud trap if HYPERVISOR_IRET is about to switch to user mode
 * while RUPDATE_PENDING is set.
 *
 * The selector found at 0x10(%rsp) after %r15 has been pushed is
 * compared with KCS_SEL; when they match, the check is skipped.
 * Otherwise bit 0 of PCB_RUPDATE in the current thread's lwp is tested
 * and ud2 executes if it is set.  %r15 is preserved; the condition
 * codes are not.  Uses local label 1.
 */
#define	__ASSERT_NO_RUPDATE_PENDING					\
	pushq	%r15;							\
	cmpw	$KCS_SEL, 0x10(%rsp);					\
	je	1f;		/* selector is KCS_SEL: skip */		\
	movq	%gs:CPU_THREAD, %r15;					\
	movq	T_LWP(%r15), %r15;					\
	testb	$0x1, PCB_RUPDATE(%r15);				\
	je	1f;		/* bit 0 clear: nothing pending */	\
	ud2;								\
1:									\
	popq	%r15

#else	/* DEBUG */

/* Without DEBUG the check expands to nothing. */
#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */
355 
356 /*
357  * Switching from guest kernel to user mode.
358  * flag == VGCF_IN_SYSCALL => return via sysret
359  * flag == 0 => return via iretq
360  *
361  * See definition in public/arch-x86_64.h. Stack going in must be:
362  * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
363  */
#define	HYPERVISOR_IRET(flag)						\
	__ASSERT_NO_RUPDATE_PENDING;					\
	pushq	$flag;		/* build the rest of the frame: */	\
	pushq	%rcx;		/* flags, rcx, r11, rax */		\
	pushq	%r11;							\
	pushq	%rax;							\
	movl	$__HYPERVISOR_iret, %eax;				\
	syscall;							\
	ud2			/* die nastily if we return! */
373 
/*
 * Return-to-user entry points built on HYPERVISOR_IRET(flag):
 *
 *	IRET	passes 0, i.e. return via iretq.
 *	SYSRETQ	passes VGCF_IN_SYSCALL, i.e. return via sysret.  This is
 *		the only flag value HYPERVISOR_IRET's contract defines for
 *		the sysret path.
 *	SYSRETL	is not supported and traps with ud2.
 *	SWAPGS	expands to nothing; it is handled in the hypervisor.
 */
#define	IRET	HYPERVISOR_IRET(0)
#define	SYSRETQ	HYPERVISOR_IRET(VGCF_IN_SYSCALL)
#define	SYSRETL	ud2		/* 32-bit syscall/sysret not supported */
#define	SWAPGS	/* empty - handled in hypervisor */
378 
379 #elif defined(__i386)
380 
381 /*
382  * Switching from guest kernel to user mode.
383  * See definition in public/arch-x86_32.h. Stack going in must be:
384  * eax, flags, eip, cs, eflags, esp, ss.
385  */
#define	HYPERVISOR_IRET						\
	pushl	%eax;		/* completes the frame */	\
	movl	$__HYPERVISOR_iret, %eax;			\
	int	$0x82;						\
	ud2			/* die nastily if we return! */

/* IRET is the hypercall above; SYSRET is unsupported and traps. */
#define	IRET	HYPERVISOR_IRET
#define	SYSRET	ud2		/* 32-bit syscall/sysret not supported */
394 
395 #endif	/* __i386 */
396 
397 
/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack when passing through a trap or interrupt
 * gate.  Since Xen also updates PS_IE in %[e,r]flags, we always mask off
 * the saved upcall mask so that the kernel and tools such as debuggers
 * are not confused by bits set in the reserved portion of the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
/*
 * CLEAN_CS zeroes one byte inside the saved %cs slot of the register
 * frame at the stack pointer: the byte at offset 4 on amd64 and at
 * offset 2 on i386.
 */
#if defined(__amd64)

#define	CLEAN_CS	\
	movb	$0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define	CLEAN_CS	\
	movb	$0, REGOFF_CS+2(%esp)

#endif	/* __i386 */
416 
/*
 * All exceptions for amd64 arrive with %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and let them
 * be saved as if we were running native.
 */
422 #if defined(__amd64)
423 
/* Pop %rcx, then %r11, from the top of the stack. */
#define	XPV_TRAP_POP		\
	popq	%rcx;		\
	popq	%r11

/* Exact inverse of XPV_TRAP_POP: push %r11, then %rcx. */
#define	XPV_TRAP_PUSH		\
	pushq	%r11;		\
	pushq	%rcx
431 
432 #endif	/* __amd64 */
433 
434 
435 /*
436  * Macros for saving the original segment registers and restoring them
437  * for fast traps.
438  */
439 #if defined(__amd64)
440 
441 /*
442  * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
443  * The following registers have been pushed onto the stack by
444  * hardware at this point:
445  *
446  *	greg_t	r_rip;
447  *	greg_t	r_cs;
448  *	greg_t	r_rfl;
449  *	greg_t	r_rsp;
450  *	greg_t	r_ss;
451  *
452  * This handler is executed both by 32-bit and 64-bit applications.
453  * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
454  * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
455  * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
456  * across a function call -- in particular, %esi and %edi MUST be saved!
457  *
458  * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
459  * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
460  * particularly worth it.
461  *
462  */
#define	FAST_INTR_PUSH							\
	INTGATE_INIT_KERNEL_FLAGS;					\
	popq	%rcx;		/* same sequence as XPV_TRAP_POP */	\
	popq	%r11;							\
	subq	$REGOFF_RIP, %rsp;	/* room below the rip slot */	\
	movq	%rsi, REGOFF_RSI(%rsp);					\
	movq	%rdi, REGOFF_RDI(%rsp);					\
	CLEAN_CS
471 
/*
 * Undo FAST_INTR_PUSH: reload %rsi and %rdi from their frame slots and
 * release the REGOFF_RIP bytes that were reserved on the stack.
 */
#define	FAST_INTR_POP				\
	movq	REGOFF_RSI(%rsp), %rsi;		\
	movq	REGOFF_RDI(%rsp), %rdi;		\
	addq	$REGOFF_RIP, %rsp

/*
 * Leave a fast trap through the iret hypercall with flag 0 (return via
 * iretq).  DEBUG kernels first check that the upcall mask is set.
 */
#define	FAST_INTR_RETURN			\
	ASSERT_UPCALL_MASK_IS_SET;		\
	HYPERVISOR_IRET(0)
480 
481 #elif defined(__i386)
482 
/*
 * Fast-trap entry for i386: clear the direction flag, then expand
 * __SEGREGS_PUSH followed by __SEGREGS_LOAD_KERNEL (both defined
 * elsewhere).
 *
 * The final line deliberately carries no continuation backslash, so the
 * macro body ends here no matter what follows the definition.
 */
#define	FAST_INTR_PUSH			\
	cld;				\
	__SEGREGS_PUSH			\
	__SEGREGS_LOAD_KERNEL
487 
/* Fast-trap exit for i386: expand __SEGREGS_POP (defined elsewhere). */
#define	FAST_INTR_POP		__SEGREGS_POP

/* Leave a fast trap on i386 through IRET. */
#define	FAST_INTR_RETURN	IRET
493 
494 #endif	/* __i386 */
495 
496 /*
497  * Handling the CR0.TS bit for floating point handling.
498  *
499  * When the TS bit is *set*, attempts to touch the floating
500  * point hardware will result in a #nm trap.
501  */
/*
 * STTS and CLTS call HYPERVISOR_fpu_taskswitch with an argument of 1 and
 * 0, respectively.  The macro parameter of STTS is not used by either
 * architecture's definition.
 */
#if defined(__amd64)

/* The argument travels in %edi; %rdi is saved around the call. */
#define	STTS(rtmp)					\
	pushq	%rdi;					\
	movl	$1, %edi;				\
	call	HYPERVISOR_fpu_taskswitch;		\
	popq	%rdi

#define	CLTS						\
	pushq	%rdi;					\
	xorl	%edi, %edi;				\
	call	HYPERVISOR_fpu_taskswitch;		\
	popq	%rdi

#elif defined(__i386)

/* The argument travels on the stack and is discarded after the call. */
#define	STTS(r)						\
	pushl	$1;					\
	call	HYPERVISOR_fpu_taskswitch;		\
	addl	$4, %esp

#define	CLTS						\
	pushl	$0;					\
	call	HYPERVISOR_fpu_taskswitch;		\
	addl	$4, %esp

#endif	/* __i386 */
529 
530 #ifdef __cplusplus
531 }
532 #endif
533 
534 #endif	/* _SYS_MACHPRIVREGS_H */
535