/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_MACHPRIVREGS_H
#define	_SYS_MACHPRIVREGS_H

#include <sys/hypervisor.h>

/*
 * Platform dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */

#if defined(__amd64)

#define	CURVCPU(r)					\
	movq	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

#define	CURVCPU(r)					\
	movl	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */

#define	XEN_TEST_EVENT_PENDING(r)			\
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define	XEN_SET_UPCALL_MASK(r)				\
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_GET_UPCALL_MASK(r, mask)			\
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define	XEN_TEST_UPCALL_MASK(r)				\
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_CLEAR_UPCALL_MASK(r)			\
	ASSERT_UPCALL_MASK_IS_SET;			\
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate.
 *
 * Need to use CURVCPU(r) to establish the vcpu pointer.
 */
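/*
 * For reference, a rough C rendering of the DEBUG assertion defined
 * below (an illustrative sketch only, not compiled; stistipanic,
 * stistimsg and the per-CPU laststi[]/lastcli[] arrays are the
 * debugging globals these macros reference and are assumed to be
 * defined elsewhere in the kernel):
 *
 *	if (vcpu->evtchn_upcall_mask == 0 && stistipanic > 0) {
 *		stistipanic = -1;
 *		panic(stistimsg);
 *	}
 *	laststi[CPU->cpu_id] = address of this assertion;
 */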
#if defined(__amd64)

#define	ASSERT_UPCALL_MASK_IS_SET		\
	pushq	%r11;				\
	CURVCPU(%r11);				\
	XEN_TEST_UPCALL_MASK(%r11);		\
	jne	6f;				\
	cmpl	$0, stistipanic(%rip);		\
	jle	6f;				\
	movl	$-1, stistipanic(%rip);		\
	movq	stistimsg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
6:	pushq	%rax;				\
	pushq	%rbx;				\
	movl	%gs:CPU_ID, %eax;		\
	leaq	.+0(%rip), %r11;		\
	leaq	laststi(%rip), %rbx;		\
	movq	%r11, (%rbx, %rax, 8);		\
	popq	%rbx;				\
	popq	%rax;				\
	popq	%r11

#define	SAVE_CLI_LOCATION			\
	pushq	%rax;				\
	pushq	%rbx;				\
	pushq	%rcx;				\
	movl	%gs:CPU_ID, %eax;		\
	leaq	.+0(%rip), %rcx;		\
	leaq	lastcli, %rbx;			\
	movq	%rcx, (%rbx, %rax, 8);		\
	popq	%rcx;				\
	popq	%rbx;				\
	popq	%rax;				\

#elif defined(__i386)

#define	ASSERT_UPCALL_MASK_IS_SET		\
	pushl	%ecx;				\
	CURVCPU(%ecx);				\
	XEN_TEST_UPCALL_MASK(%ecx);		\
	jne	6f;				\
	cmpl	$0, stistipanic;		\
	jle	6f;				\
	movl	$-1, stistipanic;		\
	movl	stistimsg, %ecx;		\
	pushl	%ecx;				\
	call	panic;				\
6:	pushl	%eax;				\
	pushl	%ebx;				\
	movl	%gs:CPU_ID, %eax;		\
	leal	.+0, %ecx;			\
	leal	laststi, %ebx;			\
	movl	%ecx, (%ebx, %eax, 4);		\
	popl	%ebx;				\
	popl	%eax;				\
	popl	%ecx

#define	SAVE_CLI_LOCATION			\
	pushl	%eax;				\
	pushl	%ebx;				\
	pushl	%ecx;				\
	movl	%gs:CPU_ID, %eax;		\
	leal	.+0, %ecx;			\
	leal	lastcli, %ebx;			\
	movl	%ecx, (%ebx, %eax, 4);		\
	popl	%ecx;				\
	popl	%ebx;				\
	popl	%eax;				\

#endif	/* __i386 */

#else	/* DEBUG */

#define	ASSERT_UPCALL_MASK_IS_SET	/* empty */
#define	SAVE_CLI_LOCATION		/* empty */

#endif	/* DEBUG */

#define	KPREEMPT_DISABLE(t)		\
	addb	$1, T_PREEMPT(t)

#define	KPREEMPT_ENABLE_NOKP(t)		\
	subb	$1, T_PREEMPT(t)

#define	CLI(r)				\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)

#define	CLIRET(r, ret)			\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_GET_UPCALL_MASK(r, ret);	\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)

/*
 * We rely on the fact that HYPERVISOR_block will clear the upcall mask
 * for us and then deliver an upcall if there is a pending event.  This
 * gets us a callback on this cpu without the danger of being preempted
 * and migrating to another cpu between the upcall enable and the
 * callback delivery.
 */
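/*
 * In C terms, the STI sequence below behaves roughly as follows (an
 * illustrative sketch only, not compiled; the field names are those of
 * the Xen vcpu_info structure, and the macros issue the hypercall
 * directly via TRAP_INSTR rather than through a C wrapper):
 *
 *	old = { .evtchn_upcall_pending = 0, .evtchn_upcall_mask = 1 };
 *	new = { .evtchn_upcall_pending = 0, .evtchn_upcall_mask = 0 };
 *	if (16-bit compare-and-swap of {pending, mask} from old to new fails)
 *		issue HYPERVISOR_sched_op(SCHEDOP_block): an event was
 *		already pending, so let the hypervisor clear the mask and
 *		deliver the upcall atomically;
 */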
#if defined(__amd64)

#define	STI_CLOBBER	/* clobbers %rax, %rdi, %r11 */		\
	CURVCPU(%r11);						\
	ASSERT_UPCALL_MASK_IS_SET;				\
	movw	$0x100, %ax;	/* assume mask set, pending clear */ \
	movw	$0, %di;	/* clear mask and pending */	\
	lock;							\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);	\
	jz	7f;		/* xchg worked, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax;	/* have pending upcall */ \
	movl	$SCHEDOP_block, %edi;				\
	pushq	%rsi;	/* hypercall clobbers C param regs plus r10 */ \
	pushq	%rcx;						\
	pushq	%rdx;						\
	pushq	%r8;						\
	pushq	%r9;						\
	pushq	%r10;						\
	TRAP_INSTR;	/* clear upcall mask, force upcall */	\
	popq	%r10;						\
	popq	%r9;						\
	popq	%r8;						\
	popq	%rdx;						\
	popq	%rcx;						\
	popq	%rsi;						\
7:

#define	STI							\
	pushq	%r11;						\
	pushq	%rdi;						\
	pushq	%rax;						\
	STI_CLOBBER;	/* clobbers %r11, %rax, %rdi */		\
	popq	%rax;						\
	popq	%rdi;						\
	popq	%r11

#elif defined(__i386)

#define	STI_CLOBBER	/* clobbers %eax, %ebx, %ecx */		\
	CURVCPU(%ecx);						\
	ASSERT_UPCALL_MASK_IS_SET;				\
	movw	$0x100, %ax;	/* assume mask set, pending clear */ \
	movw	$0, %bx;	/* clear mask and pending */	\
	lock;							\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);	\
	jz	7f;		/* xchg worked, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax;	/* have pending upcall */ \
	movl	$SCHEDOP_block, %ebx;				\
	TRAP_INSTR;	/* clear upcall mask, force upcall */	\
7:

#define	STI							\
	pushl	%eax;						\
	pushl	%ebx;						\
	pushl	%ecx;						\
	STI_CLOBBER;	/* clobbers %eax, %ebx, %ecx */		\
	popl	%ecx;						\
	popl	%ebx;						\
	popl	%eax

#endif	/* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit
 * To -set- the event mask, we have to do a CLI
 * To -clear- the event mask, we have to do a STI
 * (with all the accompanying pre-emption and callbacks, ick)
 *
 * And vice versa.
 */

#if defined(__amd64)

#define	IE_TO_EVENT_MASK(rtmp, rfl)	\
	testq	$PS_IE, rfl;		\
	jnz	4f;			\
	CLI(rtmp);			\
	jmp	5f;			\
4:	STI;				\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)	\
	andq	$_BITNOT(PS_IE), rfl;	\
	CURVCPU(rtmp);			\
	XEN_TEST_UPCALL_MASK(rtmp);	\
	jnz	1f;			\
	orq	$PS_IE, rfl;		\
1:

#elif defined(__i386)

#define	IE_TO_EVENT_MASK(rtmp, rfl)	\
	testl	$PS_IE, rfl;		\
	jnz	4f;			\
	CLI(rtmp);			\
	jmp	5f;			\
4:	STI;				\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)	\
	andl	$_BITNOT(PS_IE), rfl;	\
	CURVCPU(rtmp);			\
	XEN_TEST_UPCALL_MASK(rtmp);	\
	jnz	1f;			\
	orl	$PS_IE, rfl;		\
1:

#endif	/* __i386 */

/*
 * Used to re-enable interrupts in the body of exception handlers
 */

#if defined(__amd64)

#define	ENABLE_INTR_FLAGS	\
	pushq	$F_ON;		\
	popfq;			\
	STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS	\
	pushl	$F_ON;		\
	popfl;			\
	STI

#endif	/* __i386 */

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
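/*
 * Roughly, in C (an illustrative sketch only, not compiled; the
 * 0x10(%rsp) offset below reaches the saved %cs of the iretq frame
 * once %r15 has been pushed, and pcb_rupdate lives in the lwp's pcb):
 *
 *	if (saved_cs != KCS_SEL &&
 *	    (ttolwp(curthread)->lwp_pcb.pcb_rupdate & 1))
 *		execute ud2 to force an undefined-instruction trap;
 */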
#define	__ASSERT_NO_RUPDATE_PENDING		\
	pushq	%r15;				\
	cmpw	$KCS_SEL, 0x10(%rsp);		\
	je	1f;				\
	movq	%gs:CPU_THREAD, %r15;		\
	movq	T_LWP(%r15), %r15;		\
	testb	$0x1, PCB_RUPDATE(%r15);	\
	je	1f;				\
	ud2;					\
1:	popq	%r15

#else	/* DEBUG */

#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
#define	HYPERVISOR_IRET(flag)			\
	__ASSERT_NO_RUPDATE_PENDING;		\
	pushq	$flag;				\
	pushq	%rcx;				\
	pushq	%r11;				\
	pushq	%rax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	syscall;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET(0)

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 * user mode when using the syscall instruction. The iret hypercall does
 * support both iret and sysret semantics. For us to use sysret style
 * would require that we use the hypervisor's private descriptors that
 * obey the syscall instruction's imposed segment selector ordering.
 * With iret we can use whatever %cs value we choose. We should fix
 * this to use sysret one day.
 */
#define	SYSRETQ	HYPERVISOR_IRET(0)
#define	SYSRETL	ud2		/* 32-bit syscall/sysret not supported */
#define	SWAPGS	/* empty - handled in hypervisor */

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define	HYPERVISOR_IRET				\
	pushl	%eax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	int	$0x82;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET
#define	SYSRET	ud2		/* 32-bit syscall/sysret not supported */

#endif	/* __i386 */


/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so the kernel and/or tools like debuggers
 * will not be confused by bits set in reserved portions of the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
#if defined(__amd64)

#define	CLEAN_CS	movb	$0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define	CLEAN_CS	movb	$0, REGOFF_CS+2(%esp)

#endif	/* __i386 */

/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and
 * let the state be saved as it would be when running native.
 */
#if defined(__amd64)

#define	XPV_TRAP_POP	\
	popq	%rcx;	\
	popq	%r11

#define	XPV_TRAP_PUSH	\
	pushq	%r11;	\
	pushq	%rcx

#endif	/* __amd64 */


/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *	greg_t	r_rip;
 *	greg_t	r_cs;
 *	greg_t	r_rfl;
 *	greg_t	r_rsp;
 *	greg_t	r_ss;
 *
 * This handler is executed by both 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be
 * volatile across a function call -- in particular, %esi and %edi MUST
 * be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 */
#define	FAST_INTR_PUSH				\
	INTGATE_INIT_KERNEL_FLAGS;		\
	popq	%rcx;				\
	popq	%r11;				\
	subq	$REGOFF_RIP, %rsp;		\
	movq	%rsi, REGOFF_RSI(%rsp);		\
	movq	%rdi, REGOFF_RDI(%rsp);		\
	CLEAN_CS

#define	FAST_INTR_POP				\
	movq	REGOFF_RSI(%rsp), %rsi;		\
	movq	REGOFF_RDI(%rsp), %rdi;		\
	addq	$REGOFF_RIP, %rsp

#define	FAST_INTR_RETURN			\
	ASSERT_UPCALL_MASK_IS_SET;		\
	HYPERVISOR_IRET(0)

#elif defined(__i386)

#define	FAST_INTR_PUSH			\
	cld;				\
	__SEGREGS_PUSH			\
	__SEGREGS_LOAD_KERNEL		\

#define	FAST_INTR_POP			\
	__SEGREGS_POP

#define	FAST_INTR_RETURN		\
	IRET

#endif	/* __i386 */

/*
 * Handling the CR0.TS bit for floating point state.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
#if defined(__amd64)

#define	STTS(rtmp)				\
	pushq	%rdi;				\
	movl	$1, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#define	CLTS					\
	pushq	%rdi;				\
	xorl	%edi, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#elif defined(__i386)

#define	STTS(r)					\
	pushl	$1;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#define	CLTS					\
	pushl	$0;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#endif	/* __i386 */

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_MACHPRIVREGS_H */