/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_MACHPRIVREGS_H
#define	_SYS_MACHPRIVREGS_H

#include <sys/hypervisor.h>

/*
 * Platform dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */

#if defined(__amd64)

#define	CURVCPU(r)	\
	movq	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)	\
	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

#define	CURVCPU(r)	\
	movl	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)	\
	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */

#define	XEN_TEST_EVENT_PENDING(r)	\
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define	XEN_SET_UPCALL_MASK(r)		\
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_GET_UPCALL_MASK(r, mask)	\
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define	XEN_TEST_UPCALL_MASK(r)		\
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_CLEAR_UPCALL_MASK(r)	\
	ASSERT_UPCALL_MASK_IS_SET;	\
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)
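/*
 * Illustration only, not used by any code: under the hypervisor the
 * per-vcpu evtchn_upcall_mask byte plays the role of a (negated)
 * EFLAGS.IF, and evtchn_upcall_pending acts as the pending-interrupt
 * latch.  In rough C terms the CLI() macro defined below amounts to
 * the sketch that follows, where curvcpu() is only a stand-in for
 * whatever CURVCPU() resolves to (field names are from the Xen public
 * vcpu_info_t):
 *
 *	curthread->t_preempt++;			// KPREEMPT_DISABLE
 *	curvcpu()->evtchn_upcall_mask = 1;	// XEN_SET_UPCALL_MASK
 *	curthread->t_preempt--;			// KPREEMPT_ENABLE_NOKP
 *
 * Preemption is disabled around the store so the thread cannot migrate
 * to another cpu between loading the vcpu_info pointer and setting its
 * mask.
 */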
#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate.
 *
 * Need to use CURVCPU(r) to establish the vcpu pointer.
 */
#if defined(__amd64)

#define	ASSERT_UPCALL_MASK_IS_SET		\
	pushq	%r11;				\
	CURVCPU(%r11);				\
	XEN_TEST_UPCALL_MASK(%r11);		\
	jne	6f;				\
	cmpl	$0, stistipanic(%rip);		\
	jle	6f;				\
	movl	$-1, stistipanic(%rip);		\
	movq	stistimsg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
6:	pushq	%rax;				\
	pushq	%rbx;				\
	movl	%gs:CPU_ID, %eax;		\
	leaq	.+0(%rip), %r11;		\
	leaq	laststi(%rip), %rbx;		\
	movq	%r11, (%rbx, %rax, 8);		\
	popq	%rbx;				\
	popq	%rax;				\
	popq	%r11

#define	SAVE_CLI_LOCATION		\
	pushq	%rax;			\
	pushq	%rbx;			\
	pushq	%rcx;			\
	movl	%gs:CPU_ID, %eax;	\
	leaq	.+0(%rip), %rcx;	\
	leaq	lastcli, %rbx;		\
	movq	%rcx, (%rbx, %rax, 8);	\
	popq	%rcx;			\
	popq	%rbx;			\
	popq	%rax;			\

#elif defined(__i386)

#define	ASSERT_UPCALL_MASK_IS_SET	\
	pushl	%ecx;			\
	CURVCPU(%ecx);			\
	XEN_TEST_UPCALL_MASK(%ecx);	\
	jne	6f;			\
	cmpl	$0, stistipanic;	\
	jle	6f;			\
	movl	$-1, stistipanic;	\
	movl	stistimsg, %ecx;	\
	pushl	%ecx;			\
	call	panic;			\
6:	pushl	%eax;			\
	pushl	%ebx;			\
	movl	%gs:CPU_ID, %eax;	\
	leal	.+0, %ecx;		\
	leal	laststi, %ebx;		\
	movl	%ecx, (%ebx, %eax, 4);	\
	popl	%ebx;			\
	popl	%eax;			\
	popl	%ecx

#define	SAVE_CLI_LOCATION		\
	pushl	%eax;			\
	pushl	%ebx;			\
	pushl	%ecx;			\
	movl	%gs:CPU_ID, %eax;	\
	leal	.+0, %ecx;		\
	leal	lastcli, %ebx;		\
	movl	%ecx, (%ebx, %eax, 4);	\
	popl	%ecx;			\
	popl	%ebx;			\
	popl	%eax;			\

#endif	/* __i386 */

#else	/* DEBUG */

#define	ASSERT_UPCALL_MASK_IS_SET	/* empty */
#define	SAVE_CLI_LOCATION		/* empty */

#endif	/* DEBUG */

#define	KPREEMPT_DISABLE(t)	\
	addb	$1, T_PREEMPT(t)

#define	KPREEMPT_ENABLE_NOKP(t)	\
	subb	$1, T_PREEMPT(t)

#define	CLI(r)				\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)

#define	CLIRET(r, ret)			\
	CURTHREAD(r);			\
	KPREEMPT_DISABLE(r);		\
	CURVCPU(r);			\
	XEN_GET_UPCALL_MASK(r, ret);	\
	XEN_SET_UPCALL_MASK(r);		\
	SAVE_CLI_LOCATION;		\
	CURTHREAD(r);			\
	KPREEMPT_ENABLE_NOKP(r)

/*
 * We rely on the fact that HYPERVISOR_block clears the upcall mask for
 * us and then delivers an upcall if an event is pending.  That gets us
 * a callback on this cpu without the danger of being preempted and
 * migrating to another cpu between enabling upcalls and the callback
 * delivery.
 */
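/*
 * Purely illustrative C sketch of the STI_CLOBBER sequence that
 * follows (not compiled here).  It relies on evtchn_upcall_pending and
 * evtchn_upcall_mask being adjacent bytes in vcpu_info_t, which is
 * exactly what the 16-bit cmpxchg below depends on; atomic_cas_16()
 * and the hypercall wrapper are only stand-ins for the inline
 * assembly:
 *
 *	volatile uint16_t *pm =
 *	    (volatile uint16_t *)&vci->evtchn_upcall_pending;
 *
 *	// expect "mask set, nothing pending" (0x0100 little-endian)
 *	// and swap in "mask clear, nothing pending" (0)
 *	if (atomic_cas_16(pm, 0x0100, 0) != 0x0100) {
 *		// an event was already pending; let the hypervisor
 *		// clear the mask and deliver the upcall atomically
 *		(void) HYPERVISOR_sched_op(SCHEDOP_block, NULL);
 *	}
 */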
#if defined(__amd64)

#define	STI_CLOBBER	/* clobbers %rax, %rdi, %r11 */		\
	CURVCPU(%r11);						\
	ASSERT_UPCALL_MASK_IS_SET;				\
	movw	$0x100, %ax;	/* assume mask set, pending clear */ \
	movw	$0, %di;	/* clear mask and pending */	\
	lock;							\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);	\
	jz	7f;		/* xchg worked, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax;	/* have pending upcall */ \
	movl	$SCHEDOP_block, %edi;				\
	pushq	%rsi;	/* hypercall clobbers C param regs plus r10 */ \
	pushq	%rcx;						\
	pushq	%rdx;						\
	pushq	%r8;						\
	pushq	%r9;						\
	pushq	%r10;						\
	TRAP_INSTR;	/* clear upcall mask, force upcall */	\
	popq	%r10;						\
	popq	%r9;						\
	popq	%r8;						\
	popq	%rdx;						\
	popq	%rcx;						\
	popq	%rsi;						\
7:

#define	STI					\
	pushq	%r11;				\
	pushq	%rdi;				\
	pushq	%rax;				\
	STI_CLOBBER;	/* clobbers %r11, %rax, %rdi */	\
	popq	%rax;				\
	popq	%rdi;				\
	popq	%r11

#elif defined(__i386)

#define	STI_CLOBBER	/* clobbers %eax, %ebx, %ecx */		\
	CURVCPU(%ecx);						\
	ASSERT_UPCALL_MASK_IS_SET;				\
	movw	$0x100, %ax;	/* assume mask set, pending clear */ \
	movw	$0, %bx;	/* clear mask and pending */	\
	lock;							\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);	\
	jz	7f;		/* xchg worked, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax;	/* have pending upcall */ \
	movl	$SCHEDOP_block, %ebx;				\
	TRAP_INSTR;	/* clear upcall mask, force upcall */	\
7:

#define	STI					\
	pushl	%eax;				\
	pushl	%ebx;				\
	pushl	%ecx;				\
	STI_CLOBBER;	/* clobbers %eax, %ebx, %ecx */	\
	popl	%ecx;				\
	popl	%ebx;				\
	popl	%eax

#endif	/* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit
 * To -set- the event mask, we have to do a CLI
 * To -clear- the event mask, we have to do a STI
 * (with all the accompanying pre-emption and callbacks, ick)
 *
 * And vice versa.
 */

#if defined(__amd64)

#define	IE_TO_EVENT_MASK(rtmp, rfl)	\
	testq	$PS_IE, rfl;		\
	jnz	4f;			\
	CLI(rtmp);			\
	jmp	5f;			\
4:	STI;				\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)	\
	andq	$_BITNOT(PS_IE), rfl;	\
	CURVCPU(rtmp);			\
	XEN_TEST_UPCALL_MASK(rtmp);	\
	jnz	1f;			\
	orq	$PS_IE, rfl;		\
1:

#elif defined(__i386)

#define	IE_TO_EVENT_MASK(rtmp, rfl)	\
	testl	$PS_IE, rfl;		\
	jnz	4f;			\
	CLI(rtmp);			\
	jmp	5f;			\
4:	STI;				\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)	\
	andl	$_BITNOT(PS_IE), rfl;	\
	CURVCPU(rtmp);			\
	XEN_TEST_UPCALL_MASK(rtmp);	\
	jnz	1f;			\
	orl	$PS_IE, rfl;		\
1:

#endif	/* __i386 */
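/*
 * Illustration only (not compiled): the two conversions above are, in
 * rough C terms, as follows, where cli()/sti() stand for the CLI/STI
 * macros and curvcpu() for CURVCPU():
 *
 *	// IE_TO_EVENT_MASK: apply a saved flags value to the vcpu
 *	if (rfl & PS_IE)
 *		sti();		// clear the event mask
 *	else
 *		cli();		// set the event mask
 *
 *	// EVENT_MASK_TO_IE: synthesize PS_IE from the current mask
 *	rfl &= ~PS_IE;
 *	if (curvcpu()->evtchn_upcall_mask == 0)
 *		rfl |= PS_IE;
 */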
/*
 * Used to re-enable interrupts in the body of exception handlers
 */

#if defined(__amd64)

#define	ENABLE_INTR_FLAGS	\
	pushq	$F_ON;		\
	popfq;			\
	STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS	\
	pushl	$F_ON;		\
	popfl;			\
	STI

#endif	/* __i386 */

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
#define	__ASSERT_NO_RUPDATE_PENDING	\
	pushq	%r15;			\
	cmpw	$KCS_SEL, 0x10(%rsp);	\
	je	1f;			\
	movq	%gs:CPU_THREAD, %r15;	\
	movq	T_LWP(%r15), %r15;	\
	testb	$0x1, PCB_RUPDATE(%r15); \
	je	1f;			\
	ud2;				\
1:	popq	%r15

#else	/* DEBUG */

#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
#define	HYPERVISOR_IRET(flag)		\
	__ASSERT_NO_RUPDATE_PENDING;	\
	pushq	$flag;			\
	pushq	%rcx;			\
	pushq	%r11;			\
	pushq	%rax;			\
	movl	$__HYPERVISOR_iret, %eax; \
	syscall;			\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET(0)

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 * user mode when using the syscall instruction. The iret hypercall
 * does support both iret and sysret semantics. For us to use sysret
 * style would require that we use the hypervisor's private descriptors
 * that obey syscall instruction's imposed segment selector ordering.
 * With iret we can use whatever %cs value we choose. We should fix
 * this to use sysret one day.
 */
#define	SYSRETQ	HYPERVISOR_IRET(0)
#define	SYSRETL	ud2		/* 32-bit syscall/sysret not supported */
#define	SWAPGS	/* empty - handled in hypervisor */

/*
 * As of GNU binutils 2.37, the assembler has split the 'sysexit' instruction
 * into 'sysexitl' and 'sysexitq'. Using a plain 'sysexit' is interpreted as
 * 'sysexitl' but comes with a warning about the assumption being made. Since
 * all warnings are treated as errors in the kernel build, this results in a
 * build failure. Unfortunately the desired 'sysexitl' cannot be used since
 * older versions of the GNU assembler do not understand it.
 * The following macro emits the correct byte sequence for 'sysexitl' on this
 * platform.
 */
#define	SYSEXITL	.byte	0x0f, 0x35

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define	HYPERVISOR_IRET			\
	pushl	%eax;			\
	movl	$__HYPERVISOR_iret, %eax; \
	int	$0x82;			\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET
#define	SYSRET	ud2		/* 32-bit syscall/sysret not supported */

#endif	/* __i386 */


/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte
 * of the saved %cs on the stack at the time of passing through a trap
 * or interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we
 * always mask off the saved upcall mask so the kernel and/or tools like
 * debuggers will not be confused about bits set in reserved portions of
 * the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
#if defined(__amd64)

#define	CLEAN_CS	movb	$0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define	CLEAN_CS	movb	$0, REGOFF_CS+2(%esp)

#endif	/* __i386 */
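/*
 * For reference only (paraphrased from the Xen public cpu_user_regs_t
 * and consistent with the offsets used above, so treat the exact
 * padding as approximate): the saved %cs occupies only 16 bits of its
 * slot, and Xen keeps the upcall mask in one of the pad bytes that
 * follow it:
 *
 *	uint16_t cs;
 *	uint16_t _pad0;			// amd64 frame only
 *	uint8_t  saved_upcall_mask;	// the byte CLEAN_CS zeroes
 *
 * which is why CLEAN_CS clears REGOFF_CS+4 on amd64 and REGOFF_CS+2
 * on i386.
 */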
/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and
 * let the state be saved as it would be when running native.
 */
#if defined(__amd64)

#define	XPV_TRAP_POP	\
	popq	%rcx;	\
	popq	%r11

#define	XPV_TRAP_PUSH	\
	pushq	%r11;	\
	pushq	%rcx

#endif	/* __amd64 */


/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *	greg_t	r_rip;
 *	greg_t	r_cs;
 *	greg_t	r_rfl;
 *	greg_t	r_rsp;
 *	greg_t	r_ss;
 *
 * This handler is executed both by 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
 * across a function call -- in particular, %esi and %edi MUST be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 */
#define	FAST_INTR_PUSH			\
	INTGATE_INIT_KERNEL_FLAGS;	\
	popq	%rcx;			\
	popq	%r11;			\
	subq	$REGOFF_RIP, %rsp;	\
	movq	%rsi, REGOFF_RSI(%rsp);	\
	movq	%rdi, REGOFF_RDI(%rsp);	\
	CLEAN_CS

#define	FAST_INTR_POP			\
	movq	REGOFF_RSI(%rsp), %rsi;	\
	movq	REGOFF_RDI(%rsp), %rdi;	\
	addq	$REGOFF_RIP, %rsp

#define	FAST_INTR_RETURN		\
	ASSERT_UPCALL_MASK_IS_SET;	\
	HYPERVISOR_IRET(0)

#elif defined(__i386)

#define	FAST_INTR_PUSH		\
	cld;			\
	__SEGREGS_PUSH		\
	__SEGREGS_LOAD_KERNEL	\

#define	FAST_INTR_POP		\
	__SEGREGS_POP

#define	FAST_INTR_RETURN	\
	IRET

#endif	/* __i386 */

/*
 * Handling the CR0.TS bit for floating point support.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
#if defined(__amd64)

#define	STTS(rtmp)			\
	pushq	%rdi;			\
	movl	$1, %edi;		\
	call	HYPERVISOR_fpu_taskswitch; \
	popq	%rdi

#define	CLTS				\
	pushq	%rdi;			\
	xorl	%edi, %edi;		\
	call	HYPERVISOR_fpu_taskswitch; \
	popq	%rdi

#elif defined(__i386)

#define	STTS(r)				\
	pushl	$1;			\
	call	HYPERVISOR_fpu_taskswitch; \
	addl	$4, %esp

#define	CLTS				\
	pushl	$0;			\
	call	HYPERVISOR_fpu_taskswitch; \
	addl	$4, %esp

#endif	/* __i386 */

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_MACHPRIVREGS_H */