/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

#pragma	align	16(idt0)
gate_desc_t	idt0[NIDT];		/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

#pragma align	16(ktss0)
struct tss	ktss0;			/* kernel task state structure */

#if defined(__i386)
#pragma align	16(dftss0)
struct tss	dftss0;			/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with descriptor priority and present bits. Default operand size must
 * be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments while in long mode
 * only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* 32-bit operands only */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;		/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}
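
/*
 * Example (illustrative only; the authoritative call sites are in
 * init_gdt_common() below): the 64-bit kernel code segment is built with
 *
 *	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
 *	    SDP_PAGES, SDP_OP32);
 *
 * which leaves usd_long = 1 and usd_def32 = 0 as long mode requires; the
 * base and limit are irrelevant to a long mode code segment and stay zero.
 */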

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8);
	return ((void *)base);
}

#endif	/* __i386 */
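
/*
 * Note that get_ssd_base() simply inverts the base-address split performed
 * by set_syssegd().  Illustrative round trip (desc and base are stand-in
 * names, not identifiers from this file):
 *
 *	set_syssegd(&desc, base, size, SDT_SYSTSS, SEL_KPL);
 *	ASSERT(get_ssd_base(&desc) == base);
 */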

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64 bit native we use the IST stack mechanism
	 * for double faults. All other traps use the CPL = 0
	 * (tss_rsp0) stack.
	 */
#if !defined(__xpv)
	if (func == &syserrtrap)	/* #DF handler; see init_idt_common() */
		dp->sgd_ist = 1;
	else
#endif
		dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */
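
/*
 * Example (illustrative; see init_idt_common() below for the real call
 * sites): the page fault handler is installed with
 *
 *	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
 *
 * i.e. an interrupt gate through the kernel code selector, with a DPL that
 * keeps userland from raising the vector via an 'int' instruction.
 */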

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */
void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */

	CPU->cpu_gdt[sidx] = *udp;

#endif	/* __xpv */
}

/*
 * Writes the descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */

	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}
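
/*
 * Illustrative use of gdt_update_usegd(); pcb_fsdesc stands in for an
 * lwp's saved %fs descriptor and is not an identifier from this file:
 *
 *	kpreempt_disable();	(satisfies the no-migration requirement)
 *	gdt_update_usegd(GDT_LWPFS, &pcb->pcb_fsdesc);
 *	kpreempt_enable();
 */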

#if defined(__xpv)

/*
 * Convert a hw format gate descriptor into the pseudo-IDT format expected
 * by the hypervisor.  Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
#if defined(__amd64)
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
#endif
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to 0xFFFF so that we can use the
	 * SYSRET instruction to return from system calls back to 32-bit
	 * applications.  SYSRET doesn't update the base, limit, or
	 * attributes of %ss or %ds descriptors.  We therefore must ensure
	 * that the kernel uses something, though it will be ignored by
	 * hardware, that is compatible with 32-bit apps.  For the same
	 * reason we must set the default op size of this descriptor to
	 * 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT
	 * descriptor in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

#if !defined(__xpv)

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], &dftss0,
	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __xpv */
#endif	/* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates on i386, even though they are not
 * strictly required there for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
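
/*
 * To make the window concrete (a sketch only, not the actual entry code):
 *
 *	<trap from userland: %cs is now KCS_SEL, %gsbase still userland's>
 *	--- a maskable interrupt arriving here would see a kernel %cs,
 *	    skip its own swapgs, and index off a userland %gsbase ---
 *	swapgs
 *	movq	%gs:CPU_THREAD, %rax
 *
 * An interrupt gate keeps interrupts blocked across that window until the
 * handler has executed its own swapgs.
 */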
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
#if defined(__amd64)

	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);

#elif defined(__i386)

	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL);

#endif	/* __i386 */
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
#endif	/* __i386 */

	brand_tbl[2].ih_inum = 0;
}
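
/*
 * For reference: the interposing table built above is consumed by
 * brand_interpositioning_enable() and brand_interpositioning_disable() at
 * the bottom of this file, each of which walks brand_tbl[] until it hits
 * the ih_inum == 0 signpost.
 */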

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	/*
	 * currently nothing extra for the hypervisor
	 */
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. Vector 15 is
	 * also unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process
 * attempting to reference the LDT will therefore cause a #gp. System
 * calls made via the obsolete lcall mechanism are emulated by the #gp
 * fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch. All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0.tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
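
/*
 * Cross-reference: tss_ist1 above is the stack selected by the sgd_ist = 1
 * setting that set_gatesegd() applies to the double fault gate, so #DF is
 * handled on a known-good stack even when %rsp itself is the problem.
 */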

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0.tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0.tss_ss0	= KDS_SEL;
	ktss0.tss_eip	= (uint32_t)_start;
	ktss0.tss_ds	= ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
	ktss0.tss_cs	= KCS_SEL;
	ktss0.tss_fs	= KFS_SEL;
	ktss0.tss_gs	= KGS_SEL;
	ktss0.tss_ldt	= ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0.tss_esp0	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_ss0	= KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0.tss_eip	= (uint32_t)syserrtrap;
	dftss0.tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_cs	= KCS_SEL;
	dftss0.tss_ds	= KDS_SEL;
	dftss0.tss_es	= KDS_SEL;
	dftss0.tss_ss	= KDS_SEL;
	dftss0.tss_fs	= KFS_SEL;
	dftss0.tss_gs	= KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_m.mcpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
	init_idt(&idt0[0]);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_m.mcpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_m.mcpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(&idt0[0]);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = sizeof (idt0) - 1;
	wr_idtr(&idtr);
	CPU->cpu_m.mcpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = &ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}
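
/*
 * The two routines below are invoked by the brand infrastructure, e.g.
 * around execution of a branded lwp on a cpu; per the ASSERTs, callers
 * must already have prevented preemption (and hence migration).
 */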

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * syscall instruction. The 32-bit syscalls are handled by
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}