/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;			/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

struct tss	*ktss0;			/* kernel task state structure */

#if defined(__i386)
struct tss	*dftss0;		/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

#pragma	align	16(dblfault_stack0)
char	dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with descriptor priority and present bits. Default operand size must
 * be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments while in long mode
 * only the present bit is loaded.
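 *
 * For example, init_gdt_common() below builds the 64-bit kernel code
 * segment with a call along these lines:
 *
 *	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
 *	    SDP_PAGES, SDP_OP32);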
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* 32-bit operands only */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8);
	return ((void *)base);
}

#endif	/* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
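 *
 * For example, init_idt_common() below installs the divide-by-zero handler
 * (T_ZERODIV) with a call along these lines:
 *
 *	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT,
 *	    TRP_KPL, 0);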
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64 bit native we use the IST stack mechanism
	 * for double faults. All other traps use the CPL = 0
	 * (tss_rsp0) stack.
	 */
#if !defined(__xpv)
	if (vector == T_DBLFLT)
		dp->sgd_ist = 1;
	else
#endif
		dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */

	CPU->cpu_gdt[sidx] = *udp;

#endif	/* __xpv */
}

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */

	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
#if defined(__amd64)
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL */
#endif
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to 0xFFFF so that we can use the
	 * SYSRET instruction to return from system calls back to 32-bit
	 * applications. SYSRET doesn't update the base, limit, or attributes
	 * of %ss or %ds descriptors. We therefore must ensure that the kernel
	 * uses something, though it will be ignored by hardware, that is
	 * compatible with 32-bit apps. For the same reason we must set the
	 * default op size of this descriptor to 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
	 * in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span the entire 32-bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

#if !defined(__xpv)

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);
	/*
	 * Initialize convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __xpv */
#endif	/* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates for i386, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack are in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
#if defined(__amd64)

	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    T_DBLFLT);

#elif defined(__i386)

	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
	    0);

#endif	/* __i386 */
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);

	/*
	 * Prepare interposing descriptor for the syscall handler
	 * and cache copy of the default descriptor.
 */
	brand_tbl[0].ih_inum = T_SYSCALLINT;
	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif	/* __i386 */

	brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch. All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0	= KDS_SEL;
	ktss0->tss_eip	= (uint32_t)_start;
	ktss0->tss_ds	= ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs	= KCS_SEL;
	ktss0->tss_fs	= KFS_SEL;
	ktss0->tss_gs	= KGS_SEL;
	ktss0->tss_ldt	= ULDT_SEL;

	/*
	 * Initialize double fault tss.
 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0	= KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0->tss_eip	= (uint32_t)syserrtrap;
	dftss0->tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs	= KCS_SEL;
	dftss0->tss_ds	= KDS_SEL;
	dftss0->tss_es	= KDS_SEL;
	dftss0->tss_ss	= KDS_SEL;
	dftss0->tss_fs	= KFS_SEL;
	dftss0->tss_gs	= KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via the
	 * syscall instruction. The 32-bit syscalls are handled by the
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}