/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>
#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
desctbr_t	gdt0_default_r;

#pragma align	16(idt0)
gate_desc_t	idt0[NIDT];	/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;	/* describes idt0 in IDTR format */
#endif

#pragma align	16(ktss0)
struct tss	ktss0;		/* kernel task state structure */

#if defined(__i386)
#pragma align	16(dftss0)
struct tss	dftss0;		/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;	/* base-zero user descriptor, native procs */
system_desc_t	zero_sdesc;

#if defined(__amd64)
user_desc_t	zero_u32desc;	/* 32-bit compatibility procs */
#endif	/* __amd64 */

#pragma align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors that allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one
 * additional entry as a NULL signpost to terminate the table.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L (long mode) attribute bit for code
 * segments. Only the conforming bit in type is used along with the
 * descriptor privilege level and present bits. The default operand size
 * must be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments in long mode, only
 * the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;	/* D bit must be zero when L = 1 */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;	/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}

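/*
 * For reference, set_usegd() scatters the 32-bit base and 20-bit limit
 * across the fields of the standard 8-byte x86 descriptor. The sketch
 * below is the architectural layout, shown only as an aid to reading
 * the usd_* assignments above (it is not code from this file):
 *
 *  63       56 55 54 53 52  51    48 47 46 45 44 43   40 39        32
 * +-----------+--+--+--+---+--------+--+-----+--+-------+-----------+
 * | base31:24 | G| D| L|AVL|lim19:16| P| DPL | S| type  | base23:16 |
 * +-----------+--+--+--+---+--------+--+-----+--+-------+-----------+
 *
 *  31                             16 15                            0
 * +---------------------------------+-------------------------------+
 * |            base 15:0            |          limit 15:0           |
 * +---------------------------------+-------------------------------+
 *
 * usd_lobase/usd_midbase/usd_hibase and usd_lolimit/usd_hilimit hold
 * the base and limit pieces; usd_gran is G, usd_def32 is D, and
 * usd_long is the L bit that distinguishes 64-bit code segments from
 * compatibility mode ones.
 */
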
#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

#endif	/* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64-bit native we use the IST stack mechanism for double
	 * faults; all other traps use the CPL = 0 (tss_rsp0) stack.
	 * Note that 'type' here is the gate type (e.g. SDT_SYSIGT), not
	 * the vector number, so the double fault gate cannot be
	 * recognized from within this routine; init_idt_common() sets
	 * sgd_ist explicitly on the T_DBLFLT entry instead.
	 */
	dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */

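/*
 * For reference, the gates filled in by set_gatesegd() differ in size by
 * mode (architectural layout, noted as an aid to reading the sgd_*
 * assignments above): a legacy i386 gate is 8 bytes, holding the
 * 16:16-split offset, the code selector, and the P/DPL/type bits. A long
 * mode gate is 16 bytes; it adds sgd_hi64offset for offset bits 63:32 and
 * a 3-bit IST field. When IST is non-zero, delivery of that vector
 * unconditionally switches to the corresponding tss_ist<n> stack in the
 * TSS, which is how the #DF handler gets a known-good stack (see
 * init_idt_common() and init_tss()).
 */
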
#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to 0xFFFF so that we can use the
	 * SYSRET instruction to return from system calls back to 32-bit
	 * applications. SYSRET doesn't update the base, limit, or attributes
	 * of %ss or %ds descriptors. We therefore must ensure that the kernel
	 * uses something, though it will be ignored by hardware, that is
	 * compatible with 32-bit apps. For the same reason we must set the
	 * default op size of this descriptor to 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * 32- and 64-bit data segments can actually share the same
	 * descriptor. In long mode only the present bit is checked and the
	 * remaining fields are ignored, but in compatibility mode all fields
	 * are interpreted as in legacy mode, so they must be set correctly
	 * for a 32-bit data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT
	 * descriptor in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * Initialize fs and gs descriptors for 32-bit processes.
	 * Only attributes and limits are initialized; the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in the GDT. See setregs()
	 * for an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

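/*
 * A note on the selectors used with these descriptors (this is the
 * architectural encoding, not anything specific to this file): a selector
 * value is (index << 3) | TI | RPL, where TI selects the GDT (0) versus
 * the LDT (1). So a kernel selector such as KCS_SEL is expected to encode
 * GDT_KCODE in its index bits with RPL == SEL_KPL, while user selectors
 * carry RPL == SEL_UPL (3).
 */
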
static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful, e.g. at the last valid
	 * address below the hole, so that any attempt to index
	 * off them causes an exception.
	 *
	 * For now, point them at 8G -- at least that should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span the entire 32-bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], &dftss0,
	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized; the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize a convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in the GDT. See setregs()
	 * for an example.
	 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

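/*
 * Note the contrast with the amd64 version above: here the kernel reaches
 * its cpu_t through the GDT_GS descriptor, based at &cpus[0] and limited
 * to sizeof (struct cpu) - 1, so %gs-relative accesses are bounded to the
 * cpu structure. On amd64 no such descriptor exists; init_gdt() instead
 * writes &cpus[0] directly into MSR_AMD_GSBASE.
 */
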
static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __i386 */

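/*
 * Why both init_gdt() variants reload the segment registers right after
 * wr_gdtr(): writing GDTR only changes which table future selector loads
 * consult. The hidden portion of each segment register still caches the
 * old boot-time descriptor until the selector itself is written again,
 * which is exactly what load_segment_registers() does.
 */
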
/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates for i386 as well, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);

	/*
	 * double fault handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	idt[T_DBLFLT].sgd_ist = 1;	/* #DF runs on the IST1 stack */
#elif defined(__i386)
	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, SEL_KPL);

#endif	/* __i386 */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, SEL_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    SEL_UPL);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    SEL_UPL);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    SDT_SYSIGT, SEL_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, SEL_UPL);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, SEL_UPL);
#endif	/* __i386 */

	brand_tbl[2].ih_inum = 0;
}

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize the entire table with the 'reserved' trap and then
	 * overwrite specific entries. T_EXTOVRFLT (9) is unsupported and
	 * reserved since it can only be generated on a 386 processor;
	 * vector 15 is also unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, SEL_KPL);
	}

	/*
	 * Now install the common ones. Note that this will overlay some of
	 * the entries installed above, like T_SYSCALLINT and T_FASTTRAP.
	 */
	init_idt_common(idt);
}

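/*
 * To summarize the table built above (a reading aid, not new behavior):
 * vectors 0-19 end up with the architecturally defined exception handlers
 * (unsupported ones keep resvtrap), 20-31 are reserved and point at
 * invaltrap, and 32-255 are wired to the ivct<n> interrupt stubs, after
 * which init_idt_common() reclaims a few high vectors such as T_INT80
 * (0x80), T_FASTTRAP and T_SYSCALLINT.
 */
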
/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process
 * attempting to reference the LDT will therefore cause a #gp. System
 * calls made via the obsolete lcall mechanism are emulated by the #gp
 * fault handler.
 */
static void
init_ldt(void)
{
	wr_ldtr(0);
}

#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch. All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0.tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set the I/O bit map offset equal to the size of the TSS segment
	 * limit for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp faults.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to the descriptor for ktss0 in the gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0.tss_esp0 is dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0.tss_ss0 = KDS_SEL;
	ktss0.tss_eip = (uint32_t)_start;
	ktss0.tss_ds = ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
	ktss0.tss_cs = KCS_SEL;
	ktss0.tss_fs = KFS_SEL;
	ktss0.tss_gs = KGS_SEL;
	ktss0.tss_ldt = ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0.tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_ss0 = KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been set up.
	 */
	dftss0.tss_eip = (uint32_t)syserrtrap;
	dftss0.tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_cs = KCS_SEL;
	dftss0.tss_ds = KDS_SEL;
	dftss0.tss_es = KDS_SEL;
	dftss0.tss_ss = KDS_SEL;
	dftss0.tss_fs = KFS_SEL;
	dftss0.tss_gs = KGS_SEL;

	/*
	 * Set the I/O bit map offset equal to the size of the TSS segment
	 * limit for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp faults.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to the descriptor for ktss0 in the gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_m.mcpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(&idt0[0]);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = sizeof (idt0) - 1;
	wr_idtr(&idtr);
	CPU->cpu_m.mcpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = &ktss0;
	init_ldt();
}

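/*
 * The ordering in init_desctbls() above matters: init_gdt() must run
 * first because everything else hangs off the new GDT. The IDT gates
 * name KCS_SEL, and wr_tsr(KTSS_SEL) in init_tss() loads the task
 * register from the GDT_KTSS descriptor that init_gdt_common() installed.
 */
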
/*
 * In the early kernel, we need to set up a simple GDT to run on.
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	int i;

	for (i = 0; brand_tbl[i].ih_inum; i++)
		CPU->cpu_idt[brand_tbl[i].ih_inum] =
		    brand_tbl[i].ih_interp_desc;

#if defined(__amd64)
	wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
	wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
#endif

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	int i;

	for (i = 0; brand_tbl[i].ih_inum; i++)
		CPU->cpu_idt[brand_tbl[i].ih_inum] =
		    brand_tbl[i].ih_default_desc;

#if defined(__amd64)
	wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
	wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
#endif

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}

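/*
 * A note on the MSRs rewritten above: MSR_AMD_LSTAR and MSR_AMD_CSTAR
 * hold the 64-bit and compatibility-mode entry points used by the SYSCALL
 * instruction, and MSR_INTC_SEP_EIP is the SYSENTER %eip MSR. Together
 * with the brand_tbl[] IDT entries, these cover the fast-path system call
 * entries into the kernel; the legacy lcall path is emulated out of the
 * #gp handler, as noted above init_ldt().
 */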