/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/systm.h>
#include <sys/controlregs.h>

extern void syscall_int(void);

/*
 * cpu0 and default tables and structures.
 */
#pragma	align	16(gdt0)
user_desc_t	gdt0[NGDT];		/* global descriptor table */
desctbr_t	gdt0_default_r;

#pragma	align	16(idt0)
gate_desc_t	idt0[NIDT];		/* interrupt descriptor table */
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */

#pragma	align	16(ktss0)
struct tss	ktss0;			/* kernel task state structure */

#if defined(__i386)
#pragma	align	16(dftss0)
struct tss	dftss0;			/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
system_desc_t	zero_sdesc;

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};
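
/*
 * Entries in fasttable[] are reached through the gate installed at
 * T_FASTTRAP (210) in init_idt() below.  A minimal userland sketch,
 * assuming the conventional calling sequence of the table index
 * in %eax:
 *
 *	movl	$T_GETHRTIME, %eax
 *	int	$T_FASTTRAP
 *
 * The fasttrap handler dispatches through this table without building
 * a full trap frame.
 */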

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers; the third table
 * entry is a NULL signpost that terminates the list.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments.  Only the conforming bit in type is used along
 * with the descriptor privilege level and present bits.  Default
 * operand size must be zero when in long mode.  In 32-bit compatibility
 * mode all fields are treated as in legacy mode.  For data segments
 * while in long mode only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* must be zero in long mode */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;		/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}
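
/*
 * Example (taken from init_gdt() below): the 64-bit kernel code segment
 * is built with
 *
 *	set_usegd(&gdt0[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA,
 *	    SEL_KPL, SDP_PAGES, SDP_OP32);
 *
 * which yields L = 1, D = 0, an execute/read/accessed type, and DPL 0;
 * base and limit are don't-cares for a long mode code segment.
 */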

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

#endif	/* __i386 */
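
/*
 * Note, added for clarity: in long mode a system segment descriptor is
 * 16 bytes (hence ssd_hi64base above), twice the size of a user
 * descriptor, so on amd64 the kernel TSS descriptor written by
 * set_syssegd() occupies two consecutive GDT slots.
 */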

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*
 * Note stkcpy is replaced with ist.  Read the PRM for details on this.
 */
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, uint_t ist,
    uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_ist = ist;
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t wcount, uint_t type, uint_t dpl)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = wcount;
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */
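
/*
 * Example (taken from init_idt() below): the double fault gate is the
 * one caller that passes a non-zero ist,
 *
 *	set_gatesegd(&idt0[T_DBLFLT], &syserrtrap, KCS_SEL, 1,
 *	    SDT_SYSIGT, SEL_KPL);
 *
 * so #DF switches to the stack named by ktss0.tss_ist1 (see init_tss())
 * rather than running on the interrupted thread's stack.
 */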

/*
 * Build kernel GDT.
 */

#if defined(__amd64)

static void
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;
	size_t		alen = 0xfffff;	/* entire 32-bit address space */
	int		i;

	/*
	 * Copy in from boot's gdt to our gdt entries 1 - 4.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B64DATA] = bgdt[GDT_B64DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt0[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment.  The limit attribute is ignored in
	 * 64-bit mode, but we set it to cover the entire 32-bit address
	 * space so that we can use the SYSRET instruction to return from
	 * system calls back to 32-bit applications.  SYSRET doesn't update
	 * the base, limit, or attributes of %ss or %ds descriptors.  We
	 * therefore must ensure that the descriptor contents, though
	 * ignored by the hardware in 64-bit mode, remain compatible with
	 * 32-bit apps.  For the same reason we must set the default op
	 * size of this descriptor to 32-bit operands.
	 */
	set_usegd(&gdt0[GDT_KDATA], SDP_LONG, NULL, alen, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt0[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt0[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt0[GDT_U32CODE], SDP_SHORT, NULL, alen, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * 32 and 64 bit data segments can actually share the same
	 * descriptor.  In long mode only the present bit is checked but
	 * all other fields are loaded.  But in compatibility mode all
	 * fields are interpreted as in legacy mode so they must be set
	 * correctly for a 32-bit data segment.
	 */
	set_usegd(&gdt0[GDT_UDATA], SDP_SHORT, NULL, alen, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * The 64-bit kernel has no default LDT.  By default, the LDT
	 * descriptor in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt0[GDT_LWPFS], SDP_SHORT, NULL, alen, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt0[GDT_LWPGS], SDP_SHORT, NULL, alen, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, alen, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = sizeof (gdt0) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT.  See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

#elif defined(__i386)

static void
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;
	int		i;

	/*
	 * Copy in from boot's gdt to our gdt entries 1 - 4.
	 * Entry 0 is null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_BOOTFLAT] = bgdt[GDT_BOOTFLAT];
	gdt0[GDT_BOOTCODE] = bgdt[GDT_BOOTCODE];
	gdt0[GDT_BOOTCODE16] = bgdt[GDT_BOOTCODE16];
	gdt0[GDT_BOOTDATA] = bgdt[GDT_BOOTDATA];

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt0[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt0[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt0[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt0[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_DBFLT], &dftss0,
	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt0[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt0[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt0[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = sizeof (gdt0) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT.  See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, 0, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES, SDP_OP32);
}

#endif	/* __i386 */
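
/*
 * Usage sketch for the zero base descriptors built above (hypothetical;
 * setregs() is the actual consumer): resetting an lwp's private %fs
 * descriptor amounts to a structure copy over its GDT slot, e.g.
 *
 *	gdt0[GDT_LWPFS] = zero_udesc;
 *
 * followed by reloading the segment register so the stale descriptor
 * cached in the hidden part of %fs is dropped.
 */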

#if defined(__amd64)

/*
 * Build kernel IDT.
 *
 * Note that we pretty much require every gate to be an interrupt gate;
 * that's because of our dependency on using 'swapgs' every time we come
 * into the kernel to find the cpu structure - if we get interrupted just
 * before doing that, so that %cs is in kernel mode (so that the trap prolog
 * doesn't do a swapgs), but %gsbase is really still pointing at something
 * in userland, bad things ensue.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 *
 * XX64	We do need to think further about the follow-on impact of this.
 *	Most of the kernel handlers re-enable interrupts as soon as they've
 *	saved register state and done the swapgs, but there may be something
 *	more subtle going on.
 */
static void
init_idt(void)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries.  T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor.  15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt0[i], &resvtrap, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);

	set_gatesegd(&idt0[T_ZERODIV], &div0trap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SGLSTP], &dbgtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NMIFLT], &nmiint, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_BPTFLT], &brktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_OVFLW], &ovflotrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_BOUNDFLT], &boundstrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ILLINST], &invoptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NOEXTFLT], &ndptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * double fault handler.
	 */
	set_gatesegd(&idt0[T_DBLFLT], &syserrtrap, KCS_SEL, 1, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt0[T_TSSFLT], &invtsstrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SEGFLT], &segnptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_STKFLT], &stktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_GPFLT], &gptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_PGFLT], &pftrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 15 reserved.
	 */
	set_gatesegd(&idt0[15], &resvtrap, KCS_SEL, 0, SDT_SYSIGT, SEL_KPL);

	set_gatesegd(&idt0[T_EXTERRFLT], &ndperr, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ALIGNMENT], &achktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_MCE], &mcetrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SIMDFPE], &xmtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt0[i], &invaltrap, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt0[i], ivctptr, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);
	}

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * System call handler.
	 */
	set_gatesegd(&idt0[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt0[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	if (boothowto & RB_DEBUG)
		kdi_dvec_idt_sync(idt0);

	/*
	 * We must maintain a description of idt0 in convenient IDTR format
	 * for use by T_NMIFLT and T_PGFLT (nmiint() and pentium_pftrap())
	 * handlers.
	 */
	idt0_default_r.dtr_limit = sizeof (idt0) - 1;
	idt0_default_r.dtr_base = (uintptr_t)idt0;
	wr_idtr(&idt0_default_r);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, 0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[2].ih_inum = 0;
}
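
/*
 * Note, added for clarity: brand_tbl[2].ih_inum == 0 is the NULL
 * signpost mentioned at the brand_tbl[] definition above;
 * brand_interpositioning_enable() and _disable() below walk the
 * table until they reach it.
 */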

#elif defined(__i386)

/*
 * Build kernel IDT.
 */
static void
init_idt(void)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries.  T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor.  15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt0[i], &resvtrap, KCS_SEL, 0, SDT_SYSTGT,
		    SEL_KPL);

	set_gatesegd(&idt0[T_ZERODIV], &div0trap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SGLSTP], &dbgtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NMIFLT], &nmiint, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_BPTFLT], &brktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_OVFLW], &ovflotrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_BOUNDFLT], &boundstrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ILLINST], &invoptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NOEXTFLT], &ndptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * Install TSS for T_DBLFLT handler.
	 */
	set_gatesegd(&idt0[T_DBLFLT], NULL, DFTSS_SEL, 0, SDT_SYSTASKGT,
	    SEL_KPL);

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt0[T_TSSFLT], &invtsstrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SEGFLT], &segnptrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_STKFLT], &stktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_GPFLT], &gptrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_PGFLT], &pftrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 15 reserved.
	 */
	set_gatesegd(&idt0[15], &resvtrap, KCS_SEL, 0, SDT_SYSTGT, SEL_KPL);

	set_gatesegd(&idt0[T_EXTERRFLT], &ndperr, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ALIGNMENT], &achktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_MCE], &mcetrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SIMDFPE], &xmtrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt0[i], &invaltrap, KCS_SEL, 0, SDT_SYSTGT,
		    SEL_KPL);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt0[i], ivctptr, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);
	}

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * System call handler.  Note that we don't use the hardware's
	 * parameter copying mechanism here; see the comment above
	 * sys_call() for details.
	 */
	set_gatesegd(&idt0[T_SYSCALLINT], &sys_call, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt0[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	if (boothowto & RB_DEBUG)
		kdi_dvec_idt_sync(idt0);

	/*
	 * We must maintain a description of idt0 in convenient IDTR format
	 * for use by T_NMIFLT and T_PGFLT (nmiint() and pentium_pftrap())
	 * handlers.
	 */
	idt0_default_r.dtr_limit = sizeof (idt0) - 1;
	idt0_default_r.dtr_base = (uintptr_t)idt0;
	wr_idtr(&idt0_default_r);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, 0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[2].ih_inum = 0;
}

#endif	/* __i386 */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one.  Under normal circumstances, the LDTR contains 0.  Any process
 * attempting to reference the LDT will therefore cause a #gp.  System
 * calls made via the obsolete lcall mechanism are emulated by the #gp
 * fault handler.
 */
static void
init_ldt(void)
{
	wr_ldtr(0);
}
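
/*
 * Illustrative sketch of the obsolete mechanism described above
 * (hypothetical userland code; the historical SysV x86 convention used
 * a far call through a low LDT selector):
 *
 *	lcall	$0x7, $0
 *
 * With LDTR == 0 this far call takes a #gp, and the fault handler
 * recognizes the pattern and emulates the system call.
 */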

#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch.  All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0.tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map.  This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0.tss_esp is dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0.tss_ss0 = KDS_SEL;
	ktss0.tss_eip = (uint32_t)_start;
	ktss0.tss_ds = ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
	ktss0.tss_cs = KCS_SEL;
	ktss0.tss_fs = KFS_SEL;
	ktss0.tss_gs = KGS_SEL;
	ktss0.tss_ldt = ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0.tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_ss0 = KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been set up.
	 */
	dftss0.tss_eip = (uint32_t)syserrtrap;
	dftss0.tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_cs = KCS_SEL;
	dftss0.tss_ds = KDS_SEL;
	dftss0.tss_es = KDS_SEL;
	dftss0.tss_ss = KDS_SEL;
	dftss0.tss_fs = KFS_SEL;
	dftss0.tss_gs = KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map.  This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
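
/*
 * Note, added for clarity: the TSS descriptor limit installed in
 * init_gdt() is sizeof (ktss0) - 1, so a bitmap base of sizeof (ktss0)
 * points past the segment limit.  The CPU then treats every port as
 * inaccessible from user mode, which is the "no I/O permission map"
 * behavior the comments above rely on.
 */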

void
init_tables(void)
{
	init_gdt();
	init_tss();
	init_idt();
	init_ldt();
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	int i;

	for (i = 0; brand_tbl[i].ih_inum; i++)
		CPU->cpu_idt[brand_tbl[i].ih_inum] =
		    brand_tbl[i].ih_interp_desc;

#if defined(__amd64)
	wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
	wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
#endif

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	int i;

	for (i = 0; brand_tbl[i].ih_inum; i++)
		CPU->cpu_idt[brand_tbl[i].ih_inum] =
		    brand_tbl[i].ih_default_desc;

#if defined(__amd64)
	wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
	wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
#endif

	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}
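
/*
 * Note, added for clarity: on amd64, MSR_AMD_LSTAR and MSR_AMD_CSTAR
 * hold the 64-bit and compatibility-mode syscall entry points, and
 * MSR_INTC_SEP_EIP holds the sysenter target when the processor
 * supports it (X86_SEP).  Rewriting those MSRs plus the T_INT80 and
 * T_SYSCALLINT IDT entries above therefore covers every system call
 * flavor a branded process might use.
 */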