/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2018 Joyent, Inc. All rights reserved.
 * Copyright 2022 Oxide Computer Company
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>
#include <sys/note.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */

tss_t		*ktss0;		/* kernel task state structure */

user_desc_t	zero_udesc;	/* base zero user desc native procs */
user_desc_t	null_udesc;	/* null user descriptor */
system_desc_t	null_sdesc;	/* null system descriptor */

user_desc_t	zero_u32desc;	/* 32-bit compatibility procs */

user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;

/*
 * If the size of this is changed, you must update hat_pcp_setup() and the
 * definitions in exception.s
 */
extern char dblfault_stack0[DEFAULTSTKSZ];
extern char nmi_stack0[DEFAULTSTKSZ];
extern char mce_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,				/* T_FNULL routine */
	fast_null,				/* T_FGETFP routine (initially null) */
	fast_null,				/* T_FSETFP routine (initially null) */
	(void (*)())(uintptr_t)get_hrtime,	/* T_GETHRTIME */
	(void (*)())(uintptr_t)gethrvtime,	/* T_GETHRVTIME */
	(void (*)())(uintptr_t)get_hrestime,	/* T_GETHRESTIME */
	(void (*)())(uintptr_t)getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in format the hardware
 * can understand.
 */

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with descriptor priority and present bits. Default operand size must
 * be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments while in long mode
 * only the present bit is loaded.
 */
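/*
 * For reference, a rough sketch of how set_usegd() below scatters its
 * arguments across the 8-byte descriptor (bit positions follow the usual
 * x86 segment descriptor layout; see sys/segments.h for the authoritative
 * field definitions):
 *
 *	bits  0-15	usd_lolimit		limit[15:0]
 *	bits 16-39	usd_lobase/usd_midbase	base[23:0]
 *	bits 40-47	usd_type (includes S), usd_dpl, usd_p
 *	bits 48-51	usd_hilimit		limit[19:16]
 *	bits 52-55	AVL, usd_long (L), usd_def32 (D/B), usd_gran (G)
 *	bits 56-63	usd_hibase		base[31:24]
 */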
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, uint32_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
	/* This should never be a "system" segment. */
	ASSERT3U(type & SDT_S, !=, 0);
	ASSERT3P(dp, !=, NULL);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* 32-bit operands only */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the GDT whenever we change segment registers around.
	 * With KPTI on, the GDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	ASSERT3U(type & SDT_A, !=, 0);

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;	/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 *
 * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
 * all interrupts. We have different ISTs for each class of exceptions that are
 * most likely to occur while handling an existing exception; while many of
 * these are just going to panic, it's nice not to trample on the existing
 * exception state for debugging purposes.
 *
 * Normal interrupts are all redirected unconditionally to the KPTI trampoline
 * stack space. This unifies the trampoline handling between user and kernel
 * space (and avoids the need to touch %gs).
 *
 * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when
 * we do a read from KMDB that causes another #PF. Without its own IST, this
 * would stomp on the kernel's mcpu_kpti_flt frame.
 */
uint_t
idt_vector_to_ist(uint_t vector)
{
#if defined(__xpv)
	_NOTE(ARGUNUSED(vector));
	return (IST_NONE);
#else
	switch (vector) {
	/* These should always use IST even without KPTI enabled. */
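	/*
	 * #DF, NMI and #MC can arrive no matter what the kernel is doing,
	 * including when the current stack is unusable, so each gets a
	 * dedicated, known-good stack regardless of kpti_enable.
	 */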
	case T_DBLFLT:
		return (IST_DF);
	case T_NMIFLT:
		return (IST_NMI);
	case T_MCE:
		return (IST_MCE);

	case T_BPTFLT:
	case T_SGLSTP:
		if (kpti_enable == 1) {
			return (IST_DBG);
		}
		return (IST_NONE);
	case T_STKFLT:
	case T_GPFLT:
	case T_PGFLT:
		if (kpti_enable == 1) {
			return (IST_NESTABLE);
		}
		return (IST_NONE);
	default:
		if (kpti_enable == 1) {
			return (IST_DEFAULT);
		}
		return (IST_NONE);
	}
#endif
}

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t ist)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_ist = ist;
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(DEBUG)
	/* This should never be a "system" segment, but it might be null. */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
	}
	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the GDT whenever we change segment registers around.
	 * With KPTI on, the GDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
	}
#endif

#if defined(__xpv)
	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */
	CPU->cpu_gdt[sidx] = *udp;
#endif	/* __xpv */
}

/*
 * Writes single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(DEBUG)
	/* This should never be a "system" segment, but it might be null. */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
	}
	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the LDT whenever we change segment registers around.
	 * With KPTI on, the LDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
	}
#endif

#if defined(__xpv)
	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */
	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}

#if defined(__xpv)

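/*
 * Note: a paravirtualized guest never loads a hardware IDT with lidt.
 * Instead, each gate is converted into the hypervisor's trap_info_t format
 * by the routine below and registered via xen_set_trap_table() (see
 * xen_idt_write() and the __xpv variant of init_desctbls()).
 */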
/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor.  Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */


/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int	i;

	ASSERT3P(gdt, !=, NULL);

	init_boot_gdt(gdt);

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to SDP_LIMIT_MAX so that we can
	 * use the SYSRET instruction to return from system calls back to
	 * 32-bit applications. SYSRET doesn't update the base, limit, or
	 * attributes of %ss or %ds descriptors. We therefore must ensure
	 * that the kernel uses something, though it will be ignored by
	 * hardware, that is compatible with 32-bit apps. For the same
	 * reason we must set the default op size of this descriptor to
	 * 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
	 * in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t	gdtpa;
	ulong_t		ma[1];		/* XXPV should be a memory_t */
	ulong_t		addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	ASSERT3P(gdt0, !=, NULL);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
		    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
		    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_gdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */


/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates for i386, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV],
	    (kpti_enable == 1) ? &tr_div0trap : &div0trap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
	set_gatesegd(&idt[T_SGLSTP],
	    (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
	set_gatesegd(&idt[T_NMIFLT],
	    (kpti_enable == 1) ? &tr_nmiint : &nmiint,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
	set_gatesegd(&idt[T_BPTFLT],
	    (kpti_enable == 1) ? &tr_brktrap : &brktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
	set_gatesegd(&idt[T_OVFLW],
	    (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
	set_gatesegd(&idt[T_BOUNDFLT],
	    (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
	set_gatesegd(&idt[T_ILLINST],
	    (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
	set_gatesegd(&idt[T_NOEXTFLT],
	    (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
	set_gatesegd(&idt[T_DBLFLT],
	    (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */
	set_gatesegd(&idt[T_TSSFLT],
	    (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
	set_gatesegd(&idt[T_SEGFLT],
	    (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
	set_gatesegd(&idt[T_STKFLT],
	    (kpti_enable == 1) ? &tr_stktrap : &stktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
	set_gatesegd(&idt[T_GPFLT],
	    (kpti_enable == 1) ? &tr_gptrap : &gptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
	set_gatesegd(&idt[T_PGFLT],
	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
	set_gatesegd(&idt[T_EXTERRFLT],
	    (kpti_enable == 1) ? &tr_ndperr : &ndperr,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
	set_gatesegd(&idt[T_ALIGNMENT],
	    (kpti_enable == 1) ? &tr_achktrap : &achktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
	set_gatesegd(&idt[T_MCE],
	    (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
	set_gatesegd(&idt[T_SIMDFPE],
	    (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP],
	    (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));

	/*
	 * System call handler.
	 */
	set_gatesegd(&idt[T_SYSCALLINT],
	    (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET],
	    (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));

	/*
	 * Prepare interposing descriptor for the syscall handler
	 * and cache copy of the default descriptor.
	 */
	brand_tbl[0].ih_inum = T_SYSCALLINT;
	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];

	set_gatesegd(&(brand_tbl[0].ih_interp_desc),
	    (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
	    &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    idt_vector_to_ist(T_SYSCALLINT));

	brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++) {
		set_gatesegd(&idt[i],
		    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(T_RESVTRAP));
	}

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++) {
		set_gatesegd(&idt[i],
		    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(T_INVALTRAP));
	}

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname),
		    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(i));
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)

static void
init_tss(void)
{
	extern struct cpu cpus[];

	/*
	 * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
	 * context switch but it'll be overwritten with this same value anyway.
	 */
	if (kpti_enable == 1) {
		ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
	}

	/* Set up the IST stacks for double fault, NMI, MCE. */
	ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
	ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];

	/*
	 * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
	 * enabled), and also for KDI (always).
	 */
	ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;

	if (kpti_enable == 1) {
		/* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
		ktss0->tss_ist5 =
		    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

		/* This IST stack is used for all other intrs (for KPTI). */
		ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
	}

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* !__xpv */
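/*
 * For reference, the IST slot assignments made in init_tss() above and
 * consumed via idt_vector_to_ist() are (assuming the usual IST_* numbering
 * from sys/segments.h):
 *
 *	IST1 (IST_DF)		dedicated double fault stack
 *	IST2 (IST_NMI)		dedicated NMI stack
 *	IST3 (IST_MCE)		dedicated machine check stack
 *	IST4 (IST_DBG)		debug/KDI stack (mcpu_kpti_dbg)
 *	IST5 (IST_NESTABLE)	#GP/#PF/#SS stack when KPTI is enabled
 *	IST6 (IST_DEFAULT)	all other vectors when KPTI is enabled
 */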
#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Initialize this CPU's LDT.
	 */
	CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
	    LDT_CPU_SIZE, PAGESIZE);
	bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
	CPU->cpu_m.mcpu_ldt_len = 0;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();

	/* Stash this so that the NMI, MCE, #DF and KDI handlers can use it. */
	kpti_safe_cr3 = (uint64_t)getcr3();
}

#endif	/* __xpv */

#ifndef __xpv
/*
 * As per Intel Vol. 3 27.5.2, the GDTR limit is reset to 64KB on a VM exit,
 * so we have to manually fix it up ourselves.
 *
 * The caller may still need to make sure that it can't be moved off-CPU with
 * the incorrect limit before calling this (such as by disabling preemption).
 */
void
reset_gdtr_limit(void)
{
	ulong_t flags = intr_clear();
	desctbr_t gdtr;

	rd_gdtr(&gdtr);
	gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
	wr_gdtr(&gdtr);

	intr_restore(flags);
}
#endif	/* !__xpv */

/*
 * We need a GDT owned by the kernel and not the bootstrap relatively
 * early in kernel initialization (e.g., to have segments we can reliably
 * catch an exception on).
 *
 * Initializes a GDT with segments normally defined in the boot loader.
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
	ASSERT3P(bgdt, !=, NULL);

#ifdef __xpv
	/* XXX: It is unclear why this 32-bit data segment is marked long. */
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
#else
	/*
	 * Reset boot segments. These ostensibly come from the boot loader,
	 * but we reset them to match our expectations, particularly if we
	 * are not using that loader.
	 */
	set_usegd(&bgdt[GDT_B32DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);

	/*
	 * 16-bit segments for making BIOS calls (not applicable on all
	 * architectures).
	 */
	set_usegd(&bgdt[GDT_B16CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, 0, 0);
	/*
	 * XXX: SDP_OP32 makes this a 32-bit segment, which seems wrong
	 * here, but that's what boot_gdt.s used.
	 */
	set_usegd(&bgdt[GDT_B16DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, 0, SDP_OP32);
#endif	/* __xpv */

	/*
	 * A 64-bit code segment used in early boot. Early IDTs refer to this.
	 */
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void *arg __unused)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via the
	 * syscall instruction. The 32-bit syscalls are handled by the
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
		} else {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
		}
	}

#endif

	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_INTC_SEP_EIP,
			    (uintptr_t)tr_brand_sys_sysenter);
		} else {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
		}
	}
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void *arg __unused)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
		} else {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
		}
	}

#endif

	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
		} else {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
		}
	}
}
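/*
 * A hypothetical usage sketch (the real call sites are elsewhere in the
 * kernel, e.g. the brand framework): since these two routines modify
 * per-CPU state (the running CPU's IDT and MSRs), a caller would run them
 * on each CPU with preemption disabled to satisfy the ASSERTs above:
 *
 *	kpreempt_disable();
 *	brand_interpositioning_enable(NULL);
 *	kpreempt_enable();
 */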