/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2018 Joyent, Inc. All rights reserved.
 * Copyright 2022 Oxide Computer Company
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>
#include <sys/note.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */

tss_t		*ktss0;		/* kernel task state structure */


user_desc_t	zero_udesc;	/* base zero user desc native procs */
user_desc_t	null_udesc;	/* null user descriptor */
system_desc_t	null_sdesc;	/* null system descriptor */

user_desc_t	zero_u32desc;	/* 32-bit compatibility procs */

user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;

/*
 * If the size of this is changed, you must update hat_pcp_setup() and the
 * definitions in exception.s
 */
extern char dblfault_stack0[DEFAULTSTKSZ];
extern char nmi_stack0[DEFAULTSTKSZ];
extern char mce_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())(uintptr_t)get_hrtime,	/* T_GETHRTIME */
	(void (*)())(uintptr_t)gethrvtime,	/* T_GETHRVTIME */
	(void (*)())(uintptr_t)get_hrestime,	/* T_GETHRESTIME */
	(void (*)())(uintptr_t)getlgrp		/* T_GETLGRP */
};
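
/*
 * Illustrative sketch, not part of this file: userland reaches one of the
 * routines above by loading the table index into %eax and issuing a software
 * interrupt to the fast trap gate installed at T_FASTTRAP below, roughly:
 *
 *	movl	$T_GETHRTIME, %eax
 *	int	$T_FASTTRAP
 *
 * The exact calling convention should be checked against the libc fast-trap
 * wrappers; the snippet only shows the shape of the mechanism.
 */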

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */
/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with the descriptor privilege level (DPL) and present bits. The default
 * operand size must be zero when in long mode. In 32-bit compatibility
 * mode all fields are treated as in legacy mode. For data segments while
 * in long mode only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, uint32_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
	/* This should never be a "system" segment. */
	ASSERT3U(type & SDT_S, !=, 0);
	ASSERT3P(dp, !=, NULL);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;	/* D bit must be 0 in long mode */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the GDT whenever we change segment registers around.
	 * With KPTI on, the GDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	ASSERT3U(type & SDT_A, !=, 0);

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;	/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}
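
/*
 * Example (as used in init_gdt_common() below): a 64-bit user code segment
 * with a zero base is built as
 *
 *	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
 *	    SDP_PAGES, SDP_OP32);
 *
 * Note that the default operand size argument is ignored for SDP_LONG
 * segments, per the comment above.
 */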

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}
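
/*
 * Example (as used in init_gdt_common() below): the kernel TSS descriptor
 * is installed with
 *
 *	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 *	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 *
 * The limit is inclusive and expressed in bytes (ssd_gran is forced to 0),
 * hence the "- 1".
 */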

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}
/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 *
 * For 64-bit native, if we have KPTI enabled, we use the IST stack mechanism
 * on all interrupts. We have different ISTs for each class of exceptions
 * that are most likely to occur while handling an existing exception; while
 * many of these are just going to panic, it's nice not to trample on the
 * existing exception state for debugging purposes.
 *
 * Normal interrupts are all redirected unconditionally to the KPTI trampoline
 * stack space. This unifies the trampoline handling between user and kernel
 * space (and avoids the need to touch %gs).
 *
 * The KDI IDT uses the DBG IST for *all* vectors: consider single-stepping
 * tr_pftrap, when a read from KMDB causes another #PF. Without its own IST,
 * this would stomp on the kernel's mcpu_kpti_flt frame.
 */
uint_t
idt_vector_to_ist(uint_t vector)
{
#if defined(__xpv)
	_NOTE(ARGUNUSED(vector));
	return (IST_NONE);
#else
	switch (vector) {
	/* These should always use IST even without KPTI enabled. */
	case T_DBLFLT:
		return (IST_DF);
	case T_NMIFLT:
		return (IST_NMI);
	case T_MCE:
		return (IST_MCE);

	case T_BPTFLT:
	case T_SGLSTP:
		if (kpti_enable == 1) {
			return (IST_DBG);
		}
		return (IST_NONE);
	case T_STKFLT:
	case T_GPFLT:
	case T_PGFLT:
		if (kpti_enable == 1) {
			return (IST_NESTABLE);
		}
		return (IST_NONE);
	default:
		if (kpti_enable == 1) {
			return (IST_DEFAULT);
		}
		return (IST_NONE);
	}
#endif
}

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t ist)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_ist = ist;
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}
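
/*
 * Example (as used in init_idt_common() below): the #PF vector is installed
 * as an interrupt gate whose IST slot comes from idt_vector_to_ist():
 *
 *	set_gatesegd(&idt[T_PGFLT],
 *	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
 *	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
 */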

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(DEBUG)
	/* This should never be a "system" segment, but it might be null. */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
	}
	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the GDT whenever we change segment registers around.
	 * With KPTI on, the GDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
	}
#endif

#if defined(__xpv)
	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */
	CPU->cpu_gdt[sidx] = *udp;
#endif	/* __xpv */
}
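
/*
 * Illustrative sketch of the caller contract above (the surrounding calls
 * are one way a caller might prevent migration, not a required pattern):
 *
 *	kpreempt_disable();
 *	gdt_update_usegd(GDT_LWPFS, &desc);
 *	kpreempt_enable();
 */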

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(DEBUG)
	/* This should never be a "system" segment, but it might be null. */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
	}
	/*
	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
	 * will write to the LDT whenever we change segment registers around.
	 * With KPTI on, the LDT is read-only in the user page table, which
	 * causes crashes if we don't set this.
	 */
	if (udp->usd_p != 0 || udp->usd_type != 0) {
		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
	}
#endif

#if defined(__xpv)
	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */
	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}
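
/*
 * Illustrative sketch (hypothetical caller): unlike gdt_update_usegd(),
 * this can fail on the hypervisor, so the return value must be checked:
 *
 *	if (ldt_update_segd(&ldtp[seli], &ndesc) != 0)
 *		return (EINVAL);
 *
 * On bare metal the copy cannot fail and 0 is always returned.
 */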

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	ASSERT3P(gdt, !=, NULL);

	init_boot_gdt(gdt);

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to SDP_LIMIT_MAX so that we can
	 * use the SYSRET instruction to return from system calls back to
	 * 32-bit applications. SYSRET doesn't update the base, limit, or
	 * attributes of %ss or %ds descriptors. We therefore must ensure
	 * that the kernel uses something, though it will be ignored by
	 * hardware, that is compatible with 32-bit apps. For the same
	 * reason we must set the default op size of this descriptor to
	 * 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT
	 * descriptor in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, SDP_LIMIT_MAX, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	ASSERT3P(gdt0, !=, NULL);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
		    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
		    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_gdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */


/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates on i386, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV],
	    (kpti_enable == 1) ? &tr_div0trap : &div0trap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
	set_gatesegd(&idt[T_SGLSTP],
	    (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
	set_gatesegd(&idt[T_NMIFLT],
	    (kpti_enable == 1) ? &tr_nmiint : &nmiint,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
	set_gatesegd(&idt[T_BPTFLT],
	    (kpti_enable == 1) ? &tr_brktrap : &brktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
	set_gatesegd(&idt[T_OVFLW],
	    (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
	set_gatesegd(&idt[T_BOUNDFLT],
	    (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
	set_gatesegd(&idt[T_ILLINST],
	    (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
	set_gatesegd(&idt[T_NOEXTFLT],
	    (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
	set_gatesegd(&idt[T_DBLFLT],
	    (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */
	set_gatesegd(&idt[T_TSSFLT],
	    (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
	set_gatesegd(&idt[T_SEGFLT],
	    (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
	set_gatesegd(&idt[T_STKFLT],
	    (kpti_enable == 1) ? &tr_stktrap : &stktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
	set_gatesegd(&idt[T_GPFLT],
	    (kpti_enable == 1) ? &tr_gptrap : &gptrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
	set_gatesegd(&idt[T_PGFLT],
	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
	set_gatesegd(&idt[T_EXTERRFLT],
	    (kpti_enable == 1) ? &tr_ndperr : &ndperr,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
	set_gatesegd(&idt[T_ALIGNMENT],
	    (kpti_enable == 1) ? &tr_achktrap : &achktrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
	set_gatesegd(&idt[T_MCE],
	    (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
	set_gatesegd(&idt[T_SIMDFPE],
	    (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP],
	    (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));

	/*
	 * System call handler.
	 */
	set_gatesegd(&idt[T_SYSCALLINT],
	    (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET],
	    (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));

	/*
	 * Prepare interposing descriptor for the syscall handler
	 * and cache copy of the default descriptor.
	 */
	brand_tbl[0].ih_inum = T_SYSCALLINT;
	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];

	set_gatesegd(&(brand_tbl[0].ih_interp_desc),
	    (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
	    &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    idt_vector_to_ist(T_SYSCALLINT));

	brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. Vector 15 is
	 * also unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++) {
		set_gatesegd(&idt[i],
		    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(T_RESVTRAP));
	}

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++) {
		set_gatesegd(&idt[i],
		    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(T_INVALTRAP));
	}

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname),
		    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    idt_vector_to_ist(i));
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)

static void
init_tss(void)
{
	extern struct cpu cpus[];

	/*
	 * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
	 * context switch but it'll be overwritten with this same value anyway.
	 */
	if (kpti_enable == 1) {
		ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
	}

	/* Set up the IST stacks for double fault, NMI, MCE. */
	ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
	ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];

	/*
	 * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
	 * enabled), and also for KDI (always).
	 */
	ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;

	if (kpti_enable == 1) {
		/* This IST stack is used for #GP,#PF,#SS (fault) intrs. */
		ktss0->tss_ist5 =
		    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

		/* This IST stack is used for all other intrs (for KPTI). */
		ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
	}

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);


	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Initialize this CPU's LDT.
	 */
	CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
	    LDT_CPU_SIZE, PAGESIZE);
	bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
	CPU->cpu_m.mcpu_ldt_len = 0;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;


	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();

	/* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */
	kpti_safe_cr3 = (uint64_t)getcr3();
}

#endif	/* __xpv */

#ifndef __xpv
/*
 * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 0xffff (64 KB) on a
 * VM exit, so we have to manually fix it up ourselves.
 *
 * The caller may still need to ensure that it can't go off-CPU with the
 * incorrect limit before calling this (e.g., by disabling preemption).
 */
void
reset_gdtr_limit(void)
{
	ulong_t flags = intr_clear();
	desctbr_t gdtr;

	rd_gdtr(&gdtr);
	gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
	wr_gdtr(&gdtr);

	intr_restore(flags);
}
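
/*
 * Illustrative sketch of the caller pattern described above (the
 * surrounding calls are one way to satisfy it, not a fixed contract):
 *
 *	kpreempt_disable();
 *	reset_gdtr_limit();
 *	... code that relies on the corrected limit ...
 *	kpreempt_enable();
 */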
#endif /* !__xpv */

/*
 * We need a GDT owned by the kernel and not the bootstrap relatively
 * early in kernel initialization (e.g., to have segments we can reliably
 * catch an exception on).
 *
 * Initializes a GDT with segments normally defined in the boot loader.
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
	ASSERT3P(bgdt, !=, NULL);

#ifdef __xpv
	/* XXX: It is unclear why this 32-bit data segment is marked long. */
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
#else
	/*
	 * Reset boot segments. These ostensibly come from the boot loader,
	 * but we reset them to match our expectations, particularly if we
	 * are not using that loader.
	 */
	set_usegd(&bgdt[GDT_B32DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);

	/*
	 * 16-bit segments for making BIOS calls (not applicable on all
	 * architectures).
	 */
	set_usegd(&bgdt[GDT_B16CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, 0, 0);
	/*
	 * XXX: SDP_OP32 makes this a 32-bit segment, which seems wrong
	 * here, but that's what boot_gdt.s used.
	 */
	set_usegd(&bgdt[GDT_B16DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
	    SDT_MEMRWA, SEL_KPL, 0, SDP_OP32);
#endif /* __xpv */

	/*
	 * A 64-bit code segment used in early boot. Early IDTs refer to this.
	 */
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
	    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void *arg __unused)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via the
	 * syscall instruction. The 32-bit syscalls are handled by the
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
		} else {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
		}
	}

#endif

	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_INTC_SEP_EIP,
			    (uintptr_t)tr_brand_sys_sysenter);
		} else {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
		}
	}
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void *arg __unused)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
		} else {
			wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
			wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
		}
	}

#endif

	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
		if (kpti_enable == 1) {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
		} else {
			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
		}
	}
}