xref: /titanic_51/usr/src/uts/intel/ia32/os/desctbls.c (revision 6185db853e024a486ff8837e6784dd290d866112)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 #pragma ident	"%Z%%M%	%I%	%E% SMI"
7 
8 /*
9  * Copyright (c) 1992 Terrence R. Lambert.
10  * Copyright (c) 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * This code is derived from software contributed to Berkeley by
14  * William Jolitz.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  * 3. All advertising materials mentioning features or use of this software
25  *    must display the following acknowledgement:
26  *	This product includes software developed by the University of
27  *	California, Berkeley and its contributors.
28  * 4. Neither the name of the University nor the names of its contributors
29  *    may be used to endorse or promote products derived from this software
30  *    without specific prior written permission.
31  *
32  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42  * SUCH DAMAGE.
43  *
44  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
45  */
46 
47 #include <sys/types.h>
48 #include <sys/tss.h>
49 #include <sys/segments.h>
50 #include <sys/trap.h>
51 #include <sys/cpuvar.h>
52 #include <sys/x86_archext.h>
53 #include <sys/archsystm.h>
54 #include <sys/machsystm.h>
55 #include <sys/kobj.h>
56 #include <sys/cmn_err.h>
57 #include <sys/reboot.h>
58 #include <sys/kdi.h>
59 #include <sys/systm.h>
60 #include <sys/controlregs.h>
61 
62 extern void syscall_int(void);
63 
/*
 * cpu0 and default tables and structures.
 *
 * Statically allocated descriptor-table state for the boot CPU. The
 * init_gdt(), init_idt() and init_tss() routines in this file fill most of
 * these in; the 16-byte alignment is requested via #pragma align.
 */
desctbr_t	gdt0_default_r;		/* NOTE(review): not written in this file; presumably gdt0 in GDTR format */

#pragma	align	16(idt0)
gate_desc_t	idt0[NIDT]; 		/* interrupt descriptor table */
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */

#pragma align	16(ktss0)
struct tss	ktss0;			/* kernel task state structure */

#if defined(__i386)
#pragma align	16(dftss0)
struct tss	dftss0;			/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
system_desc_t	zero_sdesc;		/* NOTE(review): never written in this file */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

/* Stack handed to the double-fault handler by init_tss(). */
#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];
90 
91 extern void	fast_null(void);
92 extern hrtime_t	get_hrtime(void);
93 extern hrtime_t	gethrvtime(void);
94 extern hrtime_t	get_hrestime(void);
95 extern uint64_t	getlgrp(void);
96 
97 void (*(fasttable[]))(void) = {
98 	fast_null,			/* T_FNULL routine */
99 	fast_null,			/* T_FGETFP routine (initially null) */
100 	fast_null,			/* T_FSETFP routine (initially null) */
101 	(void (*)())get_hrtime,		/* T_GETHRTIME */
102 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
103 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
104 	(void (*)())getlgrp		/* T_GETLGRP */
105 };
106 
/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 *
 * brand_interpositioning_enable() copies ih_interp_desc into the current
 * CPU's IDT slot ih_inum; brand_interpositioning_disable() copies
 * ih_default_desc back into the same slot.
 */
struct interposing_handler {
	int ih_inum;			/* IDT vector being interposed on */
	gate_desc_t ih_interp_desc;	/* gate for the branded handler */
	gate_desc_t ih_default_desc;	/* cached copy of the standard gate */
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 *
 * init_idt() fills entries 0 and 1 (T_INT80 and T_SYSCALLINT) and sets
 * ih_inum of entry 2 to 0; walkers of this table stop at the first entry
 * whose ih_inum is 0.
 */
static struct interposing_handler brand_tbl[3];
122 
123 /*
124  * software prototypes for default local descriptor table
125  */
126 
/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */
131 
132 #if defined(__amd64)
133 
134 /*
135  * In long mode we have the new L or long mode attribute bit
136  * for code segments. Only the conforming bit in type is used along
137  * with descriptor priority and present bits. Default operand size must
138  * be zero when in long mode. In 32-bit compatibility mode all fields
139  * are treated as in legacy mode. For data segments while in long mode
140  * only the present bit is loaded.
141  */
142 void
143 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
144     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
145 {
146 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
147 
148 	/*
149 	 * 64-bit long mode.
150 	 */
151 	if (lmode == SDP_LONG)
152 		dp->usd_def32 = 0;		/* 32-bit operands only */
153 	else
154 		/*
155 		 * 32-bit compatibility mode.
156 		 */
157 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
158 
159 	dp->usd_long = lmode;	/* 64-bit mode */
160 	dp->usd_type = type;
161 	dp->usd_dpl = dpl;
162 	dp->usd_p = 1;
163 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
164 
165 	dp->usd_lobase = (uintptr_t)base;
166 	dp->usd_midbase = (uintptr_t)base >> 16;
167 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
168 	dp->usd_lolimit = size;
169 	dp->usd_hilimit = (uintptr_t)size >> 16;
170 }
171 
172 #elif defined(__i386)
173 
174 /*
175  * Install user segment descriptor for code and data.
176  */
177 void
178 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
179     uint_t dpl, uint_t gran, uint_t defopsz)
180 {
181 	dp->usd_lolimit = size;
182 	dp->usd_hilimit = (uintptr_t)size >> 16;
183 
184 	dp->usd_lobase = (uintptr_t)base;
185 	dp->usd_midbase = (uintptr_t)base >> 16;
186 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
187 
188 	dp->usd_type = type;
189 	dp->usd_dpl = dpl;
190 	dp->usd_p = 1;
191 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
192 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
193 }
194 
195 #endif	/* __i386 */
196 
197 /*
198  * Install system segment descriptor for LDT and TSS segments.
199  */
200 
201 #if defined(__amd64)
202 
203 void
204 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
205     uint_t dpl)
206 {
207 	dp->ssd_lolimit = size;
208 	dp->ssd_hilimit = (uintptr_t)size >> 16;
209 
210 	dp->ssd_lobase = (uintptr_t)base;
211 	dp->ssd_midbase = (uintptr_t)base >> 16;
212 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
213 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
214 
215 	dp->ssd_type = type;
216 	dp->ssd_zero1 = 0;	/* must be zero */
217 	dp->ssd_zero2 = 0;
218 	dp->ssd_dpl = dpl;
219 	dp->ssd_p = 1;
220 	dp->ssd_gran = 0;	/* force byte units */
221 }
222 
223 #elif defined(__i386)
224 
225 void
226 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
227     uint_t dpl)
228 {
229 	dp->ssd_lolimit = size;
230 	dp->ssd_hilimit = (uintptr_t)size >> 16;
231 
232 	dp->ssd_lobase = (uintptr_t)base;
233 	dp->ssd_midbase = (uintptr_t)base >> 16;
234 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
235 
236 	dp->ssd_type = type;
237 	dp->ssd_zero = 0;	/* must be zero */
238 	dp->ssd_dpl = dpl;
239 	dp->ssd_p = 1;
240 	dp->ssd_gran = 0;	/* force byte units */
241 }
242 
243 #endif	/* __i386 */
244 
245 /*
246  * Install gate segment descriptor for interrupt, trap, call and task gates.
247  */
248 
249 #if defined(__amd64)
250 
251 /*
252  * Note stkcpy is replaced with ist. Read the PRM for details on this.
253  */
254 void
255 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, uint_t ist,
256     uint_t type, uint_t dpl)
257 {
258 	dp->sgd_looffset = (uintptr_t)func;
259 	dp->sgd_hioffset = (uintptr_t)func >> 16;
260 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
261 
262 	dp->sgd_selector =  (uint16_t)sel;
263 	dp->sgd_ist = ist;
264 	dp->sgd_type = type;
265 	dp->sgd_dpl = dpl;
266 	dp->sgd_p = 1;
267 }
268 
269 #elif defined(__i386)
270 
271 void
272 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
273     uint_t wcount, uint_t type, uint_t dpl)
274 {
275 	dp->sgd_looffset = (uintptr_t)func;
276 	dp->sgd_hioffset = (uintptr_t)func >> 16;
277 
278 	dp->sgd_selector =  (uint16_t)sel;
279 	dp->sgd_stkcpy = wcount;
280 	dp->sgd_type = type;
281 	dp->sgd_dpl = dpl;
282 	dp->sgd_p = 1;
283 }
284 
285 #endif /* __i386 */
286 
287 /*
288  * Build kernel GDT.
289  */
290 
291 #if defined(__amd64)
292 
/*
 * Build the amd64 kernel GDT in gdt0 and load it into the GDTR.
 *
 * Four descriptors are inherited from the boot loader's GDT, the kernel and
 * user code/data descriptors, the kernel TSS descriptor, the lwp %fs/%gs
 * descriptors and the brand descriptors are constructed here, and finally
 * the zero-base template descriptors (zero_udesc, zero_u32desc) are
 * initialized. Panics if the boot GDT base reads back as NULL.
 */
static void
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;
	size_t		alen = 0xfffff;	/* entire 32-bit address space */
	int		i;

	/*
	 * Copy in from boot's gdt to our gdt entries 1 - 4.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B64DATA] = bgdt[GDT_B64DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt0[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
	 * mode, but we set it here to alen (0xfffff pages) so that we can use
	 * the SYSRET instruction to return from system calls back to 32-bit
	 * applications.
	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
	 * descriptors. We therefore must ensure that the kernel uses something,
	 * though it will be ignored by hardware, that is compatible with 32-bit
	 * apps. For the same reason we must set the default op size of this
	 * descriptor to 32-bit operands.
	 */
	set_usegd(&gdt0[GDT_KDATA], SDP_LONG, NULL, alen, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	/*
	 * set_usegd() zeroes usd_def32 for SDP_LONG descriptors, so the
	 * 32-bit default operand size has to be applied by hand.
	 */
	gdt0[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt0[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt0[GDT_U32CODE], SDP_SHORT, NULL, alen, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt0[GDT_UDATA], SDP_SHORT, NULL, alen, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
	 * in the GDT is 0.
	 */

	/*
	 * Kernel TSS. The descriptor covers exactly ktss0 (limit is its size
	 * minus one).
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt0[GDT_LWPFS], SDP_SHORT, NULL, alen, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt0[GDT_LWPGS], SDP_SHORT, NULL, alen, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, alen, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Install our new GDT. This must happen only after every descriptor
	 * above has been written.
	 */
	r_gdt.dtr_limit = sizeof (gdt0) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}
403 
404 #elif defined(__i386)
405 
/*
 * Build the i386 kernel GDT in gdt0 and load it into the GDTR.
 *
 * Four descriptors are inherited from the boot loader's GDT; the flat
 * kernel and user code/data descriptors, the double-fault and kernel TSS
 * descriptors, the kernel %gs descriptor, the lwp private descriptors and
 * the brand descriptors are constructed here. Finally the zero-base
 * template descriptor zero_udesc is initialized. Panics if the boot GDT
 * base reads back as NULL.
 */
static void
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;
	int		i;

	/*
	 * Copy in from boot's gdt to our gdt entries 1 - 4.
	 * Entry 0 is null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_BOOTFLAT] = bgdt[GDT_BOOTFLAT];
	gdt0[GDT_BOOTCODE] = bgdt[GDT_BOOTCODE];
	gdt0[GDT_BOOTCODE16] = bgdt[GDT_BOOTCODE16];
	gdt0[GDT_BOOTDATA] = bgdt[GDT_BOOTDATA];

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt0[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt0[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt0[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt0[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_DBFLT], &dftss0,
	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt0[GDT_KTSS], &ktss0,
	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * %gs selector for kernel. The segment is based at cpus[0] and is
	 * byte-granular, covering exactly one struct cpu.
	 */
	set_usegd(&gdt0[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt0[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt0[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);

	/*
	 * Install our new GDT. This must happen only after every descriptor
	 * above has been written.
	 */
	r_gdt.dtr_limit = sizeof (gdt0) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, 0, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES, SDP_OP32);
}
506 
507 #endif	/* __i386 */
508 
509 #if defined(__amd64)
510 
/*
 * Build kernel IDT.
 *
 * Note that we pretty much require every gate to be an interrupt gate;
 * that's because of our dependency on using 'swapgs' every time we come
 * into the kernel to find the cpu structure - if we get interrupted just
 * before doing that, so that %cs is in kernel mode (so that the trap prolog
 * doesn't do a swapgs), but %gsbase is really still pointing at something
 * in userland, bad things ensue.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 *
 * XX64	We do need to think further about the follow-on impact of this.
 *	Most of the kernel handlers re-enable interrupts as soon as they've
 *	saved register state and done the swapgs, but there may be something
 *	more subtle going on.
 *
 * Order of construction: every slot is first pointed at resvtrap, then the
 * exception vectors are overwritten, then vectors 32-255 are bound to the
 * ivct<N> symbols, and only after that are the int80, fast trap, system call
 * and DTrace gates installed on top of those. The table is then loaded into
 * the IDTR and the brand interposition descriptors in brand_tbl are
 * prepared.
 */
static void
init_idt(void)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt0[i], &resvtrap, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);

	set_gatesegd(&idt0[T_ZERODIV], &div0trap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SGLSTP], &dbgtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NMIFLT], &nmiint, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_BPTFLT], &brktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_OVFLW], &ovflotrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_BOUNDFLT], &boundstrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ILLINST], &invoptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NOEXTFLT], &ndptrap,  KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * double fault handler. This is the only gate with a non-zero ist
	 * argument (1); init_tss() points ktss0.tss_ist1 at the top of
	 * dblfault_stack0.
	 */
	set_gatesegd(&idt0[T_DBLFLT], &syserrtrap, KCS_SEL, 1, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt0[T_TSSFLT], &invtsstrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SEGFLT], &segnptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_STKFLT], &stktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_GPFLT], &gptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_PGFLT], &pftrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 15 reserved.
	 */
	set_gatesegd(&idt0[15], &resvtrap, KCS_SEL, 0, SDT_SYSIGT, SEL_KPL);

	set_gatesegd(&idt0[T_EXTERRFLT], &ndperr, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ALIGNMENT], &achktrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_MCE], &mcetrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SIMDFPE], &xmtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 20-31 reserved; these are routed to invaltrap rather than resvtrap.
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt0[i], &invaltrap, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);

	/*
	 * interrupts 32 - 255. Each vector N is bound to the symbol "ivctN",
	 * looked up by name; a missing symbol is fatal.
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt0[i], ivctptr, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);
	}

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * System call handler.
	 */
	set_gatesegd(&idt0[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt0[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/* When booted with RB_DEBUG, hand the finished table to kdi. */
	if (boothowto & RB_DEBUG)
		kdi_dvec_idt_sync(idt0);

	/*
	 * We must maintain a description of idt0 in convenient IDTR format
	 * for use by T_NMIFLT and T_PGFLT (nmiint() and pentium_pftrap())
	 * handlers.
	 */
	idt0_default_r.dtr_limit = sizeof (idt0) - 1;
	idt0_default_r.dtr_base = (uintptr_t)idt0;
	wr_idtr(&idt0_default_r);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, 0, SDT_SYSIGT, SEL_UPL);

	/* Terminator: table walkers stop at the first zero ih_inum. */
	brand_tbl[2].ih_inum = 0;
}
670 
671 #elif defined(__i386)
672 
/*
 * Build kernel IDT.
 *
 * Order of construction: every slot is first pointed at resvtrap, then the
 * exception vectors are overwritten (using a mix of SDT_SYSTGT and
 * SDT_SYSIGT gate types, plus an SDT_SYSTASKGT gate for the double fault),
 * then vectors 32-255 are bound to the ivct<N> symbols, and only after that
 * are the int80, fast trap, system call and DTrace gates installed on top
 * of those. The table is then loaded into the IDTR and the brand
 * interposition descriptors in brand_tbl are prepared.
 */
static void
init_idt(void)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt0[i], &resvtrap, KCS_SEL, 0, SDT_SYSTGT,
		    SEL_KPL);

	set_gatesegd(&idt0[T_ZERODIV], &div0trap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SGLSTP], &dbgtrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NMIFLT], &nmiint, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_BPTFLT], &brktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_OVFLW], &ovflotrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_UPL);
	set_gatesegd(&idt0[T_BOUNDFLT], &boundstrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ILLINST], &invoptrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_NOEXTFLT], &ndptrap,  KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * Install TSS for T_DBLFLT handler. The gate carries no handler
	 * address; it names the double-fault TSS selector instead, and
	 * init_tss() fills in that TSS (dftss0).
	 */
	set_gatesegd(&idt0[T_DBLFLT], NULL, DFTSS_SEL, 0, SDT_SYSTASKGT,
	    SEL_KPL);

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt0[T_TSSFLT], &invtsstrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SEGFLT], &segnptrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_STKFLT], &stktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_GPFLT], &gptrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_PGFLT], &pftrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);

	/*
	 * 15 reserved.
	 */
	set_gatesegd(&idt0[15], &resvtrap, KCS_SEL, 0, SDT_SYSTGT, SEL_KPL);

	set_gatesegd(&idt0[T_EXTERRFLT], &ndperr, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_ALIGNMENT], &achktrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_MCE], &mcetrap, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_KPL);
	set_gatesegd(&idt0[T_SIMDFPE], &xmtrap, KCS_SEL, 0, SDT_SYSTGT,
	    SEL_KPL);

	/*
	 * 20-31 reserved; these are routed to invaltrap rather than resvtrap.
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt0[i], &invaltrap, KCS_SEL, 0, SDT_SYSTGT,
		    SEL_KPL);

	/*
	 * interrupts 32 - 255. Each vector N is bound to the symbol "ivctN",
	 * looked up by name; a missing symbol is fatal.
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt0[i], ivctptr, KCS_SEL, 0, SDT_SYSIGT,
		    SEL_KPL);
	}

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT,
	    SEL_UPL);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * System call handler. Note that we don't use the hardware's parameter
	 * copying mechanism here; see the comment above sys_call() for details.
	 */
	set_gatesegd(&idt0[T_SYSCALLINT], &sys_call, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt0[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 0,
	    SDT_SYSIGT, SEL_UPL);

	/* When booted with RB_DEBUG, hand the finished table to kdi. */
	if (boothowto & RB_DEBUG)
		kdi_dvec_idt_sync(idt0);

	/*
	 * We must maintain a description of idt0 in convenient IDTR format
	 * for use by T_NMIFLT and T_PGFLT (nmiint() and pentium_pftrap())
	 * handlers.
	 */
	idt0_default_r.dtr_limit = sizeof (idt0) - 1;
	idt0_default_r.dtr_base = (uintptr_t)idt0;
	wr_idtr(&idt0_default_r);

	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    0, SDT_SYSIGT, SEL_UPL);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, 0, SDT_SYSIGT, SEL_UPL);

	/* Terminator: table walkers stop at the first zero ih_inum. */
	brand_tbl[2].ih_inum = 0;
}
819 
820 #endif	/* __i386 */
821 
/*
 * Load a null selector into the LDTR.
 *
 * The kernel only deals with an LDT once a user explicitly creates one, so
 * under normal circumstances the LDTR holds 0 and any process that
 * references the LDT takes a #gp. System calls made via the obsolete lcall
 * mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
	wr_ldtr(0);
}
833 
834 #if defined(__amd64)
835 
836 static void
837 init_tss(void)
838 {
839 	/*
840 	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
841 	 * All exceptions but #DF will run on the thread stack.
842 	 * Set up the double fault stack here.
843 	 */
844 	ktss0.tss_ist1 =
845 	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
846 
847 	/*
848 	 * Set I/O bit map offset equal to size of TSS segment limit
849 	 * for no I/O permission map. This will force all user I/O
850 	 * instructions to generate #gp fault.
851 	 */
852 	ktss0.tss_bitmapbase = sizeof (ktss0);
853 
854 	/*
855 	 * Point %tr to descriptor for ktss0 in gdt.
856 	 */
857 	wr_tsr(KTSS_SEL);
858 }
859 
860 #elif defined(__i386)
861 
862 static void
863 init_tss(void)
864 {
865 	/*
866 	 * ktss0.tss_esp dynamically filled in by resume() on each
867 	 * context switch.
868 	 */
869 	ktss0.tss_ss0	= KDS_SEL;
870 	ktss0.tss_eip	= (uint32_t)_start;
871 	ktss0.tss_ds	= ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
872 	ktss0.tss_cs	= KCS_SEL;
873 	ktss0.tss_fs	= KFS_SEL;
874 	ktss0.tss_gs	= KGS_SEL;
875 	ktss0.tss_ldt	= ULDT_SEL;
876 
877 	/*
878 	 * Initialize double fault tss.
879 	 */
880 	dftss0.tss_esp0	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
881 	dftss0.tss_ss0	= KDS_SEL;
882 
883 	/*
884 	 * tss_cr3 will get initialized in hat_kern_setup() once our page
885 	 * tables have been setup.
886 	 */
887 	dftss0.tss_eip	= (uint32_t)syserrtrap;
888 	dftss0.tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
889 	dftss0.tss_cs	= KCS_SEL;
890 	dftss0.tss_ds	= KDS_SEL;
891 	dftss0.tss_es	= KDS_SEL;
892 	dftss0.tss_ss	= KDS_SEL;
893 	dftss0.tss_fs	= KFS_SEL;
894 	dftss0.tss_gs	= KGS_SEL;
895 
896 	/*
897 	 * Set I/O bit map offset equal to size of TSS segment limit
898 	 * for no I/O permission map. This will force all user I/O
899 	 * instructions to generate #gp fault.
900 	 */
901 	ktss0.tss_bitmapbase = sizeof (ktss0);
902 
903 	/*
904 	 * Point %tr to descriptor for ktss0 in gdt.
905 	 */
906 	wr_tsr(KTSS_SEL);
907 }
908 
909 #endif	/* __i386 */
910 
/*
 * Set up the boot CPU's descriptor tables. The GDT is built and loaded
 * first, followed by the TSS, the IDT and finally the LDT register.
 */
void
init_tables(void)
{
	init_gdt();	/* segment descriptors, loads GDTR */
	init_tss();	/* task state, loads %tr */
	init_idt();	/* gates, loads IDTR */
	init_ldt();	/* clears LDTR */
}
919 
920 /*
921  * Enable interpositioning on the system call path by rewriting the
922  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
923  * the branded entry points.
924  */
925 void
926 brand_interpositioning_enable(void)
927 {
928 	int i;
929 
930 	for (i = 0; brand_tbl[i].ih_inum; i++)
931 		CPU->cpu_idt[brand_tbl[i].ih_inum] =
932 		    brand_tbl[i].ih_interp_desc;
933 
934 #if defined(__amd64)
935 	wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
936 	wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
937 #endif
938 
939 	if (x86_feature & X86_SEP)
940 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
941 }
942 
943 /*
944  * Disable interpositioning on the system call path by rewriting the
945  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
946  * the standard entry points, which bypass the interpositioning hooks.
947  */
948 void
949 brand_interpositioning_disable(void)
950 {
951 	int i;
952 
953 	for (i = 0; brand_tbl[i].ih_inum; i++)
954 		CPU->cpu_idt[brand_tbl[i].ih_inum] =
955 		    brand_tbl[i].ih_default_desc;
956 
957 #if defined(__amd64)
958 	wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
959 	wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
960 #endif
961 
962 	if (x86_feature & X86_SEP)
963 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
964 }
965