xref: /titanic_51/usr/src/uts/intel/ia32/os/desctbls.c (revision 883492d5a933deb34cd27521e7f2756773cd27af)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Copyright (c) 1992 Terrence R. Lambert.
31  * Copyright (c) 1990 The Regents of the University of California.
32  * All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * William Jolitz.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
66  */
67 
68 #include <sys/types.h>
69 #include <sys/sysmacros.h>
70 #include <sys/tss.h>
71 #include <sys/segments.h>
72 #include <sys/trap.h>
73 #include <sys/cpuvar.h>
74 #include <sys/bootconf.h>
75 #include <sys/x86_archext.h>
76 #include <sys/controlregs.h>
77 #include <sys/archsystm.h>
78 #include <sys/machsystm.h>
79 #include <sys/kobj.h>
80 #include <sys/cmn_err.h>
81 #include <sys/reboot.h>
82 #include <sys/kdi.h>
83 #include <sys/mach_mmu.h>
84 #include <sys/systm.h>
85 #include <sys/promif.h>
86 #include <sys/bootinfo.h>
87 #include <vm/kboot_mmu.h>
88 
89 /*
90  * cpu0 and default tables and structures.
91  */
92 user_desc_t	*gdt0;
93 desctbr_t	gdt0_default_r;
94 
95 #pragma	align	16(idt0)
96 gate_desc_t	idt0[NIDT]; 		/* interrupt descriptor table */
97 #if defined(__i386)
98 desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
99 #endif
100 
101 #pragma align	16(ktss0)
102 struct tss	ktss0;			/* kernel task state structure */
103 
104 #if defined(__i386)
105 #pragma align	16(dftss0)
106 struct tss	dftss0;			/* #DF double-fault exception */
107 #endif	/* __i386 */
108 
109 user_desc_t	zero_udesc;		/* base zero user desc native procs */
110 system_desc_t	zero_sdesc;
111 
112 #if defined(__amd64)
113 user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
114 #endif	/* __amd64 */
115 
116 #pragma	align	16(dblfault_stack0)
117 char		dblfault_stack0[DEFAULTSTKSZ];
118 
119 extern void	fast_null(void);
120 extern hrtime_t	get_hrtime(void);
121 extern hrtime_t	gethrvtime(void);
122 extern hrtime_t	get_hrestime(void);
123 extern uint64_t	getlgrp(void);
124 
125 void (*(fasttable[]))(void) = {
126 	fast_null,			/* T_FNULL routine */
127 	fast_null,			/* T_FGETFP routine (initially null) */
128 	fast_null,			/* T_FSETFP routine (initially null) */
129 	(void (*)())get_hrtime,		/* T_GETHRTIME */
130 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
131 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
132 	(void (*)())getlgrp		/* T_GETLGRP */
133 };
134 
135 /*
136  * Structure containing pre-computed descriptors to allow us to temporarily
137  * interpose on a standard handler.
138  */
139 struct interposing_handler {
140 	int ih_inum;
141 	gate_desc_t ih_interp_desc;
142 	gate_desc_t ih_default_desc;
143 };
144 
145 /*
146  * The brand infrastructure interposes on two handlers, and we use one as a
147  * NULL signpost.
148  */
149 static struct interposing_handler brand_tbl[3];
150 
151 /*
152  * software prototypes for default local descriptor table
153  */
154 
/*
 * Routines for loading segment descriptors in the format the hardware
 * can understand.
 */
159 
160 #if defined(__amd64)
161 
162 /*
163  * In long mode we have the new L or long mode attribute bit
164  * for code segments. Only the conforming bit in type is used along
165  * with descriptor priority and present bits. Default operand size must
166  * be zero when in long mode. In 32-bit compatibility mode all fields
167  * are treated as in legacy mode. For data segments while in long mode
168  * only the present bit is loaded.
169  */
170 void
171 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
172     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
173 {
174 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
175 
176 	/*
177 	 * 64-bit long mode.
178 	 */
179 	if (lmode == SDP_LONG)
180 		dp->usd_def32 = 0;		/* 32-bit operands only */
181 	else
182 		/*
183 		 * 32-bit compatibility mode.
184 		 */
185 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
186 
187 	dp->usd_long = lmode;	/* 64-bit mode */
188 	dp->usd_type = type;
189 	dp->usd_dpl = dpl;
190 	dp->usd_p = 1;
191 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
192 
193 	dp->usd_lobase = (uintptr_t)base;
194 	dp->usd_midbase = (uintptr_t)base >> 16;
195 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
196 	dp->usd_lolimit = size;
197 	dp->usd_hilimit = (uintptr_t)size >> 16;
198 }
199 
200 #elif defined(__i386)
201 
202 /*
203  * Install user segment descriptor for code and data.
204  */
205 void
206 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
207     uint_t dpl, uint_t gran, uint_t defopsz)
208 {
209 	dp->usd_lolimit = size;
210 	dp->usd_hilimit = (uintptr_t)size >> 16;
211 
212 	dp->usd_lobase = (uintptr_t)base;
213 	dp->usd_midbase = (uintptr_t)base >> 16;
214 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
215 
216 	dp->usd_type = type;
217 	dp->usd_dpl = dpl;
218 	dp->usd_p = 1;
219 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
220 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
221 }
222 
223 #endif	/* __i386 */
224 
225 /*
226  * Install system segment descriptor for LDT and TSS segments.
227  */
228 
229 #if defined(__amd64)
230 
231 void
232 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
233     uint_t dpl)
234 {
235 	dp->ssd_lolimit = size;
236 	dp->ssd_hilimit = (uintptr_t)size >> 16;
237 
238 	dp->ssd_lobase = (uintptr_t)base;
239 	dp->ssd_midbase = (uintptr_t)base >> 16;
240 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
241 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
242 
243 	dp->ssd_type = type;
244 	dp->ssd_zero1 = 0;	/* must be zero */
245 	dp->ssd_zero2 = 0;
246 	dp->ssd_dpl = dpl;
247 	dp->ssd_p = 1;
248 	dp->ssd_gran = 0;	/* force byte units */
249 }
250 
251 #elif defined(__i386)
252 
253 void
254 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
255     uint_t dpl)
256 {
257 	dp->ssd_lolimit = size;
258 	dp->ssd_hilimit = (uintptr_t)size >> 16;
259 
260 	dp->ssd_lobase = (uintptr_t)base;
261 	dp->ssd_midbase = (uintptr_t)base >> 16;
262 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
263 
264 	dp->ssd_type = type;
265 	dp->ssd_zero = 0;	/* must be zero */
266 	dp->ssd_dpl = dpl;
267 	dp->ssd_p = 1;
268 	dp->ssd_gran = 0;	/* force byte units */
269 }
270 
271 #endif	/* __i386 */
272 
273 /*
274  * Install gate segment descriptor for interrupt, trap, call and task gates.
275  */
276 
277 #if defined(__amd64)
278 
279 void
280 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
281     uint_t type, uint_t dpl)
282 {
283 	dp->sgd_looffset = (uintptr_t)func;
284 	dp->sgd_hioffset = (uintptr_t)func >> 16;
285 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
286 
287 	dp->sgd_selector =  (uint16_t)sel;
288 
289 	/*
290 	 * For 64 bit native we use the IST stack mechanism
291 	 * for double faults. All other traps use the CPL = 0
292 	 * (tss_rsp0) stack.
293 	 */
294 	if (type == T_DBLFLT)
295 		dp->sgd_ist = 1;
296 	else
297 		dp->sgd_ist = 0;
298 
299 	dp->sgd_type = type;
300 	dp->sgd_dpl = dpl;
301 	dp->sgd_p = 1;
302 }
303 
304 #elif defined(__i386)
305 
306 void
307 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
308     uint_t type, uint_t dpl)
309 {
310 	dp->sgd_looffset = (uintptr_t)func;
311 	dp->sgd_hioffset = (uintptr_t)func >> 16;
312 
313 	dp->sgd_selector =  (uint16_t)sel;
314 	dp->sgd_stkcpy = 0;	/* always zero bytes */
315 	dp->sgd_type = type;
316 	dp->sgd_dpl = dpl;
317 	dp->sgd_p = 1;
318 }
319 
320 #endif	/* __i386 */
321 
322 #if defined(__amd64)
323 
324 /*
325  * Build kernel GDT.
326  */
327 
328 static void
329 init_gdt_common(user_desc_t *gdt)
330 {
331 	int i;
332 
333 	/*
334 	 * 64-bit kernel code segment.
335 	 */
336 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
337 	    SDP_PAGES, SDP_OP32);
338 
339 	/*
340 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
341 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
342 	 * instruction to return from system calls back to 32-bit applications.
343 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
344 	 * descriptors. We therefore must ensure that the kernel uses something,
345 	 * though it will be ignored by hardware, that is compatible with 32-bit
346 	 * apps. For the same reason we must set the default op size of this
347 	 * descriptor to 32-bit operands.
348 	 */
349 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
350 	    SEL_KPL, SDP_PAGES, SDP_OP32);
351 	gdt[GDT_KDATA].usd_def32 = 1;
352 
353 	/*
354 	 * 64-bit user code segment.
355 	 */
356 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
357 	    SDP_PAGES, SDP_OP32);
358 
359 	/*
360 	 * 32-bit user code segment.
361 	 */
362 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
363 	    SEL_UPL, SDP_PAGES, SDP_OP32);
364 
365 	/*
366 	 * 32 and 64 bit data segments can actually share the same descriptor.
367 	 * In long mode only the present bit is checked but all other fields
368 	 * are loaded. But in compatibility mode all fields are interpreted
369 	 * as in legacy mode so they must be set correctly for a 32-bit data
370 	 * segment.
371 	 */
372 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
373 	    SDP_PAGES, SDP_OP32);
374 
375 	/*
376 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
377 	 * in the GDT is 0.
378 	 */
379 
380 	/*
381 	 * Kernel TSS
382 	 */
383 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
384 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
385 
386 	/*
387 	 * Initialize fs and gs descriptors for 32 bit processes.
388 	 * Only attributes and limits are initialized, the effective
389 	 * base address is programmed via fsbase/gsbase.
390 	 */
391 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
392 	    SEL_UPL, SDP_PAGES, SDP_OP32);
393 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
394 	    SEL_UPL, SDP_PAGES, SDP_OP32);
395 
396 	/*
397 	 * Initialize the descriptors set aside for brand usage.
398 	 * Only attributes and limits are initialized.
399 	 */
400 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
401 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
402 		    SEL_UPL, SDP_PAGES, SDP_OP32);
403 
404 	/*
405 	 * Initialize convenient zero base user descriptors for clearing
406 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
407 	 * an example.
408 	 */
409 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
410 	    SDP_BYTES, SDP_OP32);
411 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
412 	    SDP_PAGES, SDP_OP32);
413 }
414 
415 static user_desc_t *
416 init_gdt(void)
417 {
418 	desctbr_t	r_bgdt, r_gdt;
419 	user_desc_t	*bgdt;
420 
421 #if !defined(__lint)
422 	/*
423 	 * Our gdt is never larger than a single page.
424 	 */
425 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
426 #endif
427 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
428 	    PAGESIZE, PAGESIZE);
429 	if (gdt0 == NULL)
430 		panic("init_gdt: BOP_ALLOC failed");
431 	bzero(gdt0, PAGESIZE);
432 
433 	init_gdt_common(gdt0);
434 
435 	/*
436 	 * Copy in from boot's gdt to our gdt.
437 	 * Entry 0 is the null descriptor by definition.
438 	 */
439 	rd_gdtr(&r_bgdt);
440 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
441 	if (bgdt == NULL)
442 		panic("null boot gdt");
443 
444 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
445 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
446 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
447 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
448 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
449 
450 	/*
451 	 * Install our new GDT
452 	 */
453 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
454 	r_gdt.dtr_base = (uintptr_t)gdt0;
455 	wr_gdtr(&r_gdt);
456 
457 	/*
458 	 * Reload the segment registers to use the new GDT
459 	 */
460 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
461 
462 	/*
463 	 *  setup %gs for kernel
464 	 */
465 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
466 
467 	/*
468 	 * XX64 We should never dereference off "other gsbase" or
469 	 * "fsbase".  So, we should arrange to point FSBASE and
470 	 * KGSBASE somewhere truly awful e.g. point it at the last
471 	 * valid address below the hole so that any attempts to index
472 	 * off them cause an exception.
473 	 *
474 	 * For now, point it at 8G -- at least it should be unmapped
475 	 * until some 64-bit processes run.
476 	 */
477 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
478 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
479 	return (gdt0);
480 }
481 
482 #elif defined(__i386)
483 
484 static void
485 init_gdt_common(user_desc_t *gdt)
486 {
487 	int i;
488 
489 	/*
490 	 * Text and data for both kernel and user span entire 32 bit
491 	 * address space.
492 	 */
493 
494 	/*
495 	 * kernel code segment.
496 	 */
497 	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
498 	    SDP_OP32);
499 
500 	/*
501 	 * kernel data segment.
502 	 */
503 	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
504 	    SDP_OP32);
505 
506 	/*
507 	 * user code segment.
508 	 */
509 	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
510 	    SDP_OP32);
511 
512 	/*
513 	 * user data segment.
514 	 */
515 	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
516 	    SDP_OP32);
517 
518 	/*
519 	 * TSS for T_DBLFLT (double fault) handler
520 	 */
521 	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], &dftss0,
522 	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);
523 
524 	/*
525 	 * TSS for kernel
526 	 */
527 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
528 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
529 
530 	/*
531 	 * %gs selector for kernel
532 	 */
533 	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
534 	    SEL_KPL, SDP_BYTES, SDP_OP32);
535 
536 	/*
537 	 * Initialize lwp private descriptors.
538 	 * Only attributes and limits are initialized, the effective
539 	 * base address is programmed via fsbase/gsbase.
540 	 */
541 	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
542 	    SDP_PAGES, SDP_OP32);
543 	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
544 	    SDP_PAGES, SDP_OP32);
545 
546 	/*
547 	 * Initialize the descriptors set aside for brand usage.
548 	 * Only attributes and limits are initialized.
549 	 */
550 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
551 		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
552 		    SDP_PAGES, SDP_OP32);
553 	/*
554 	 * Initialize convenient zero base user descriptor for clearing
555 	 * lwp  private %fs and %gs descriptors in GDT. See setregs() for
556 	 * an example.
557 	 */
558 	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
559 	    SDP_BYTES, SDP_OP32);
560 }
561 
562 static user_desc_t *
563 init_gdt(void)
564 {
565 	desctbr_t	r_bgdt, r_gdt;
566 	user_desc_t	*bgdt;
567 
568 #if !defined(__lint)
569 	/*
570 	 * Our gdt is never larger than a single page.
571 	 */
572 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
573 #endif
574 	/*
575 	 * XXX this allocation belongs in our caller, not here.
576 	 */
577 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
578 	    PAGESIZE, PAGESIZE);
579 	if (gdt0 == NULL)
580 		panic("init_gdt: BOP_ALLOC failed");
581 	bzero(gdt0, PAGESIZE);
582 
583 	init_gdt_common(gdt0);
584 
585 	/*
586 	 * Copy in from boot's gdt to our gdt entries.
587 	 * Entry 0 is null descriptor by definition.
588 	 */
589 	rd_gdtr(&r_bgdt);
590 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
591 	if (bgdt == NULL)
592 		panic("null boot gdt");
593 
594 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
595 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
596 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
597 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
598 
599 	/*
600 	 * Install our new GDT
601 	 */
602 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
603 	r_gdt.dtr_base = (uintptr_t)gdt0;
604 	wr_gdtr(&r_gdt);
605 
606 	/*
607 	 * Reload the segment registers to use the new GDT
608 	 */
609 	load_segment_registers(
610 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
611 
612 	return (gdt0);
613 }
614 
615 #endif	/* __i386 */
616 
617 /*
618  * Build kernel IDT.
619  *
620  * Note that for amd64 we pretty much require every gate to be an interrupt
621  * gate which blocks interrupts atomically on entry; that's because of our
622  * dependency on using 'swapgs' every time we come into the kernel to find
623  * the cpu structure. If we get interrupted just before doing that, %cs could
624  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
625  * %gsbase is really still pointing at something in userland. Bad things will
626  * ensue. We also use interrupt gates for i386 as well even though this is not
627  * required for some traps.
628  *
629  * Perhaps they should have invented a trap gate that does an atomic swapgs?
630  */
631 static void
632 init_idt_common(gate_desc_t *idt)
633 {
634 	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
635 	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
636 	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, SEL_KPL);
637 	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);
638 	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);
639 	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
640 	    SEL_KPL);
641 	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
642 	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, SEL_KPL);
643 
644 	/*
645 	 * double fault handler.
646 	 */
647 #if defined(__amd64)
648 	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
649 #elif defined(__i386)
650 	/*
651 	 * task gate required.
652 	 */
653 	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, SEL_KPL);
654 
655 #endif	/* __i386 */
656 
657 	/*
658 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
659 	 */
660 
661 	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
662 	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
663 	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
664 	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
665 	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
666 	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, SEL_KPL);
667 	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
668 	    SEL_KPL);
669 	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
670 	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
671 
672 	/*
673 	 * install "int80" handler at, well, 0x80.
674 	 */
675 	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, SEL_UPL);
676 
677 	/*
678 	 * install fast trap handler at 210.
679 	 */
680 	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, SEL_UPL);
681 
682 	/*
683 	 * System call handler.
684 	 */
685 #if defined(__amd64)
686 	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
687 	    SEL_UPL);
688 
689 #elif defined(__i386)
690 	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
691 	    SEL_UPL);
692 #endif	/* __i386 */
693 
694 	/*
695 	 * Install the DTrace interrupt handler for the pid provider.
696 	 */
697 	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
698 	    SDT_SYSIGT, SEL_UPL);
699 
700 	/*
701 	 * Prepare interposing descriptors for the branded "int80"
702 	 * and syscall handlers and cache copies of the default
703 	 * descriptors.
704 	 */
705 	brand_tbl[0].ih_inum = T_INT80;
706 	brand_tbl[0].ih_default_desc = idt0[T_INT80];
707 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
708 	    SDT_SYSIGT, SEL_UPL);
709 
710 	brand_tbl[1].ih_inum = T_SYSCALLINT;
711 	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
712 
713 #if defined(__amd64)
714 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
715 	    KCS_SEL, SDT_SYSIGT, SEL_UPL);
716 #elif defined(__i386)
717 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
718 	    KCS_SEL, SDT_SYSIGT, SEL_UPL);
719 #endif	/* __i386 */
720 
721 	brand_tbl[2].ih_inum = 0;
722 }
723 
724 static void
725 init_idt(gate_desc_t *idt)
726 {
727 	char	ivctname[80];
728 	void	(*ivctptr)(void);
729 	int	i;
730 
731 	/*
732 	 * Initialize entire table with 'reserved' trap and then overwrite
733 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
734 	 * since it can only be generated on a 386 processor. 15 is also
735 	 * unsupported and reserved.
736 	 */
737 	for (i = 0; i < NIDT; i++)
738 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
739 
740 	/*
741 	 * 20-31 reserved
742 	 */
743 	for (i = 20; i < 32; i++)
744 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, SEL_KPL);
745 
746 	/*
747 	 * interrupts 32 - 255
748 	 */
749 	for (i = 32; i < 256; i++) {
750 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
751 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
752 		if (ivctptr == NULL)
753 			panic("kobj_getsymvalue(%s) failed", ivctname);
754 
755 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, SEL_KPL);
756 	}
757 
758 	/*
759 	 * Now install the common ones. Note that it will overlay some
760 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
761 	 */
762 	init_idt_common(idt);
763 }
764 
/*
 * LDTs only come into play when a user explicitly creates one, so the
 * LDTR is normally left holding 0. A process that references the LDT
 * anyway takes a #gp, and the #gp fault handler emulates system calls
 * made through the obsolete lcall mechanism.
 */
static void
init_ldt(void)
{
	/* Load a zero selector into the LDTR. */
	wr_ldtr(0);
}
776 
777 #if defined(__amd64)
778 
779 static void
780 init_tss(void)
781 {
782 	/*
783 	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
784 	 * All exceptions but #DF will run on the thread stack.
785 	 * Set up the double fault stack here.
786 	 */
787 	ktss0.tss_ist1 =
788 	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
789 
790 	/*
791 	 * Set I/O bit map offset equal to size of TSS segment limit
792 	 * for no I/O permission map. This will force all user I/O
793 	 * instructions to generate #gp fault.
794 	 */
795 	ktss0.tss_bitmapbase = sizeof (ktss0);
796 
797 	/*
798 	 * Point %tr to descriptor for ktss0 in gdt.
799 	 */
800 	wr_tsr(KTSS_SEL);
801 }
802 
803 #elif defined(__i386)
804 
805 static void
806 init_tss(void)
807 {
808 	/*
809 	 * ktss0.tss_esp dynamically filled in by resume() on each
810 	 * context switch.
811 	 */
812 	ktss0.tss_ss0	= KDS_SEL;
813 	ktss0.tss_eip	= (uint32_t)_start;
814 	ktss0.tss_ds	= ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
815 	ktss0.tss_cs	= KCS_SEL;
816 	ktss0.tss_fs	= KFS_SEL;
817 	ktss0.tss_gs	= KGS_SEL;
818 	ktss0.tss_ldt	= ULDT_SEL;
819 
820 	/*
821 	 * Initialize double fault tss.
822 	 */
823 	dftss0.tss_esp0	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
824 	dftss0.tss_ss0	= KDS_SEL;
825 
826 	/*
827 	 * tss_cr3 will get initialized in hat_kern_setup() once our page
828 	 * tables have been setup.
829 	 */
830 	dftss0.tss_eip	= (uint32_t)syserrtrap;
831 	dftss0.tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
832 	dftss0.tss_cs	= KCS_SEL;
833 	dftss0.tss_ds	= KDS_SEL;
834 	dftss0.tss_es	= KDS_SEL;
835 	dftss0.tss_ss	= KDS_SEL;
836 	dftss0.tss_fs	= KFS_SEL;
837 	dftss0.tss_gs	= KGS_SEL;
838 
839 	/*
840 	 * Set I/O bit map offset equal to size of TSS segment limit
841 	 * for no I/O permission map. This will force all user I/O
842 	 * instructions to generate #gp fault.
843 	 */
844 	ktss0.tss_bitmapbase = sizeof (ktss0);
845 
846 	/*
847 	 * Point %tr to descriptor for ktss0 in gdt.
848 	 */
849 	wr_tsr(KTSS_SEL);
850 }
851 
852 #endif	/* __i386 */
853 
854 void
855 init_desctbls(void)
856 {
857 	user_desc_t *gdt;
858 	desctbr_t idtr;
859 
860 	/*
861 	 * Setup and install our GDT.
862 	 */
863 	gdt = init_gdt();
864 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
865 	CPU->cpu_m.mcpu_gdt = gdt;
866 
867 	/*
868 	 * Setup and install our IDT.
869 	 */
870 	init_idt(&idt0[0]);
871 
872 	idtr.dtr_base = (uintptr_t)idt0;
873 	idtr.dtr_limit = sizeof (idt0) - 1;
874 	wr_idtr(&idtr);
875 	CPU->cpu_m.mcpu_idt = idt0;
876 
877 #if defined(__i386)
878 	/*
879 	 * We maintain a description of idt0 in convenient IDTR format
880 	 * for #pf's on some older pentium processors. See pentium_pftrap().
881 	 */
882 	idt0_default_r = idtr;
883 #endif	/* __i386 */
884 
885 	init_tss();
886 	CPU->cpu_tss = &ktss0;
887 	init_ldt();
888 }
889 
890 /*
891  * In the early kernel, we need to set up a simple GDT to run on.
892  */
893 void
894 init_boot_gdt(user_desc_t *bgdt)
895 {
896 #if defined(__amd64)
897 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
898 	    SDP_PAGES, SDP_OP32);
899 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
900 	    SDP_PAGES, SDP_OP32);
901 #elif defined(__i386)
902 	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
903 	    SDP_PAGES, SDP_OP32);
904 	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
905 	    SDP_PAGES, SDP_OP32);
906 #endif	/* __i386 */
907 }
908 
909 /*
910  * Enable interpositioning on the system call path by rewriting the
911  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
912  * the branded entry points.
913  */
914 void
915 brand_interpositioning_enable(void)
916 {
917 	int i;
918 
919 	for (i = 0; brand_tbl[i].ih_inum; i++)
920 		CPU->cpu_idt[brand_tbl[i].ih_inum] =
921 		    brand_tbl[i].ih_interp_desc;
922 
923 #if defined(__amd64)
924 	wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
925 	wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
926 #endif
927 
928 	if (x86_feature & X86_SEP)
929 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
930 }
931 
932 /*
933  * Disable interpositioning on the system call path by rewriting the
934  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
935  * the standard entry points, which bypass the interpositioning hooks.
936  */
937 void
938 brand_interpositioning_disable(void)
939 {
940 	int i;
941 
942 	for (i = 0; brand_tbl[i].ih_inum; i++)
943 		CPU->cpu_idt[brand_tbl[i].ih_inum] =
944 		    brand_tbl[i].ih_default_desc;
945 
946 #if defined(__amd64)
947 	wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
948 	wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
949 #endif
950 
951 	if (x86_feature & X86_SEP)
952 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
953 }
954