xref: /titanic_50/usr/src/uts/intel/ia32/os/desctbls.c (revision 9ab815e1e50104cb1004a5ccca7a6da582994b57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 1992 Terrence R. Lambert.
28  * Copyright (c) 1990 The Regents of the University of California.
29  * All rights reserved.
30  *
31  * This code is derived from software contributed to Berkeley by
32  * William Jolitz.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  * 3. All advertising materials mentioning features or use of this software
43  *    must display the following acknowledgement:
44  *	This product includes software developed by the University of
45  *	California, Berkeley and its contributors.
46  * 4. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
63  */
64 
65 #include <sys/types.h>
66 #include <sys/sysmacros.h>
67 #include <sys/tss.h>
68 #include <sys/segments.h>
69 #include <sys/trap.h>
70 #include <sys/cpuvar.h>
71 #include <sys/bootconf.h>
72 #include <sys/x86_archext.h>
73 #include <sys/controlregs.h>
74 #include <sys/archsystm.h>
75 #include <sys/machsystm.h>
76 #include <sys/kobj.h>
77 #include <sys/cmn_err.h>
78 #include <sys/reboot.h>
79 #include <sys/kdi.h>
80 #include <sys/mach_mmu.h>
81 #include <sys/systm.h>
82 
83 #ifdef __xpv
84 #include <sys/hypervisor.h>
85 #include <vm/as.h>
86 #endif
87 
88 #include <sys/promif.h>
89 #include <sys/bootinfo.h>
90 #include <vm/kboot_mmu.h>
91 #include <vm/hat_pte.h>
92 
93 /*
94  * cpu0 and default tables and structures.
95  */
96 user_desc_t	*gdt0;
97 #if !defined(__xpv)
98 desctbr_t	gdt0_default_r;
99 #endif
100 
101 gate_desc_t	*idt0; 		/* interrupt descriptor table */
102 #if defined(__i386)
103 desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
104 #endif
105 
106 struct tss	*ktss0;			/* kernel task state structure */
107 
108 #if defined(__i386)
109 struct tss	*dftss0;		/* #DF double-fault exception */
110 #endif	/* __i386 */
111 
112 user_desc_t	zero_udesc;		/* base zero user desc native procs */
113 user_desc_t	null_udesc;		/* null user descriptor */
114 system_desc_t	null_sdesc;		/* null system descriptor */
115 
116 #if defined(__amd64)
117 user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
118 #endif	/* __amd64 */
119 
120 #if defined(__amd64)
121 user_desc_t	ucs_on;
122 user_desc_t	ucs_off;
123 user_desc_t	ucs32_on;
124 user_desc_t	ucs32_off;
125 #endif	/* __amd64 */
126 
127 #pragma	align	16(dblfault_stack0)
128 char		dblfault_stack0[DEFAULTSTKSZ];
129 
130 extern void	fast_null(void);
131 extern hrtime_t	get_hrtime(void);
132 extern hrtime_t	gethrvtime(void);
133 extern hrtime_t	get_hrestime(void);
134 extern uint64_t	getlgrp(void);
135 
136 void (*(fasttable[]))(void) = {
137 	fast_null,			/* T_FNULL routine */
138 	fast_null,			/* T_FGETFP routine (initially null) */
139 	fast_null,			/* T_FSETFP routine (initially null) */
140 	(void (*)())get_hrtime,		/* T_GETHRTIME */
141 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
142 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
143 	(void (*)())getlgrp		/* T_GETLGRP */
144 };
145 
146 /*
147  * Structure containing pre-computed descriptors to allow us to temporarily
148  * interpose on a standard handler.
149  */
150 struct interposing_handler {
151 	int ih_inum;
152 	gate_desc_t ih_interp_desc;
153 	gate_desc_t ih_default_desc;
154 };
155 
156 /*
157  * The brand infrastructure interposes on two handlers, and we use one as a
158  * NULL signpost.
159  */
160 static struct interposing_handler brand_tbl[2];
161 
162 /*
163  * software prototypes for default local descriptor table
164  */
165 
/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */
170 
171 #if defined(__amd64)
172 
173 /*
174  * In long mode we have the new L or long mode attribute bit
175  * for code segments. Only the conforming bit in type is used along
176  * with descriptor priority and present bits. Default operand size must
177  * be zero when in long mode. In 32-bit compatibility mode all fields
178  * are treated as in legacy mode. For data segments while in long mode
179  * only the present bit is loaded.
180  */
181 void
182 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
183     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
184 {
185 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
186 
187 	/*
188 	 * 64-bit long mode.
189 	 */
190 	if (lmode == SDP_LONG)
191 		dp->usd_def32 = 0;		/* 32-bit operands only */
192 	else
193 		/*
194 		 * 32-bit compatibility mode.
195 		 */
196 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
197 
198 	dp->usd_long = lmode;	/* 64-bit mode */
199 	dp->usd_type = type;
200 	dp->usd_dpl = dpl;
201 	dp->usd_p = 1;
202 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
203 
204 	dp->usd_lobase = (uintptr_t)base;
205 	dp->usd_midbase = (uintptr_t)base >> 16;
206 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
207 	dp->usd_lolimit = size;
208 	dp->usd_hilimit = (uintptr_t)size >> 16;
209 }
210 
211 #elif defined(__i386)
212 
213 /*
214  * Install user segment descriptor for code and data.
215  */
216 void
217 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
218     uint_t dpl, uint_t gran, uint_t defopsz)
219 {
220 	dp->usd_lolimit = size;
221 	dp->usd_hilimit = (uintptr_t)size >> 16;
222 
223 	dp->usd_lobase = (uintptr_t)base;
224 	dp->usd_midbase = (uintptr_t)base >> 16;
225 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
226 
227 	dp->usd_type = type;
228 	dp->usd_dpl = dpl;
229 	dp->usd_p = 1;
230 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
231 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
232 }
233 
234 #endif	/* __i386 */
235 
236 /*
237  * Install system segment descriptor for LDT and TSS segments.
238  */
239 
240 #if defined(__amd64)
241 
242 void
243 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
244     uint_t dpl)
245 {
246 	dp->ssd_lolimit = size;
247 	dp->ssd_hilimit = (uintptr_t)size >> 16;
248 
249 	dp->ssd_lobase = (uintptr_t)base;
250 	dp->ssd_midbase = (uintptr_t)base >> 16;
251 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
252 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
253 
254 	dp->ssd_type = type;
255 	dp->ssd_zero1 = 0;	/* must be zero */
256 	dp->ssd_zero2 = 0;
257 	dp->ssd_dpl = dpl;
258 	dp->ssd_p = 1;
259 	dp->ssd_gran = 0;	/* force byte units */
260 }
261 
262 void *
263 get_ssd_base(system_desc_t *dp)
264 {
265 	uintptr_t	base;
266 
267 	base = (uintptr_t)dp->ssd_lobase |
268 	    (uintptr_t)dp->ssd_midbase << 16 |
269 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
270 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
271 	return ((void *)base);
272 }
273 
274 #elif defined(__i386)
275 
276 void
277 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
278     uint_t dpl)
279 {
280 	dp->ssd_lolimit = size;
281 	dp->ssd_hilimit = (uintptr_t)size >> 16;
282 
283 	dp->ssd_lobase = (uintptr_t)base;
284 	dp->ssd_midbase = (uintptr_t)base >> 16;
285 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
286 
287 	dp->ssd_type = type;
288 	dp->ssd_zero = 0;	/* must be zero */
289 	dp->ssd_dpl = dpl;
290 	dp->ssd_p = 1;
291 	dp->ssd_gran = 0;	/* force byte units */
292 }
293 
294 void *
295 get_ssd_base(system_desc_t *dp)
296 {
297 	uintptr_t	base;
298 
299 	base = (uintptr_t)dp->ssd_lobase |
300 	    (uintptr_t)dp->ssd_midbase << 16 |
301 	    (uintptr_t)dp->ssd_hibase << (16 + 8);
302 	return ((void *)base);
303 }
304 
305 #endif	/* __i386 */
306 
307 /*
308  * Install gate segment descriptor for interrupt, trap, call and task gates.
309  */
310 
311 #if defined(__amd64)
312 
313 /*ARGSUSED*/
314 void
315 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
316     uint_t type, uint_t dpl, uint_t vector)
317 {
318 	dp->sgd_looffset = (uintptr_t)func;
319 	dp->sgd_hioffset = (uintptr_t)func >> 16;
320 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
321 
322 	dp->sgd_selector =  (uint16_t)sel;
323 
324 	/*
325 	 * For 64 bit native we use the IST stack mechanism
326 	 * for double faults. All other traps use the CPL = 0
327 	 * (tss_rsp0) stack.
328 	 */
329 #if !defined(__xpv)
330 	if (vector == T_DBLFLT)
331 		dp->sgd_ist = 1;
332 	else
333 #endif
334 		dp->sgd_ist = 0;
335 
336 	dp->sgd_type = type;
337 	dp->sgd_dpl = dpl;
338 	dp->sgd_p = 1;
339 }
340 
341 #elif defined(__i386)
342 
343 /*ARGSUSED*/
344 void
345 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
346     uint_t type, uint_t dpl, uint_t unused)
347 {
348 	dp->sgd_looffset = (uintptr_t)func;
349 	dp->sgd_hioffset = (uintptr_t)func >> 16;
350 
351 	dp->sgd_selector =  (uint16_t)sel;
352 	dp->sgd_stkcpy = 0;	/* always zero bytes */
353 	dp->sgd_type = type;
354 	dp->sgd_dpl = dpl;
355 	dp->sgd_p = 1;
356 }
357 
358 #endif	/* __i386 */
359 
360 /*
361  * Updates a single user descriptor in the the GDT of the current cpu.
362  * Caller is responsible for preventing cpu migration.
363  */
364 
365 void
366 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
367 {
368 #if defined(__xpv)
369 
370 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
371 
372 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
373 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
374 
375 #else	/* __xpv */
376 
377 	CPU->cpu_gdt[sidx] = *udp;
378 
379 #endif	/* __xpv */
380 }
381 
382 /*
383  * Writes single descriptor pointed to by udp into a processes
384  * LDT entry pointed to by ldp.
385  */
386 int
387 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
388 {
389 #if defined(__xpv)
390 
391 	uint64_t dpa;
392 
393 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
394 	    ((uintptr_t)ldp & PAGEOFFSET);
395 
396 	/*
397 	 * The hypervisor is a little more restrictive about what it
398 	 * supports in the LDT.
399 	 */
400 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
401 		return (EINVAL);
402 
403 #else	/* __xpv */
404 
405 	*ldp = *udp;
406 
407 #endif	/* __xpv */
408 	return (0);
409 }
410 
411 #if defined(__xpv)
412 
413 /*
414  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
415  * Returns true if a valid entry was written.
416  */
417 int
418 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
419 {
420 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
421 
422 	/*
423 	 * skip holes in the IDT
424 	 */
425 	if (GATESEG_GETOFFSET(sgd) == 0)
426 		return (0);
427 
428 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
429 	ti->vector = vec;
430 	TI_SET_DPL(ti, sgd->sgd_dpl);
431 
432 	/*
433 	 * Is this an interrupt gate?
434 	 */
435 	if (sgd->sgd_type == SDT_SYSIGT) {
436 		/* LINTED */
437 		TI_SET_IF(ti, 1);
438 	}
439 	ti->cs = sgd->sgd_selector;
440 #if defined(__amd64)
441 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
442 #endif
443 	ti->address = GATESEG_GETOFFSET(sgd);
444 	return (1);
445 }
446 
447 /*
448  * Convert a single hw format gate descriptor and write it into our virtual IDT.
449  */
450 void
451 xen_idt_write(gate_desc_t *sgd, uint_t vec)
452 {
453 	trap_info_t trapinfo[2];
454 
455 	bzero(trapinfo, sizeof (trapinfo));
456 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
457 		return;
458 	if (xen_set_trap_table(trapinfo) != 0)
459 		panic("xen_idt_write: xen_set_trap_table() failed");
460 }
461 
462 #endif	/* __xpv */
463 
464 #if defined(__amd64)
465 
466 /*
467  * Build kernel GDT.
468  */
469 
470 static void
471 init_gdt_common(user_desc_t *gdt)
472 {
473 	int i;
474 
475 	/*
476 	 * 64-bit kernel code segment.
477 	 */
478 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
479 	    SDP_PAGES, SDP_OP32);
480 
481 	/*
482 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
483 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
484 	 * instruction to return from system calls back to 32-bit applications.
485 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
486 	 * descriptors. We therefore must ensure that the kernel uses something,
487 	 * though it will be ignored by hardware, that is compatible with 32-bit
488 	 * apps. For the same reason we must set the default op size of this
489 	 * descriptor to 32-bit operands.
490 	 */
491 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
492 	    SEL_KPL, SDP_PAGES, SDP_OP32);
493 	gdt[GDT_KDATA].usd_def32 = 1;
494 
495 	/*
496 	 * 64-bit user code segment.
497 	 */
498 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
499 	    SDP_PAGES, SDP_OP32);
500 
501 	/*
502 	 * 32-bit user code segment.
503 	 */
504 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
505 	    SEL_UPL, SDP_PAGES, SDP_OP32);
506 
507 	/*
508 	 * See gdt_ucode32() and gdt_ucode_native().
509 	 */
510 	ucs_on = ucs_off = gdt[GDT_UCODE];
511 	ucs_off.usd_p = 0;	/* forces #np fault */
512 
513 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
514 	ucs32_off.usd_p = 0;	/* forces #np fault */
515 
516 	/*
517 	 * 32 and 64 bit data segments can actually share the same descriptor.
518 	 * In long mode only the present bit is checked but all other fields
519 	 * are loaded. But in compatibility mode all fields are interpreted
520 	 * as in legacy mode so they must be set correctly for a 32-bit data
521 	 * segment.
522 	 */
523 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
524 	    SDP_PAGES, SDP_OP32);
525 
526 #if !defined(__xpv)
527 
528 	/*
529 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
530 	 * in the GDT is 0.
531 	 */
532 
533 	/*
534 	 * Kernel TSS
535 	 */
536 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
537 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
538 
539 #endif	/* !__xpv */
540 
541 	/*
542 	 * Initialize fs and gs descriptors for 32 bit processes.
543 	 * Only attributes and limits are initialized, the effective
544 	 * base address is programmed via fsbase/gsbase.
545 	 */
546 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
547 	    SEL_UPL, SDP_PAGES, SDP_OP32);
548 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
549 	    SEL_UPL, SDP_PAGES, SDP_OP32);
550 
551 	/*
552 	 * Initialize the descriptors set aside for brand usage.
553 	 * Only attributes and limits are initialized.
554 	 */
555 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
556 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
557 		    SEL_UPL, SDP_PAGES, SDP_OP32);
558 
559 	/*
560 	 * Initialize convenient zero base user descriptors for clearing
561 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
562 	 * an example.
563 	 */
564 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
565 	    SDP_BYTES, SDP_OP32);
566 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
567 	    SDP_PAGES, SDP_OP32);
568 }
569 
570 #if defined(__xpv)
571 
572 static user_desc_t *
573 init_gdt(void)
574 {
575 	uint64_t gdtpa;
576 	ulong_t ma[1];		/* XXPV should be a memory_t */
577 	ulong_t addr;
578 
579 #if !defined(__lint)
580 	/*
581 	 * Our gdt is never larger than a single page.
582 	 */
583 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
584 #endif
585 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
586 	    PAGESIZE, PAGESIZE);
587 	bzero(gdt0, PAGESIZE);
588 
589 	init_gdt_common(gdt0);
590 
591 	/*
592 	 * XXX Since we never invoke kmdb until after the kernel takes
593 	 * over the descriptor tables why not have it use the kernel's
594 	 * selectors?
595 	 */
596 	if (boothowto & RB_DEBUG) {
597 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
598 		    SEL_KPL, SDP_PAGES, SDP_OP32);
599 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
600 		    SEL_KPL, SDP_PAGES, SDP_OP32);
601 	}
602 
603 	/*
604 	 * Clear write permission for page containing the gdt and install it.
605 	 */
606 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
607 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
608 	kbm_read_only((uintptr_t)gdt0, gdtpa);
609 	xen_set_gdt(ma, NGDT);
610 
611 	/*
612 	 * Reload the segment registers to use the new GDT.
613 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
614 	 * See KCS_SEL in segments.h.
615 	 */
616 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
617 
618 	/*
619 	 *  setup %gs for kernel
620 	 */
621 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
622 
623 	/*
624 	 * XX64 We should never dereference off "other gsbase" or
625 	 * "fsbase".  So, we should arrange to point FSBASE and
626 	 * KGSBASE somewhere truly awful e.g. point it at the last
627 	 * valid address below the hole so that any attempts to index
628 	 * off them cause an exception.
629 	 *
630 	 * For now, point it at 8G -- at least it should be unmapped
631 	 * until some 64-bit processes run.
632 	 */
633 	addr = 0x200000000ul;
634 	xen_set_segment_base(SEGBASE_FS, addr);
635 	xen_set_segment_base(SEGBASE_GS_USER, addr);
636 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
637 
638 	return (gdt0);
639 }
640 
641 #else	/* __xpv */
642 
643 static user_desc_t *
644 init_gdt(void)
645 {
646 	desctbr_t	r_bgdt, r_gdt;
647 	user_desc_t	*bgdt;
648 
649 #if !defined(__lint)
650 	/*
651 	 * Our gdt is never larger than a single page.
652 	 */
653 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
654 #endif
655 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
656 	    PAGESIZE, PAGESIZE);
657 	bzero(gdt0, PAGESIZE);
658 
659 	init_gdt_common(gdt0);
660 
661 	/*
662 	 * Copy in from boot's gdt to our gdt.
663 	 * Entry 0 is the null descriptor by definition.
664 	 */
665 	rd_gdtr(&r_bgdt);
666 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
667 	if (bgdt == NULL)
668 		panic("null boot gdt");
669 
670 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
671 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
672 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
673 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
674 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
675 
676 	/*
677 	 * Install our new GDT
678 	 */
679 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
680 	r_gdt.dtr_base = (uintptr_t)gdt0;
681 	wr_gdtr(&r_gdt);
682 
683 	/*
684 	 * Reload the segment registers to use the new GDT
685 	 */
686 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
687 
688 	/*
689 	 *  setup %gs for kernel
690 	 */
691 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
692 
693 	/*
694 	 * XX64 We should never dereference off "other gsbase" or
695 	 * "fsbase".  So, we should arrange to point FSBASE and
696 	 * KGSBASE somewhere truly awful e.g. point it at the last
697 	 * valid address below the hole so that any attempts to index
698 	 * off them cause an exception.
699 	 *
700 	 * For now, point it at 8G -- at least it should be unmapped
701 	 * until some 64-bit processes run.
702 	 */
703 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
704 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
705 	return (gdt0);
706 }
707 
708 #endif	/* __xpv */
709 
710 #elif defined(__i386)
711 
712 static void
713 init_gdt_common(user_desc_t *gdt)
714 {
715 	int i;
716 
717 	/*
718 	 * Text and data for both kernel and user span entire 32 bit
719 	 * address space.
720 	 */
721 
722 	/*
723 	 * kernel code segment.
724 	 */
725 	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
726 	    SDP_OP32);
727 
728 	/*
729 	 * kernel data segment.
730 	 */
731 	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
732 	    SDP_OP32);
733 
734 	/*
735 	 * user code segment.
736 	 */
737 	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
738 	    SDP_OP32);
739 
740 	/*
741 	 * user data segment.
742 	 */
743 	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
744 	    SDP_OP32);
745 
746 #if !defined(__xpv)
747 
748 	/*
749 	 * TSS for T_DBLFLT (double fault) handler
750 	 */
751 	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
752 	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
753 
754 	/*
755 	 * TSS for kernel
756 	 */
757 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
758 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
759 
760 #endif	/* !__xpv */
761 
762 	/*
763 	 * %gs selector for kernel
764 	 */
765 	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
766 	    SEL_KPL, SDP_BYTES, SDP_OP32);
767 
768 	/*
769 	 * Initialize lwp private descriptors.
770 	 * Only attributes and limits are initialized, the effective
771 	 * base address is programmed via fsbase/gsbase.
772 	 */
773 	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
774 	    SDP_PAGES, SDP_OP32);
775 	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
776 	    SDP_PAGES, SDP_OP32);
777 
778 	/*
779 	 * Initialize the descriptors set aside for brand usage.
780 	 * Only attributes and limits are initialized.
781 	 */
782 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
783 		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
784 		    SDP_PAGES, SDP_OP32);
785 	/*
786 	 * Initialize convenient zero base user descriptor for clearing
787 	 * lwp  private %fs and %gs descriptors in GDT. See setregs() for
788 	 * an example.
789 	 */
790 	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
791 	    SDP_BYTES, SDP_OP32);
792 }
793 
794 #if defined(__xpv)
795 
796 static user_desc_t *
797 init_gdt(void)
798 {
799 	uint64_t gdtpa;
800 	ulong_t ma[1];		/* XXPV should be a memory_t */
801 
802 #if !defined(__lint)
803 	/*
804 	 * Our gdt is never larger than a single page.
805 	 */
806 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
807 #endif
808 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
809 	    PAGESIZE, PAGESIZE);
810 	bzero(gdt0, PAGESIZE);
811 
812 	init_gdt_common(gdt0);
813 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
814 
815 	/*
816 	 * XXX Since we never invoke kmdb until after the kernel takes
817 	 * over the descriptor tables why not have it use the kernel's
818 	 * selectors?
819 	 */
820 	if (boothowto & RB_DEBUG) {
821 		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
822 		    SDP_PAGES, SDP_OP32);
823 		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
824 		    SDP_PAGES, SDP_OP32);
825 	}
826 
827 	/*
828 	 * Clear write permission for page containing the gdt and install it.
829 	 */
830 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
831 	kbm_read_only((uintptr_t)gdt0, gdtpa);
832 	xen_set_gdt(ma, NGDT);
833 
834 	/*
835 	 * Reload the segment registers to use the new GDT
836 	 */
837 	load_segment_registers(
838 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
839 
840 	return (gdt0);
841 }
842 
843 #else	/* __xpv */
844 
845 static user_desc_t *
846 init_gdt(void)
847 {
848 	desctbr_t	r_bgdt, r_gdt;
849 	user_desc_t	*bgdt;
850 
851 #if !defined(__lint)
852 	/*
853 	 * Our gdt is never larger than a single page.
854 	 */
855 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
856 #endif
857 	/*
858 	 * XXX this allocation belongs in our caller, not here.
859 	 */
860 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
861 	    PAGESIZE, PAGESIZE);
862 	bzero(gdt0, PAGESIZE);
863 
864 	init_gdt_common(gdt0);
865 
866 	/*
867 	 * Copy in from boot's gdt to our gdt entries.
868 	 * Entry 0 is null descriptor by definition.
869 	 */
870 	rd_gdtr(&r_bgdt);
871 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
872 	if (bgdt == NULL)
873 		panic("null boot gdt");
874 
875 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
876 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
877 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
878 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
879 
880 	/*
881 	 * Install our new GDT
882 	 */
883 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
884 	r_gdt.dtr_base = (uintptr_t)gdt0;
885 	wr_gdtr(&r_gdt);
886 
887 	/*
888 	 * Reload the segment registers to use the new GDT
889 	 */
890 	load_segment_registers(
891 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
892 
893 	return (gdt0);
894 }
895 
896 #endif	/* __xpv */
897 #endif	/* __i386 */
898 
899 /*
900  * Build kernel IDT.
901  *
902  * Note that for amd64 we pretty much require every gate to be an interrupt
903  * gate which blocks interrupts atomically on entry; that's because of our
904  * dependency on using 'swapgs' every time we come into the kernel to find
905  * the cpu structure. If we get interrupted just before doing that, %cs could
906  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
907  * %gsbase is really still pointing at something in userland. Bad things will
908  * ensue. We also use interrupt gates for i386 as well even though this is not
909  * required for some traps.
910  *
911  * Perhaps they should have invented a trap gate that does an atomic swapgs?
912  */
913 static void
914 init_idt_common(gate_desc_t *idt)
915 {
916 	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
917 	    0);
918 	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
919 	    0);
920 	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 	    0);
922 	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
923 	    0);
924 	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
925 	    0);
926 	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
927 	    TRP_KPL, 0);
928 	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
929 	    0);
930 	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
931 	    0);
932 
933 	/*
934 	 * double fault handler.
935 	 *
936 	 * Note that on the hypervisor a guest does not receive #df faults.
937 	 * Instead a failsafe event is injected into the guest if its selectors
938 	 * and/or stack is in a broken state. See xen_failsafe_callback.
939 	 */
940 #if !defined(__xpv)
941 #if defined(__amd64)
942 
943 	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
944 	    T_DBLFLT);
945 
946 #elif defined(__i386)
947 
948 	/*
949 	 * task gate required.
950 	 */
951 	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
952 	    0);
953 
954 #endif	/* __i386 */
955 #endif	/* !__xpv */
956 
957 	/*
958 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
959 	 */
960 
961 	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
962 	    0);
963 	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
964 	    0);
965 	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
966 	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
967 	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
968 	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
969 	    0);
970 	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
971 	    TRP_KPL, 0);
972 	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
973 	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
974 
975 	/*
976 	 * install fast trap handler at 210.
977 	 */
978 	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
979 	    0);
980 
981 	/*
982 	 * System call handler.
983 	 */
984 #if defined(__amd64)
985 	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
986 	    TRP_UPL, 0);
987 
988 #elif defined(__i386)
989 	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
990 	    TRP_UPL, 0);
991 #endif	/* __i386 */
992 
993 	/*
994 	 * Install the DTrace interrupt handler for the pid provider.
995 	 */
996 	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
997 	    SDT_SYSIGT, TRP_UPL, 0);
998 
999 	/*
1000 	 * Prepare interposing descriptor for the syscall handler
1001 	 * and cache copy of the default descriptor.
1002 	 */
1003 	brand_tbl[0].ih_inum = T_SYSCALLINT;
1004 	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1005 
1006 #if defined(__amd64)
1007 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1008 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1009 #elif defined(__i386)
1010 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1011 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1012 #endif	/* __i386 */
1013 
1014 	brand_tbl[1].ih_inum = 0;
1015 }
1016 
1017 #if defined(__xpv)
1018 
1019 static void
1020 init_idt(gate_desc_t *idt)
1021 {
1022 	init_idt_common(idt);
1023 }
1024 
1025 #else	/* __xpv */
1026 
1027 static void
1028 init_idt(gate_desc_t *idt)
1029 {
1030 	char	ivctname[80];
1031 	void	(*ivctptr)(void);
1032 	int	i;
1033 
1034 	/*
1035 	 * Initialize entire table with 'reserved' trap and then overwrite
1036 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1037 	 * since it can only be generated on a 386 processor. 15 is also
1038 	 * unsupported and reserved.
1039 	 */
1040 	for (i = 0; i < NIDT; i++)
1041 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1042 		    0);
1043 
1044 	/*
1045 	 * 20-31 reserved
1046 	 */
1047 	for (i = 20; i < 32; i++)
1048 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1049 		    0);
1050 
1051 	/*
1052 	 * interrupts 32 - 255
1053 	 */
1054 	for (i = 32; i < 256; i++) {
1055 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1056 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1057 		if (ivctptr == NULL)
1058 			panic("kobj_getsymvalue(%s) failed", ivctname);
1059 
1060 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1061 	}
1062 
1063 	/*
1064 	 * Now install the common ones. Note that it will overlay some
1065 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1066 	 */
1067 	init_idt_common(idt);
1068 }
1069 
1070 #endif	/* __xpv */
1071 
/*
 * LDTs are only dealt with by the kernel once a user explicitly creates
 * one, so the LDTR normally holds 0 and any process that references the
 * LDT takes a #gp. The #gp fault handler emulates system calls made
 * through the obsolete lcall mechanism.
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
1087 
1088 #if !defined(__xpv)
1089 #if defined(__amd64)
1090 
1091 static void
1092 init_tss(void)
1093 {
1094 	/*
1095 	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
1096 	 * All exceptions but #DF will run on the thread stack.
1097 	 * Set up the double fault stack here.
1098 	 */
1099 	ktss0->tss_ist1 =
1100 	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1101 
1102 	/*
1103 	 * Set I/O bit map offset equal to size of TSS segment limit
1104 	 * for no I/O permission map. This will force all user I/O
1105 	 * instructions to generate #gp fault.
1106 	 */
1107 	ktss0->tss_bitmapbase = sizeof (*ktss0);
1108 
1109 	/*
1110 	 * Point %tr to descriptor for ktss0 in gdt.
1111 	 */
1112 	wr_tsr(KTSS_SEL);
1113 }
1114 
1115 #elif defined(__i386)
1116 
1117 static void
1118 init_tss(void)
1119 {
1120 	/*
1121 	 * ktss0->tss_esp dynamically filled in by resume() on each
1122 	 * context switch.
1123 	 */
1124 	ktss0->tss_ss0	= KDS_SEL;
1125 	ktss0->tss_eip	= (uint32_t)_start;
1126 	ktss0->tss_ds	= ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1127 	ktss0->tss_cs	= KCS_SEL;
1128 	ktss0->tss_fs	= KFS_SEL;
1129 	ktss0->tss_gs	= KGS_SEL;
1130 	ktss0->tss_ldt	= ULDT_SEL;
1131 
1132 	/*
1133 	 * Initialize double fault tss.
1134 	 */
1135 	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1136 	dftss0->tss_ss0	= KDS_SEL;
1137 
1138 	/*
1139 	 * tss_cr3 will get initialized in hat_kern_setup() once our page
1140 	 * tables have been setup.
1141 	 */
1142 	dftss0->tss_eip	= (uint32_t)syserrtrap;
1143 	dftss0->tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1144 	dftss0->tss_cs	= KCS_SEL;
1145 	dftss0->tss_ds	= KDS_SEL;
1146 	dftss0->tss_es	= KDS_SEL;
1147 	dftss0->tss_ss	= KDS_SEL;
1148 	dftss0->tss_fs	= KFS_SEL;
1149 	dftss0->tss_gs	= KGS_SEL;
1150 
1151 	/*
1152 	 * Set I/O bit map offset equal to size of TSS segment limit
1153 	 * for no I/O permission map. This will force all user I/O
1154 	 * instructions to generate #gp fault.
1155 	 */
1156 	ktss0->tss_bitmapbase = sizeof (*ktss0);
1157 
1158 	/*
1159 	 * Point %tr to descriptor for ktss0 in gdt.
1160 	 */
1161 	wr_tsr(KTSS_SEL);
1162 }
1163 
1164 #endif	/* __i386 */
1165 #endif	/* !__xpv */
1166 
1167 #if defined(__xpv)
1168 
1169 void
1170 init_desctbls(void)
1171 {
1172 	uint_t vec;
1173 	user_desc_t *gdt;
1174 
1175 	/*
1176 	 * Setup and install our GDT.
1177 	 */
1178 	gdt = init_gdt();
1179 
1180 	/*
1181 	 * Store static pa of gdt to speed up pa_to_ma() translations
1182 	 * on lwp context switches.
1183 	 */
1184 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1185 	CPU->cpu_gdt = gdt;
1186 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1187 
1188 	/*
1189 	 * Setup and install our IDT.
1190 	 */
1191 #if !defined(__lint)
1192 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1193 #endif
1194 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1195 	    PAGESIZE, PAGESIZE);
1196 	bzero(idt0, PAGESIZE);
1197 	init_idt(idt0);
1198 	for (vec = 0; vec < NIDT; vec++)
1199 		xen_idt_write(&idt0[vec], vec);
1200 
1201 	CPU->cpu_idt = idt0;
1202 
1203 	/*
1204 	 * set default kernel stack
1205 	 */
1206 	xen_stack_switch(KDS_SEL,
1207 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1208 
1209 	xen_init_callbacks();
1210 
1211 	init_ldt();
1212 }
1213 
1214 #else	/* __xpv */
1215 
1216 void
1217 init_desctbls(void)
1218 {
1219 	user_desc_t *gdt;
1220 	desctbr_t idtr;
1221 
1222 	/*
1223 	 * Allocate IDT and TSS structures on unique pages for better
1224 	 * performance in virtual machines.
1225 	 */
1226 #if !defined(__lint)
1227 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1228 #endif
1229 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1230 	    PAGESIZE, PAGESIZE);
1231 	bzero(idt0, PAGESIZE);
1232 #if !defined(__lint)
1233 	ASSERT(sizeof (*ktss0) <= PAGESIZE);
1234 #endif
1235 	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1236 	    PAGESIZE, PAGESIZE);
1237 	bzero(ktss0, PAGESIZE);
1238 
1239 #if defined(__i386)
1240 #if !defined(__lint)
1241 	ASSERT(sizeof (*dftss0) <= PAGESIZE);
1242 #endif
1243 	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1244 	    PAGESIZE, PAGESIZE);
1245 	bzero(dftss0, PAGESIZE);
1246 #endif
1247 
1248 	/*
1249 	 * Setup and install our GDT.
1250 	 */
1251 	gdt = init_gdt();
1252 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1253 	CPU->cpu_gdt = gdt;
1254 
1255 	/*
1256 	 * Setup and install our IDT.
1257 	 */
1258 	init_idt(idt0);
1259 
1260 	idtr.dtr_base = (uintptr_t)idt0;
1261 	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1262 	wr_idtr(&idtr);
1263 	CPU->cpu_idt = idt0;
1264 
1265 #if defined(__i386)
1266 	/*
1267 	 * We maintain a description of idt0 in convenient IDTR format
1268 	 * for #pf's on some older pentium processors. See pentium_pftrap().
1269 	 */
1270 	idt0_default_r = idtr;
1271 #endif	/* __i386 */
1272 
1273 	init_tss();
1274 	CPU->cpu_tss = ktss0;
1275 	init_ldt();
1276 }
1277 
1278 #endif	/* __xpv */
1279 
1280 /*
1281  * In the early kernel, we need to set up a simple GDT to run on.
1282  *
1283  * XXPV	Can dboot use this too?  See dboot_gdt.s
1284  */
1285 void
1286 init_boot_gdt(user_desc_t *bgdt)
1287 {
1288 #if defined(__amd64)
1289 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1290 	    SDP_PAGES, SDP_OP32);
1291 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1292 	    SDP_PAGES, SDP_OP32);
1293 #elif defined(__i386)
1294 	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1295 	    SDP_PAGES, SDP_OP32);
1296 	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1297 	    SDP_PAGES, SDP_OP32);
1298 #endif	/* __i386 */
1299 }
1300 
1301 /*
1302  * Enable interpositioning on the system call path by rewriting the
1303  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1304  * the branded entry points.
1305  */
1306 void
1307 brand_interpositioning_enable(void)
1308 {
1309 	gate_desc_t	*idt = CPU->cpu_idt;
1310 	int 		i;
1311 
1312 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1313 
1314 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1315 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1316 #if defined(__xpv)
1317 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1318 		    brand_tbl[i].ih_inum);
1319 #endif
1320 	}
1321 
1322 #if defined(__amd64)
1323 #if defined(__xpv)
1324 
1325 	/*
1326 	 * Currently the hypervisor only supports 64-bit syscalls via
1327 	 * syscall instruction. The 32-bit syscalls are handled by
1328 	 * interrupt gate above.
1329 	 */
1330 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1331 	    CALLBACKF_mask_events);
1332 
1333 #else
1334 
1335 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1336 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1337 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1338 	}
1339 
1340 #endif
1341 #endif	/* __amd64 */
1342 
1343 	if (is_x86_feature(x86_featureset, X86FSET_SEP))
1344 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1345 }
1346 
1347 /*
1348  * Disable interpositioning on the system call path by rewriting the
1349  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1350  * the standard entry points, which bypass the interpositioning hooks.
1351  */
1352 void
1353 brand_interpositioning_disable(void)
1354 {
1355 	gate_desc_t	*idt = CPU->cpu_idt;
1356 	int i;
1357 
1358 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1359 
1360 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1361 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1362 #if defined(__xpv)
1363 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1364 		    brand_tbl[i].ih_inum);
1365 #endif
1366 	}
1367 
1368 #if defined(__amd64)
1369 #if defined(__xpv)
1370 
1371 	/*
1372 	 * See comment above in brand_interpositioning_enable.
1373 	 */
1374 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1375 	    CALLBACKF_mask_events);
1376 
1377 #else
1378 
1379 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1380 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1381 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1382 	}
1383 
1384 #endif
1385 #endif	/* __amd64 */
1386 
1387 	if (is_x86_feature(x86_featureset, X86FSET_SEP))
1388 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1389 }
1390