/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Joyent, Inc. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;			/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

tss_t		*ktss0;			/* kernel task state structure */

#if defined(__i386)
tss_t		*dftss0;		/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};
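
/*
 * Illustrative sketch (not compiled): fasttable is indexed by the fast
 * trap numbers from <sys/trap.h>, so the T_GETHRTIME slot dispatches to
 * get_hrtime(). The cast back to an hrtime_t-returning function mirrors
 * the table initializers above; the example function name is hypothetical.
 */
#if 0
static hrtime_t
example_fasttrap_dispatch(void)
{
	return (((hrtime_t (*)(void))fasttable[T_GETHRTIME])());
}
#endif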

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with the descriptor privilege and present bits. Default operand size
 * must be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments while in long mode
 * only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* default op size must be 0 */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}
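
/*
 * Illustrative sketch (not compiled): the two flavors of user code
 * descriptor built with set_usegd(), mirroring the GDT_UCODE and
 * GDT_U32CODE entries set up in init_gdt_common() below. The example
 * function name is hypothetical.
 */
#if 0
static void
example_set_usegd(void)
{
	user_desc_t ucode;

	/* 64-bit user code: usd_long = 1, default operand size forced to 0 */
	set_usegd(&ucode, SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/* 32-bit compatibility user code: fields interpreted as in legacy */
	set_usegd(&ucode, SDP_SHORT, NULL, -1, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}
#endif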

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}
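
/*
 * Illustrative sketch (not compiled): a set_syssegd()/get_ssd_base()
 * round trip for the kernel TSS descriptor, matching the GDT_KTSS setup
 * in init_gdt_common() below. The example function name is hypothetical.
 */
#if 0
static void
example_syssegd(void)
{
	system_desc_t tssd;

	set_syssegd(&tssd, ktss0, sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
	ASSERT(get_ssd_base(&tssd) == (void *)ktss0);
}
#endif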

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8);
	return ((void *)base);
}

#endif	/* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64 bit native we use the IST stack mechanism
	 * for double faults. All other traps use the CPL = 0
	 * (tss_rsp0) stack.
	 */
#if !defined(__xpv)
	if (vector == T_DBLFLT)
		dp->sgd_ist = 1;
	else
#endif
		dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}
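
/*
 * Illustrative sketch (not compiled): installing an interrupt gate the
 * way init_idt_common() does below. Only T_DBLFLT selects IST stack 1;
 * every other vector runs on the CPL = 0 (tss_rsp0) stack. The example
 * function name is hypothetical.
 */
#if 0
static void
example_gatesegd(void)
{
	gate_desc_t gd;

	set_gatesegd(&gd, &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, T_PGFLT);
	ASSERT(gd.sgd_ist == 0);
}
#endif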

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */

	CPU->cpu_gdt[sidx] = *udp;

#endif	/* __xpv */
}

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */

	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
#if defined(__amd64)
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
#endif
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our
 * virtual IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
	 * instruction to return from system calls back to 32-bit applications.
	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
	 * descriptors. We therefore must ensure that the kernel uses something,
	 * though it will be ignored by hardware, that is compatible with 32-bit
	 * apps. For the same reason we must set the default op size of this
	 * descriptor to 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
	 * in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}
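
/*
 * Illustrative sketch (not compiled): clearing an lwp's private %fs
 * descriptor with the zero-base descriptor initialized above, in the
 * style of setregs(). The example function name is hypothetical.
 */
#if 0
static void
example_clear_lwpfs(void)
{
	/* caller must prevent cpu migration; see gdt_update_usegd() */
	gdt_update_usegd(GDT_LWPFS, &zero_udesc);
}
#endif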

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

#if !defined(__xpv)

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);
	/*
	 * Initialize convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __xpv */
#endif	/* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates for i386, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
#if defined(__amd64)

	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    T_DBLFLT);

#elif defined(__i386)

	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
	    0);

#endif	/* __i386 */
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);

	/*
	 * Prepare interposing descriptor for the syscall handler
	 * and cache a copy of the default descriptor.
	 */
	brand_tbl[0].ih_inum = T_SYSCALLINT;
	brand_tbl[0].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif	/* __i386 */

	brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize the entire table with the 'reserved' trap and then
	 * overwrite specific entries. T_EXTOVRFLT (9) is unsupported and
	 * reserved since it can only be generated on a 386 processor. 15
	 * is also unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
	 * All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp is dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0	= KDS_SEL;
	ktss0->tss_eip	= (uint32_t)_start;
	ktss0->tss_ds	= ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs	= KCS_SEL;
	ktss0->tss_fs	= KFS_SEL;
	ktss0->tss_gs	= KGS_SEL;
	ktss0->tss_ldt	= ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0	= KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been set up.
	 */
	dftss0->tss_eip	= (uint32_t)syserrtrap;
	dftss0->tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs	= KCS_SEL;
	dftss0->tss_ds	= KDS_SEL;
	dftss0->tss_es	= KDS_SEL;
	dftss0->tss_ss	= KDS_SEL;
	dftss0->tss_fs	= KFS_SEL;
	dftss0->tss_gs	= KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * the syscall instruction. The 32-bit syscalls are handled by
	 * the interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}
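
/*
 * Illustrative sketch (not compiled): callers satisfy the ASSERT above
 * by holding preemption off around the IDT/MSR rewrite. The example
 * function name is hypothetical.
 */
#if 0
static void
example_enable_branding(void)
{
	kpreempt_disable();
	brand_interpositioning_enable();
	kpreempt_enable();
}
#endif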

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int	i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}