1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2011 Joyent, Inc. All rights reserved.
28 */
29
30 /*
31 * Copyright (c) 1992 Terrence R. Lambert.
32 * Copyright (c) 1990 The Regents of the University of California.
33 * All rights reserved.
34 *
35 * This code is derived from software contributed to Berkeley by
36 * William Jolitz.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 */
68
69 #include <sys/types.h>
70 #include <sys/sysmacros.h>
71 #include <sys/tss.h>
72 #include <sys/segments.h>
73 #include <sys/trap.h>
74 #include <sys/cpuvar.h>
75 #include <sys/bootconf.h>
76 #include <sys/x86_archext.h>
77 #include <sys/controlregs.h>
78 #include <sys/archsystm.h>
79 #include <sys/machsystm.h>
80 #include <sys/kobj.h>
81 #include <sys/cmn_err.h>
82 #include <sys/reboot.h>
83 #include <sys/kdi.h>
84 #include <sys/mach_mmu.h>
85 #include <sys/systm.h>
86
87 #ifdef __xpv
88 #include <sys/hypervisor.h>
89 #include <vm/as.h>
90 #endif
91
92 #include <sys/promif.h>
93 #include <sys/bootinfo.h>
94 #include <vm/kboot_mmu.h>
95 #include <vm/hat_pte.h>
96
97 /*
98 * cpu0 and default tables and structures.
99 */
100 user_desc_t *gdt0;
101 #if !defined(__xpv)
102 desctbr_t gdt0_default_r;
103 #endif
104
105 gate_desc_t *idt0; /* interrupt descriptor table */
106 #if defined(__i386)
107 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */
108 #endif
109
110 tss_t *ktss0; /* kernel task state structure */
111
112 #if defined(__i386)
113 tss_t *dftss0; /* #DF double-fault exception */
114 #endif /* __i386 */
115
116 user_desc_t zero_udesc; /* base zero user desc native procs */
117 user_desc_t null_udesc; /* null user descriptor */
118 system_desc_t null_sdesc; /* null system descriptor */
119
120 #if defined(__amd64)
121 user_desc_t zero_u32desc; /* 32-bit compatibility procs */
122 #endif /* __amd64 */
123
124 #if defined(__amd64)
125 user_desc_t ucs_on;
126 user_desc_t ucs_off;
127 user_desc_t ucs32_on;
128 user_desc_t ucs32_off;
129 #endif /* __amd64 */
130
131 #pragma align 16(dblfault_stack0)
132 char dblfault_stack0[DEFAULTSTKSZ];
133
134 extern void fast_null(void);
135 extern hrtime_t get_hrtime(void);
136 extern hrtime_t gethrvtime(void);
137 extern hrtime_t get_hrestime(void);
138 extern uint64_t getlgrp(void);
139
140 void (*(fasttable[]))(void) = {
141 fast_null, /* T_FNULL routine */
142 fast_null, /* T_FGETFP routine (initially null) */
143 fast_null, /* T_FSETFP routine (initially null) */
144 (void (*)())get_hrtime, /* T_GETHRTIME */
145 (void (*)())gethrvtime, /* T_GETHRVTIME */
146 (void (*)())get_hrestime, /* T_GETHRESTIME */
147 (void (*)())getlgrp /* T_GETLGRP */
148 };
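
/*
 * Userland reaches the routines above through the T_FASTTRAP gate that
 * init_idt_common() installs at vector 210 (0xd2) below; the fast trap
 * entry point dispatches through this table using the fast trap number
 * requested by the caller.
 */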
149
150 /*
151 * Structure containing pre-computed descriptors to allow us to temporarily
152 * interpose on a standard handler.
153 */
154 struct interposing_handler {
155 int ih_inum;
156 gate_desc_t ih_interp_desc;
157 gate_desc_t ih_default_desc;
158 };
159
160 /*
161 * The brand infrastructure interposes on two handlers, and we use one as a
162 * NULL signpost.
163 */
164 static struct interposing_handler brand_tbl[2];
165
166 /*
167 * software prototypes for default local descriptor table
168 */
169
170 /*
171 * Routines for loading segment descriptors in format the hardware
172 * can understand.
173 */
174
175 #if defined(__amd64)
176
177 /*
178  * In long mode we have the new L (long mode) attribute bit
179  * for code segments. Only the conforming bit in type is used along
180  * with the descriptor privilege level and present bits. Default operand
181  * size must be zero when in long mode. In 32-bit compatibility mode all
182  * fields are treated as in legacy mode. For data segments while in long
183  * mode only the present bit is loaded.
184 */
185 void
186 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
187 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
188 {
189 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
190
191 /*
192 * 64-bit long mode.
193 */
194 if (lmode == SDP_LONG)
195 dp->usd_def32 = 0; /* 32-bit operands only */
196 else
197 /*
198 * 32-bit compatibility mode.
199 */
200 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
201
202 dp->usd_long = lmode; /* 64-bit mode */
203 dp->usd_type = type;
204 dp->usd_dpl = dpl;
205 dp->usd_p = 1;
206 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
207
208 dp->usd_lobase = (uintptr_t)base;
209 dp->usd_midbase = (uintptr_t)base >> 16;
210 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
211 dp->usd_lolimit = size;
212 dp->usd_hilimit = (uintptr_t)size >> 16;
213 }
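
/*
 * As an example of the two modes, init_gdt_common() below builds the
 * 64-bit kernel code descriptor (base and limit are ignored in long
 * mode) with
 *
 *	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
 *	    SDP_PAGES, SDP_OP32);
 *
 * and the 32-bit compatibility user code descriptor, whose base, limit
 * and default operand size all matter, with
 *
 *	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
 *	    SEL_UPL, SDP_PAGES, SDP_OP32);
 */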
214
215 #elif defined(__i386)
216
217 /*
218 * Install user segment descriptor for code and data.
219 */
220 void
221 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
222 uint_t dpl, uint_t gran, uint_t defopsz)
223 {
224 dp->usd_lolimit = size;
225 dp->usd_hilimit = (uintptr_t)size >> 16;
226
227 dp->usd_lobase = (uintptr_t)base;
228 dp->usd_midbase = (uintptr_t)base >> 16;
229 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
230
231 dp->usd_type = type;
232 dp->usd_dpl = dpl;
233 dp->usd_p = 1;
234 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */
235 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
236 }
237
238 #endif /* __i386 */
239
240 /*
241 * Install system segment descriptor for LDT and TSS segments.
242 */
243
244 #if defined(__amd64)
245
246 void
247 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
248 uint_t dpl)
249 {
250 dp->ssd_lolimit = size;
251 dp->ssd_hilimit = (uintptr_t)size >> 16;
252
253 dp->ssd_lobase = (uintptr_t)base;
254 dp->ssd_midbase = (uintptr_t)base >> 16;
255 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
256 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
257
258 dp->ssd_type = type;
259 dp->ssd_zero1 = 0; /* must be zero */
260 dp->ssd_zero2 = 0;
261 dp->ssd_dpl = dpl;
262 dp->ssd_p = 1;
263 dp->ssd_gran = 0; /* force byte units */
264 }
265
266 void *
267 get_ssd_base(system_desc_t *dp)
268 {
269 uintptr_t base;
270
271 base = (uintptr_t)dp->ssd_lobase |
272 (uintptr_t)dp->ssd_midbase << 16 |
273 (uintptr_t)dp->ssd_hibase << (16 + 8) |
274 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
275 return ((void *)base);
276 }
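
/*
 * set_syssegd() and get_ssd_base() are inverses with respect to the base
 * address: the former scatters it across the lobase/midbase/hibase/
 * hi64base fields and the latter reassembles it. For example, after
 * init_gdt_common() installs the kernel TSS with
 *
 *	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 *	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 *
 * calling get_ssd_base() on that descriptor returns ktss0.
 */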
277
278 #elif defined(__i386)
279
280 void
281 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
282 uint_t dpl)
283 {
284 dp->ssd_lolimit = size;
285 dp->ssd_hilimit = (uintptr_t)size >> 16;
286
287 dp->ssd_lobase = (uintptr_t)base;
288 dp->ssd_midbase = (uintptr_t)base >> 16;
289 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
290
291 dp->ssd_type = type;
292 dp->ssd_zero = 0; /* must be zero */
293 dp->ssd_dpl = dpl;
294 dp->ssd_p = 1;
295 dp->ssd_gran = 0; /* force byte units */
296 }
297
298 void *
299 get_ssd_base(system_desc_t *dp)
300 {
301 uintptr_t base;
302
303 base = (uintptr_t)dp->ssd_lobase |
304 (uintptr_t)dp->ssd_midbase << 16 |
305 (uintptr_t)dp->ssd_hibase << (16 + 8);
306 return ((void *)base);
307 }
308
309 #endif /* __i386 */
310
311 /*
312 * Install gate segment descriptor for interrupt, trap, call and task gates.
313 */
314
315 #if defined(__amd64)
316
317 /*ARGSUSED*/
318 void
319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
320 uint_t type, uint_t dpl, uint_t vector)
321 {
322 dp->sgd_looffset = (uintptr_t)func;
323 dp->sgd_hioffset = (uintptr_t)func >> 16;
324 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
325
326 dp->sgd_selector = (uint16_t)sel;
327
328 /*
329 * For 64 bit native we use the IST stack mechanism
330 * for double faults. All other traps use the CPL = 0
331 * (tss_rsp0) stack.
332 */
333 #if !defined(__xpv)
334 if (vector == T_DBLFLT)
335 dp->sgd_ist = 1;
336 else
337 #endif
338 dp->sgd_ist = 0;
339
340 dp->sgd_type = type;
341 dp->sgd_dpl = dpl;
342 dp->sgd_p = 1;
343 }
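
/*
 * For example, init_idt_common() below installs the page fault handler
 * as a kernel-only interrupt gate with
 *
 *	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 *
 * and passes vector T_DBLFLT for the double fault entry, which marks
 * that gate to run on IST stack 1 (set up in init_tss()).
 */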
344
345 #elif defined(__i386)
346
347 /*ARGSUSED*/
348 void
349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
350 uint_t type, uint_t dpl, uint_t unused)
351 {
352 dp->sgd_looffset = (uintptr_t)func;
353 dp->sgd_hioffset = (uintptr_t)func >> 16;
354
355 dp->sgd_selector = (uint16_t)sel;
356 dp->sgd_stkcpy = 0; /* always zero bytes */
357 dp->sgd_type = type;
358 dp->sgd_dpl = dpl;
359 dp->sgd_p = 1;
360 }
361
362 #endif /* __i386 */
363
364 /*
365  * Updates a single user descriptor in the GDT of the current cpu.
366 * Caller is responsible for preventing cpu migration.
367 */
368
369 void
370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371 {
372 #if defined(__xpv)
373
374 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375
376 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378
379 #else /* __xpv */
380
381 CPU->cpu_gdt[sidx] = *udp;
382
383 #endif /* __xpv */
384 }
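
/*
 * Illustrative use only (the caller shown here is hypothetical): with
 * migration blocked, e.g. via kpreempt_disable(), an lwp-private slot
 * such as GDT_LWPFS can be refreshed on the current cpu with
 *
 *	gdt_update_usegd(GDT_LWPFS, &new_fsdesc);
 *
 * where new_fsdesc is a user_desc_t prepared with set_usegd().
 */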
385
386 /*
387  * Writes the single descriptor pointed to by udp into a process's
388 * LDT entry pointed to by ldp.
389 */
390 int
391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392 {
393 #if defined(__xpv)
394
395 uint64_t dpa;
396
397 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398 ((uintptr_t)ldp & PAGEOFFSET);
399
400 /*
401 * The hypervisor is a little more restrictive about what it
402 * supports in the LDT.
403 */
404 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405 return (EINVAL);
406
407 #else /* __xpv */
408
409 *ldp = *udp;
410
411 #endif /* __xpv */
412 return (0);
413 }
414
415 #if defined(__xpv)
416
417 /*
418 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
419 * Returns true if a valid entry was written.
420 */
421 int
422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423 {
424 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
425
426 /*
427 * skip holes in the IDT
428 */
429 if (GATESEG_GETOFFSET(sgd) == 0)
430 return (0);
431
432 ASSERT(sgd->sgd_type == SDT_SYSIGT);
433 ti->vector = vec;
434 TI_SET_DPL(ti, sgd->sgd_dpl);
435
436 /*
437 * Is this an interrupt gate?
438 */
439 if (sgd->sgd_type == SDT_SYSIGT) {
440 /* LINTED */
441 TI_SET_IF(ti, 1);
442 }
443 ti->cs = sgd->sgd_selector;
444 #if defined(__amd64)
445 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
446 #endif
447 ti->address = GATESEG_GETOFFSET(sgd);
448 return (1);
449 }
450
451 /*
452 * Convert a single hw format gate descriptor and write it into our virtual IDT.
453 */
454 void
455 xen_idt_write(gate_desc_t *sgd, uint_t vec)
456 {
457 trap_info_t trapinfo[2];
458
459 bzero(trapinfo, sizeof (trapinfo));
460 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461 return;
462 if (xen_set_trap_table(trapinfo) != 0)
463 panic("xen_idt_write: xen_set_trap_table() failed");
464 }
465
466 #endif /* __xpv */
467
468 #if defined(__amd64)
469
470 /*
471 * Build kernel GDT.
472 */
473
474 static void
475 init_gdt_common(user_desc_t *gdt)
476 {
477 int i;
478
479 /*
480 * 64-bit kernel code segment.
481 */
482 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
483 SDP_PAGES, SDP_OP32);
484
485 /*
486 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
487 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
488 * instruction to return from system calls back to 32-bit applications.
489 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
490  * descriptors. We must therefore ensure that the kernel uses something
491  * that, though ignored by the hardware, is compatible with 32-bit
492  * apps. For the same reason we must set the default op size of this
493 * descriptor to 32-bit operands.
494 */
495 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
496 SEL_KPL, SDP_PAGES, SDP_OP32);
497 gdt[GDT_KDATA].usd_def32 = 1;
498
499 /*
500 * 64-bit user code segment.
501 */
502 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
503 SDP_PAGES, SDP_OP32);
504
505 /*
506 * 32-bit user code segment.
507 */
508 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
509 SEL_UPL, SDP_PAGES, SDP_OP32);
510
511 /*
512 * See gdt_ucode32() and gdt_ucode_native().
513 */
514 ucs_on = ucs_off = gdt[GDT_UCODE];
515 ucs_off.usd_p = 0; /* forces #np fault */
516
517 ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518 ucs32_off.usd_p = 0; /* forces #np fault */
519
520 /*
521 * 32 and 64 bit data segments can actually share the same descriptor.
522 * In long mode only the present bit is checked but all other fields
523 * are loaded. But in compatibility mode all fields are interpreted
524 * as in legacy mode so they must be set correctly for a 32-bit data
525 * segment.
526 */
527 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
528 SDP_PAGES, SDP_OP32);
529
530 #if !defined(__xpv)
531
532 /*
533 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
534 * in the GDT is 0.
535 */
536
537 /*
538 * Kernel TSS
539 */
540 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
541 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
542
543 #endif /* !__xpv */
544
545 /*
546 * Initialize fs and gs descriptors for 32 bit processes.
547  * Only attributes and limits are initialized; the effective
548 * base address is programmed via fsbase/gsbase.
549 */
550 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
551 SEL_UPL, SDP_PAGES, SDP_OP32);
552 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
553 SEL_UPL, SDP_PAGES, SDP_OP32);
554
555 /*
556 * Initialize the descriptors set aside for brand usage.
557 * Only attributes and limits are initialized.
558 */
559 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
560 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
561 SEL_UPL, SDP_PAGES, SDP_OP32);
562
563 /*
564 * Initialize convenient zero base user descriptors for clearing
565 * lwp private %fs and %gs descriptors in GDT. See setregs() for
566 * an example.
567 */
568 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
569 SDP_BYTES, SDP_OP32);
570 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
571 SDP_PAGES, SDP_OP32);
572 }
573
574 #if defined(__xpv)
575
576 static user_desc_t *
577 init_gdt(void)
578 {
579 uint64_t gdtpa;
580 ulong_t ma[1]; /* XXPV should be a memory_t */
581 ulong_t addr;
582
583 #if !defined(__lint)
584 /*
585 * Our gdt is never larger than a single page.
586 */
587 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588 #endif
589 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590 PAGESIZE, PAGESIZE);
591 bzero(gdt0, PAGESIZE);
592
593 init_gdt_common(gdt0);
594
595 /*
596 * XXX Since we never invoke kmdb until after the kernel takes
597 * over the descriptor tables why not have it use the kernel's
598 * selectors?
599 */
600 if (boothowto & RB_DEBUG) {
601 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
602 SEL_KPL, SDP_PAGES, SDP_OP32);
603 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
604 SEL_KPL, SDP_PAGES, SDP_OP32);
605 }
606
607 /*
608 * Clear write permission for page containing the gdt and install it.
609 */
610 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
611 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
612 kbm_read_only((uintptr_t)gdt0, gdtpa);
613 xen_set_gdt(ma, NGDT);
614
615 /*
616 * Reload the segment registers to use the new GDT.
617 * On 64-bit, fixup KCS_SEL to be in ring 3.
618 * See KCS_SEL in segments.h.
619 */
620 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
621
622 /*
623 * setup %gs for kernel
624 */
625 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
626
627 /*
628 * XX64 We should never dereference off "other gsbase" or
629 * "fsbase". So, we should arrange to point FSBASE and
630 * KGSBASE somewhere truly awful e.g. point it at the last
631 * valid address below the hole so that any attempts to index
632 * off them cause an exception.
633 *
634 * For now, point it at 8G -- at least it should be unmapped
635 * until some 64-bit processes run.
636 */
637 addr = 0x200000000ul;
638 xen_set_segment_base(SEGBASE_FS, addr);
639 xen_set_segment_base(SEGBASE_GS_USER, addr);
640 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
641
642 return (gdt0);
643 }
644
645 #else /* __xpv */
646
647 static user_desc_t *
648 init_gdt(void)
649 {
650 desctbr_t r_bgdt, r_gdt;
651 user_desc_t *bgdt;
652
653 #if !defined(__lint)
654 /*
655 * Our gdt is never larger than a single page.
656 */
657 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
658 #endif
659 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
660 PAGESIZE, PAGESIZE);
661 bzero(gdt0, PAGESIZE);
662
663 init_gdt_common(gdt0);
664
665 /*
666 * Copy in from boot's gdt to our gdt.
667 * Entry 0 is the null descriptor by definition.
668 */
669 rd_gdtr(&r_bgdt);
670 bgdt = (user_desc_t *)r_bgdt.dtr_base;
671 if (bgdt == NULL)
672 panic("null boot gdt");
673
674 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
675 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
676 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
677 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
678 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
679
680 /*
681 * Install our new GDT
682 */
683 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
684 r_gdt.dtr_base = (uintptr_t)gdt0;
685 wr_gdtr(&r_gdt);
686
687 /*
688 * Reload the segment registers to use the new GDT
689 */
690 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
691
692 /*
693 * setup %gs for kernel
694 */
695 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
696
697 /*
698 * XX64 We should never dereference off "other gsbase" or
699 * "fsbase". So, we should arrange to point FSBASE and
700 * KGSBASE somewhere truly awful e.g. point it at the last
701 * valid address below the hole so that any attempts to index
702 * off them cause an exception.
703 *
704 * For now, point it at 8G -- at least it should be unmapped
705 * until some 64-bit processes run.
706 */
707 wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
708 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
709 return (gdt0);
710 }
711
712 #endif /* __xpv */
713
714 #elif defined(__i386)
715
716 static void
717 init_gdt_common(user_desc_t *gdt)
718 {
719 int i;
720
721 /*
722 * Text and data for both kernel and user span entire 32 bit
723 * address space.
724 */
725
726 /*
727 * kernel code segment.
728 */
729 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
730 SDP_OP32);
731
732 /*
733 * kernel data segment.
734 */
735 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
736 SDP_OP32);
737
738 /*
739 * user code segment.
740 */
741 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
742 SDP_OP32);
743
744 /*
745 * user data segment.
746 */
747 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
748 SDP_OP32);
749
750 #if !defined(__xpv)
751
752 /*
753 * TSS for T_DBLFLT (double fault) handler
754 */
755 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
756 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
757
758 /*
759 * TSS for kernel
760 */
761 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
762 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
763
764 #endif /* !__xpv */
765
766 /*
767 * %gs selector for kernel
768 */
769 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
770 SEL_KPL, SDP_BYTES, SDP_OP32);
771
772 /*
773 * Initialize lwp private descriptors.
774  * Only attributes and limits are initialized; the effective
775 * base address is programmed via fsbase/gsbase.
776 */
777 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
778 SDP_PAGES, SDP_OP32);
779 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
780 SDP_PAGES, SDP_OP32);
781
782 /*
783 * Initialize the descriptors set aside for brand usage.
784 * Only attributes and limits are initialized.
785 */
786 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
787 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
788 SDP_PAGES, SDP_OP32);
789 /*
790 * Initialize convenient zero base user descriptor for clearing
791 * lwp private %fs and %gs descriptors in GDT. See setregs() for
792 * an example.
793 */
794 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
795 SDP_BYTES, SDP_OP32);
796 }
797
798 #if defined(__xpv)
799
800 static user_desc_t *
801 init_gdt(void)
802 {
803 uint64_t gdtpa;
804 ulong_t ma[1]; /* XXPV should be a memory_t */
805
806 #if !defined(__lint)
807 /*
808 * Our gdt is never larger than a single page.
809 */
810 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
811 #endif
812 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
813 PAGESIZE, PAGESIZE);
814 bzero(gdt0, PAGESIZE);
815
816 init_gdt_common(gdt0);
817 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
818
819 /*
820 * XXX Since we never invoke kmdb until after the kernel takes
821 * over the descriptor tables why not have it use the kernel's
822 * selectors?
823 */
824 if (boothowto & RB_DEBUG) {
825 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
826 SDP_PAGES, SDP_OP32);
827 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
828 SDP_PAGES, SDP_OP32);
829 }
830
831 /*
832 * Clear write permission for page containing the gdt and install it.
833 */
834 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
835 kbm_read_only((uintptr_t)gdt0, gdtpa);
836 xen_set_gdt(ma, NGDT);
837
838 /*
839 * Reload the segment registers to use the new GDT
840 */
841 load_segment_registers(
842 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
843
844 return (gdt0);
845 }
846
847 #else /* __xpv */
848
849 static user_desc_t *
850 init_gdt(void)
851 {
852 desctbr_t r_bgdt, r_gdt;
853 user_desc_t *bgdt;
854
855 #if !defined(__lint)
856 /*
857 * Our gdt is never larger than a single page.
858 */
859 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
860 #endif
861 /*
862 * XXX this allocation belongs in our caller, not here.
863 */
864 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
865 PAGESIZE, PAGESIZE);
866 bzero(gdt0, PAGESIZE);
867
868 init_gdt_common(gdt0);
869
870 /*
871 * Copy in from boot's gdt to our gdt entries.
872 * Entry 0 is null descriptor by definition.
873 */
874 rd_gdtr(&r_bgdt);
875 bgdt = (user_desc_t *)r_bgdt.dtr_base;
876 if (bgdt == NULL)
877 panic("null boot gdt");
878
879 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
880 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
881 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
882 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
883
884 /*
885 * Install our new GDT
886 */
887 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
888 r_gdt.dtr_base = (uintptr_t)gdt0;
889 wr_gdtr(&r_gdt);
890
891 /*
892 * Reload the segment registers to use the new GDT
893 */
894 load_segment_registers(
895 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
896
897 return (gdt0);
898 }
899
900 #endif /* __xpv */
901 #endif /* __i386 */
902
903 /*
904 * Build kernel IDT.
905 *
906 * Note that for amd64 we pretty much require every gate to be an interrupt
907 * gate which blocks interrupts atomically on entry; that's because of our
908 * dependency on using 'swapgs' every time we come into the kernel to find
909 * the cpu structure. If we get interrupted just before doing that, %cs could
910 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
911 * %gsbase is really still pointing at something in userland. Bad things will
912  * ensue. We use interrupt gates on i386 as well, even though this is not
913 * required for some traps.
914 *
915 * Perhaps they should have invented a trap gate that does an atomic swapgs?
916 */
917 static void
918 init_idt_common(gate_desc_t *idt)
919 {
920 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 0);
922 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
923 0);
924 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
925 0);
926 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
927 0);
928 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
929 0);
930 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
931 TRP_KPL, 0);
932 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
933 0);
934 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
935 0);
936
937 /*
938 * double fault handler.
939 *
940 * Note that on the hypervisor a guest does not receive #df faults.
941 * Instead a failsafe event is injected into the guest if its selectors
942 * and/or stack is in a broken state. See xen_failsafe_callback.
943 */
944 #if !defined(__xpv)
945 #if defined(__amd64)
946
947 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
948 T_DBLFLT);
949
950 #elif defined(__i386)
951
952 /*
953 * task gate required.
954 */
955 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
956 0);
957
958 #endif /* __i386 */
959 #endif /* !__xpv */
960
961 /*
962 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
963 */
964
965 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
966 0);
967 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
968 0);
969 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
970 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
971 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
972 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
973 0);
974 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
975 TRP_KPL, 0);
976 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
977 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
978
979 /*
980 * install fast trap handler at 210.
981 */
982 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
983 0);
984
985 /*
986 * System call handler.
987 */
988 #if defined(__amd64)
989 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
990 TRP_UPL, 0);
991
992 #elif defined(__i386)
993 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
994 TRP_UPL, 0);
995 #endif /* __i386 */
996
997 /*
998 * Install the DTrace interrupt handler for the pid provider.
999 */
1000 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001 SDT_SYSIGT, TRP_UPL, 0);
1002
1003 /*
1004 * Prepare interposing descriptor for the syscall handler
1005 * and cache copy of the default descriptor.
1006 */
1007 brand_tbl[0].ih_inum = T_SYSCALLINT;
1008 brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1009
1010 #if defined(__amd64)
1011 set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1012 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 #elif defined(__i386)
1014 set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1015 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 #endif /* __i386 */
1017
1018 brand_tbl[1].ih_inum = 0;
1019 }
1020
1021 #if defined(__xpv)
1022
1023 static void
1024 init_idt(gate_desc_t *idt)
1025 {
1026 init_idt_common(idt);
1027 }
1028
1029 #else /* __xpv */
1030
1031 static void
1032 init_idt(gate_desc_t *idt)
1033 {
1034 char ivctname[80];
1035 void (*ivctptr)(void);
1036 int i;
1037
1038 /*
1039 * Initialize entire table with 'reserved' trap and then overwrite
1040 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041 * since it can only be generated on a 386 processor. 15 is also
1042 * unsupported and reserved.
1043 */
1044 for (i = 0; i < NIDT; i++)
1045 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046 0);
1047
1048 /*
1049 * 20-31 reserved
1050 */
1051 for (i = 20; i < 32; i++)
1052 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053 0);
1054
1055 /*
1056 * interrupts 32 - 255
1057 */
1058 for (i = 32; i < 256; i++) {
1059 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1060 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061 if (ivctptr == NULL)
1062 panic("kobj_getsymvalue(%s) failed", ivctname);
1063
1064 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1065 }
1066
1067 /*
1068 * Now install the common ones. Note that it will overlay some
1069 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070 */
1071 init_idt_common(idt);
1072 }
1073
1074 #endif /* __xpv */
1075
1076 /*
1077 * The kernel does not deal with LDTs unless a user explicitly creates
1078 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1079 * to reference the LDT will therefore cause a #gp. System calls made via the
1080 * obsolete lcall mechanism are emulated by the #gp fault handler.
1081 */
1082 static void
1083 init_ldt(void)
1084 {
1085 #if defined(__xpv)
1086 xen_set_ldt(NULL, 0);
1087 #else
1088 wr_ldtr(0);
1089 #endif
1090 }
1091
1092 #if !defined(__xpv)
1093 #if defined(__amd64)
1094
1095 static void
1096 init_tss(void)
1097 {
1098 /*
1099 * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100 * All exceptions but #DF will run on the thread stack.
1101 * Set up the double fault stack here.
1102 */
1103 ktss0->tss_ist1 =
1104 (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1105
1106 /*
1107 * Set I/O bit map offset equal to size of TSS segment limit
1108 * for no I/O permission map. This will force all user I/O
1109 * instructions to generate #gp fault.
1110 */
1111 ktss0->tss_bitmapbase = sizeof (*ktss0);
1112
1113 /*
1114 * Point %tr to descriptor for ktss0 in gdt.
1115 */
1116 wr_tsr(KTSS_SEL);
1117 }
1118
1119 #elif defined(__i386)
1120
1121 static void
1122 init_tss(void)
1123 {
1124 /*
1125  * ktss0->tss_esp is dynamically filled in by resume() on each
1126 * context switch.
1127 */
1128 ktss0->tss_ss0 = KDS_SEL;
1129 ktss0->tss_eip = (uint32_t)_start;
1130 ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131 ktss0->tss_cs = KCS_SEL;
1132 ktss0->tss_fs = KFS_SEL;
1133 ktss0->tss_gs = KGS_SEL;
1134 ktss0->tss_ldt = ULDT_SEL;
1135
1136 /*
1137 * Initialize double fault tss.
1138 */
1139 dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140 dftss0->tss_ss0 = KDS_SEL;
1141
1142 /*
1143 * tss_cr3 will get initialized in hat_kern_setup() once our page
1144  * tables have been set up.
1145 */
1146 dftss0->tss_eip = (uint32_t)syserrtrap;
1147 dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148 dftss0->tss_cs = KCS_SEL;
1149 dftss0->tss_ds = KDS_SEL;
1150 dftss0->tss_es = KDS_SEL;
1151 dftss0->tss_ss = KDS_SEL;
1152 dftss0->tss_fs = KFS_SEL;
1153 dftss0->tss_gs = KGS_SEL;
1154
1155 /*
1156 * Set I/O bit map offset equal to size of TSS segment limit
1157 * for no I/O permission map. This will force all user I/O
1158 * instructions to generate #gp fault.
1159 */
1160 ktss0->tss_bitmapbase = sizeof (*ktss0);
1161
1162 /*
1163 * Point %tr to descriptor for ktss0 in gdt.
1164 */
1165 wr_tsr(KTSS_SEL);
1166 }
1167
1168 #endif /* __i386 */
1169 #endif /* !__xpv */
1170
1171 #if defined(__xpv)
1172
1173 void
1174 init_desctbls(void)
1175 {
1176 uint_t vec;
1177 user_desc_t *gdt;
1178
1179 /*
1180 * Setup and install our GDT.
1181 */
1182 gdt = init_gdt();
1183
1184 /*
1185 * Store static pa of gdt to speed up pa_to_ma() translations
1186 * on lwp context switches.
1187 */
1188 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1189 CPU->cpu_gdt = gdt;
1190 CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1191
1192 /*
1193 * Setup and install our IDT.
1194 */
1195 #if !defined(__lint)
1196 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1197 #endif
1198 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1199 PAGESIZE, PAGESIZE);
1200 bzero(idt0, PAGESIZE);
1201 init_idt(idt0);
1202 for (vec = 0; vec < NIDT; vec++)
1203 xen_idt_write(&idt0[vec], vec);
1204
1205 CPU->cpu_idt = idt0;
1206
1207 /*
1208 * set default kernel stack
1209 */
1210 xen_stack_switch(KDS_SEL,
1211 (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1212
1213 xen_init_callbacks();
1214
1215 init_ldt();
1216 }
1217
1218 #else /* __xpv */
1219
1220 void
1221 init_desctbls(void)
1222 {
1223 user_desc_t *gdt;
1224 desctbr_t idtr;
1225
1226 /*
1227 * Allocate IDT and TSS structures on unique pages for better
1228 * performance in virtual machines.
1229 */
1230 #if !defined(__lint)
1231 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1232 #endif
1233 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1234 PAGESIZE, PAGESIZE);
1235 bzero(idt0, PAGESIZE);
1236 #if !defined(__lint)
1237 ASSERT(sizeof (*ktss0) <= PAGESIZE);
1238 #endif
1239 ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1240 PAGESIZE, PAGESIZE);
1241 bzero(ktss0, PAGESIZE);
1242
1243 #if defined(__i386)
1244 #if !defined(__lint)
1245 ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 #endif
1247 dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248 PAGESIZE, PAGESIZE);
1249 bzero(dftss0, PAGESIZE);
1250 #endif
1251
1252 /*
1253 * Setup and install our GDT.
1254 */
1255 gdt = init_gdt();
1256 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257 CPU->cpu_gdt = gdt;
1258
1259 /*
1260 * Setup and install our IDT.
1261 */
1262 init_idt(idt0);
1263
1264 idtr.dtr_base = (uintptr_t)idt0;
1265 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266 wr_idtr(&idtr);
1267 CPU->cpu_idt = idt0;
1268
1269 #if defined(__i386)
1270 /*
1271 * We maintain a description of idt0 in convenient IDTR format
1272 * for #pf's on some older pentium processors. See pentium_pftrap().
1273 */
1274 idt0_default_r = idtr;
1275 #endif /* __i386 */
1276
1277 init_tss();
1278 CPU->cpu_tss = ktss0;
1279 init_ldt();
1280 }
1281
1282 #endif /* __xpv */
1283
1284 /*
1285 * In the early kernel, we need to set up a simple GDT to run on.
1286 *
1287 * XXPV Can dboot use this too? See dboot_gdt.s
1288 */
1289 void
1290 init_boot_gdt(user_desc_t *bgdt)
1291 {
1292 #if defined(__amd64)
1293 set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294 SDP_PAGES, SDP_OP32);
1295 set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296 SDP_PAGES, SDP_OP32);
1297 #elif defined(__i386)
1298 set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299 SDP_PAGES, SDP_OP32);
1300 set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1301 SDP_PAGES, SDP_OP32);
1302 #endif /* __i386 */
1303 }
1304
1305 /*
1306 * Enable interpositioning on the system call path by rewriting the
1307 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1308 * the branded entry points.
1309 */
1310 void
1311 brand_interpositioning_enable(void)
1312 {
1313 gate_desc_t *idt = CPU->cpu_idt;
1314 int i;
1315
1316 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1317
1318 for (i = 0; brand_tbl[i].ih_inum; i++) {
1319 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1320 #if defined(__xpv)
1321 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322 brand_tbl[i].ih_inum);
1323 #endif
1324 }
1325
1326 #if defined(__amd64)
1327 #if defined(__xpv)
1328
1329 /*
1330 	 * Currently the hypervisor only supports 64-bit syscalls via the
1331 	 * syscall instruction. The 32-bit syscalls are handled by the
1332 	 * interrupt gate above.
1333 */
1334 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335 CALLBACKF_mask_events);
1336
1337 #else
1338
1339 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1340 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1342 }
1343
1344 #endif
1345 #endif /* __amd64 */
1346
1347 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1348 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1349 }
1350
1351 /*
1352 * Disable interpositioning on the system call path by rewriting the
1353 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354 * the standard entry points, which bypass the interpositioning hooks.
1355 */
1356 void
1357 brand_interpositioning_disable(void)
1358 {
1359 gate_desc_t *idt = CPU->cpu_idt;
1360 int i;
1361
1362 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363
1364 for (i = 0; brand_tbl[i].ih_inum; i++) {
1365 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 #if defined(__xpv)
1367 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368 brand_tbl[i].ih_inum);
1369 #endif
1370 }
1371
1372 #if defined(__amd64)
1373 #if defined(__xpv)
1374
1375 /*
1376 * See comment above in brand_interpositioning_enable.
1377 */
1378 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379 CALLBACKF_mask_events);
1380
1381 #else
1382
1383 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1384 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1386 }
1387
1388 #endif
1389 #endif /* __amd64 */
1390
1391 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1392 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1393 }
1394