1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
27 /* All Rights Reserved */
28
29 /* Copyright (c) 1987, 1988 Microsoft Corporation */
30 /* All Rights Reserved */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/systm.h>
36 #include <sys/signal.h>
37 #include <sys/errno.h>
38 #include <sys/fault.h>
39 #include <sys/syscall.h>
40 #include <sys/cpuvar.h>
41 #include <sys/sysi86.h>
42 #include <sys/psw.h>
43 #include <sys/cred.h>
44 #include <sys/policy.h>
45 #include <sys/thread.h>
46 #include <sys/debug.h>
47 #include <sys/ontrap.h>
48 #include <sys/privregs.h>
49 #include <sys/x86_archext.h>
50 #include <sys/vmem.h>
51 #include <sys/kmem.h>
52 #include <sys/mman.h>
53 #include <sys/archsystm.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/seg.h>
57 #include <vm/seg_kmem.h>
58 #include <vm/faultcode.h>
59 #include <sys/fp.h>
60 #include <sys/cmn_err.h>
61 #include <sys/segments.h>
62 #include <sys/clock.h>
63 #if defined(__xpv)
64 #include <sys/hypervisor.h>
65 #include <sys/note.h>
66 #endif
67
68 static void ldt_alloc(proc_t *, uint_t);
69 static void ldt_free(proc_t *);
70 static void ldt_dup(proc_t *, proc_t *);
71 static void ldt_grow(proc_t *, uint_t);
72
73 /*
74 * sysi86 System Call
75 */
76
77 /* ARGSUSED */
78 int
79 sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
80 {
81 struct ssd ssd;
82 int error = 0;
83 int c;
84 proc_t *pp = curproc;
85
86 switch (cmd) {
87
88 /*
89 * The SI86V86 subsystem call of the SYSI86 system call
90 * supports only one subcode -- V86SC_IOPL.
91 */
92 case SI86V86:
93 if (arg1 == V86SC_IOPL) {
94 struct regs *rp = lwptoregs(ttolwp(curthread));
95 greg_t oldpl = rp->r_ps & PS_IOPL;
96 greg_t newpl = arg2 & PS_IOPL;
97
98 /*
99 * Must be privileged to run this system call
100 * when granting more I/O privilege.
101 */
102 if (newpl > oldpl && (error =
103 secpolicy_sys_config(CRED(), B_FALSE)) != 0)
104 return (set_errno(error));
105 #if defined(__xpv)
106 kpreempt_disable();
107 installctx(curthread, NULL, xen_disable_user_iopl,
108 xen_enable_user_iopl, NULL, NULL,
109 xen_disable_user_iopl, NULL);
110 xen_enable_user_iopl();
111 kpreempt_enable();
112 #else
113 rp->r_ps ^= oldpl ^ newpl;
114 #endif
115 } else
116 error = EINVAL;
117 break;
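/*
 * Illustrative sketch, not part of the original source: a user program
 * would request full I/O privilege roughly as follows, assuming the usual
 * <sys/sysi86.h> and <sys/psw.h> definitions (PS_IOPL is the two-bit IOPL
 * field, 0x3000, in the flags register):
 *
 *	#include <sys/sysi86.h>
 *	#include <sys/psw.h>
 *
 *	if (sysi86(SI86V86, V86SC_IOPL, PS_IOPL) == -1)
 *		perror("sysi86(SI86V86, V86SC_IOPL)");
 *
 * Raising IOPL above its current value requires the privilege checked by
 * secpolicy_sys_config() above; lowering it does not.
 */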
118
119 /*
120 * Set a segment descriptor
121 */
122 case SI86DSCR:
123 /*
124 * There are considerable problems here manipulating
125 * resources shared by many running lwps. Get everyone
126 * into a safe state before changing the LDT.
127 */
128 if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
129 error = EINTR;
130 break;
131 }
132
133 if (get_udatamodel() == DATAMODEL_LP64) {
134 error = EINVAL;
135 break;
136 }
137
138 if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
139 error = EFAULT;
140 break;
141 }
142
143 error = setdscr(&ssd);
144
145 mutex_enter(&pp->p_lock);
146 if (curthread != pp->p_agenttp)
147 continuelwps(pp);
148 mutex_exit(&pp->p_lock);
149 break;
150
151 case SI86FPHW:
152 c = fp_kind & 0xff;
153 if (suword32((void *)arg1, c) == -1)
154 error = EFAULT;
155 break;
156
157 case SI86FPSTART:
158 /*
159 * arg1 is the address of _fp_hw
160 * arg2 is the desired x87 FCW value
161 * arg3 is the desired SSE MXCSR value
162 * a return value of one means SSE hardware, else none.
163 */
164 c = fp_kind & 0xff;
165 if (suword32((void *)arg1, c) == -1) {
166 error = EFAULT;
167 break;
168 }
169 fpsetcw((uint16_t)arg2, (uint32_t)arg3);
170 return ((fp_kind & __FP_SSE) ? 1 : 0);
171
172 /* real time clock management commands */
173
174 case WTODC:
175 if ((error = secpolicy_settime(CRED())) == 0) {
176 timestruc_t ts;
177 mutex_enter(&tod_lock);
178 gethrestime(&ts);
179 tod_set(ts);
180 mutex_exit(&tod_lock);
181 }
182 break;
183
184 /* Give some timezone playing room */
185 #define ONEWEEK (7 * 24 * 60 * 60)
186
187 case SGMTL:
188 /*
189 * When called from 32-bit land, negative values
190 * are not sign-extended, so we do that here
191 * by casting the value to an int and back. We also
192 * clamp the value to within reason and detect
193 * when a 64-bit call overflows an int.
194 */
195 if ((error = secpolicy_settime(CRED())) == 0) {
196 int newlag = (int)arg1;
197
198 #ifdef _SYSCALL32_IMPL
199 if (get_udatamodel() == DATAMODEL_NATIVE &&
200 (long)newlag != (long)arg1) {
201 error = EOVERFLOW;
202 } else
203 #endif
204 if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
205 sgmtl(newlag);
206 else
207 error = EOVERFLOW;
208 }
209 break;
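/*
 * Worked example of the clamp/overflow logic above (illustrative numbers):
 * a lag of -18000 seconds (UTC-5h) is within +/- ONEWEEK (604800) and is
 * passed to sgmtl(). A 64-bit caller passing arg1 = 0x100000000 + 3600
 * sees newlag truncate to 3600, but (long)newlag != (long)arg1, so the
 * call fails with EOVERFLOW instead of silently using the wrong lag.
 */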
210
211 case GGMTL:
212 if (get_udatamodel() == DATAMODEL_NATIVE) {
213 if (sulword((void *)arg1, ggmtl()) == -1)
214 error = EFAULT;
215 #ifdef _SYSCALL32_IMPL
216 } else {
217 time_t gmtl;
218
219 if ((gmtl = ggmtl()) > INT32_MAX) {
220 /*
221 * Since gmt_lag can at most be
222 * +/- 12 hours, something is
223 * *seriously* messed up here.
224 */
225 error = EOVERFLOW;
226 } else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
227 error = EFAULT;
228 #endif
229 }
230 break;
231
232 case RTCSYNC:
233 if ((error = secpolicy_settime(CRED())) == 0)
234 rtcsync();
235 break;
236
237 /* END OF real time clock management commands */
238
239 default:
240 error = EINVAL;
241 break;
242 }
243 return (error == 0 ? 0 : set_errno(error));
244 }
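/*
 * Illustrative sketch, not part of the original source: a 32-bit process
 * might install an LDT data descriptor with SI86DSCR roughly as follows,
 * assuming the struct ssd layout used above (sel, bo, ls, acc1, acc2) and
 * the usual <sys/sysi86.h> and <sys/segments.h> definitions. The index,
 * limit and access bytes are example values, not required ones:
 *
 *	struct ssd ssd;
 *
 *	ssd.sel  = (LDT_UDBASE << 3) | 0x4 | 0x3;  (LDT table, RPL 3)
 *	ssd.bo   = (unsigned int)(uintptr_t)buffer;  (segment base)
 *	ssd.ls   = 0xfff;   (limit: 4096 bytes, byte granular)
 *	ssd.acc1 = 0xf2;    (present, DPL 3, read/write data)
 *	ssd.acc2 = 0x4;     (32-bit default operand size, byte granular)
 *
 *	if (sysi86(SI86DSCR, &ssd) == -1)
 *		perror("sysi86(SI86DSCR)");
 *
 * LDT_UDBASE is the first index user code may use, per the range check
 * in setdscr() below.
 */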
245
246 void
247 usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
248 {
249 ssd->bo = USEGD_GETBASE(usd);
250 ssd->ls = USEGD_GETLIMIT(usd);
251 ssd->sel = sel;
252
253 /*
254 * set type, dpl and present bits.
255 */
256 ssd->acc1 = usd->usd_type;
257 ssd->acc1 |= usd->usd_dpl << 5;
258 ssd->acc1 |= usd->usd_p << (5 + 2);
259
260 /*
261 * set avl, DB and granularity bits.
262 */
263 ssd->acc2 = usd->usd_avl;
264
265 #if defined(__amd64)
266 ssd->acc2 |= usd->usd_long << 1;
267 #else
268 ssd->acc2 |= usd->usd_reserved << 1;
269 #endif
270
271 ssd->acc2 |= usd->usd_def32 << (1 + 1);
272 ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
273 }
274
275 static void
276 ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
277 {
278
279 ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);
280
281 USEGD_SETBASE(usd, ssd->bo);
282 USEGD_SETLIMIT(usd, ssd->ls);
283
284 /*
285 * set type, dpl and present bits.
286 */
287 usd->usd_type = ssd->acc1;
288 usd->usd_dpl = ssd->acc1 >> 5;
289 usd->usd_p = ssd->acc1 >> (5 + 2);
290
291 ASSERT(usd->usd_type >= SDT_MEMRO);
292 ASSERT(usd->usd_dpl == SEL_UPL);
293
294 /*
295 * 64-bit code selectors are never allowed in the LDT.
296 * Reserved bit is always 0 on 32-bit systems.
297 */
298 #if defined(__amd64)
299 usd->usd_long = 0;
300 #else
301 usd->usd_reserved = 0;
302 #endif
303
304 /*
305 * set avl, DB and granularity bits.
306 */
307 usd->usd_avl = ssd->acc2;
308 usd->usd_def32 = ssd->acc2 >> (1 + 1);
309 usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
310 }
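/*
 * Worked example of the acc1/acc2 packing used by usd_to_ssd() and
 * ssd_to_usd() above (illustrative values): for a present, DPL-3,
 * accessed read/write data segment that is 32-bit and page granular,
 *
 *	acc1 = type | (dpl << 5) | (p << 7)
 *	     = 0x13 | (3 << 5)   | (1 << 7)             = 0xf3
 *	acc2 = avl | (l << 1) | (db << 2) | (g << 3)
 *	     = 0   | 0        | (1 << 2)  | (1 << 3)    = 0x0c
 *
 * (0x13 is SDT_MEMRWA. On 32-bit kernels bit 1 of acc2 is the reserved
 * bit rather than the 64-bit 'l' bit, as the code above shows.)
 */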
311
312
313 #if defined(__i386)
314
315 static void
316 ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
317 {
318
319 ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);
320
321 sgd->sgd_looffset = ssd->bo;
322 sgd->sgd_hioffset = ssd->bo >> 16;
323
324 sgd->sgd_selector = ssd->ls;
325
326 /*
327 * set type, dpl and present bits.
328 */
329 sgd->sgd_type = ssd->acc1;
330 sgd->sgd_dpl = ssd->acc1 >> 5;
331 sgd->sgd_p = ssd->acc1 >> 7;
332 ASSERT(sgd->sgd_type == SDT_SYSCGT);
333 ASSERT(sgd->sgd_dpl == SEL_UPL);
334 sgd->sgd_stkcpy = 0;
335 }
336
337 #endif /* __i386 */
338
339 /*
340 * Load LDT register with the current process's LDT.
341 */
342 static void
343 ldt_load(void)
344 {
345 #if defined(__xpv)
346 xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
347 curproc->p_ldtlimit + 1);
348 #else
349 *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
350 wr_ldtr(ULDT_SEL);
351 #endif
352 }
353
354 /*
355 * Store a NULL selector in the LDTR. All subsequent illegal references to
356 * the LDT will result in a #gp.
357 */
358 void
359 ldt_unload(void)
360 {
361 #if defined(__xpv)
362 xen_set_ldt(NULL, 0);
363 #else
364 *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
365 wr_ldtr(0);
366 #endif
367 }
368
369 /*ARGSUSED*/
370 static void
371 ldt_savectx(proc_t *p)
372 {
373 ASSERT(p->p_ldt != NULL);
374 ASSERT(p == curproc);
375
376 #if defined(__amd64)
377 /*
378 * The 64-bit kernel must be sure to clear any stale ldt
379 * selectors when context switching away from a process that
380 * has a private ldt. Consider the following example:
381 *
382 * Wine creates an LDT descriptor and points a segment register
383 * to it.
384 *
385 * We then context switch away from the wine lwp to a kernel
386 * thread and hit a breakpoint in the kernel with kmdb.
387 *
388 * When we continue and resume from kmdb we will #gp
389 * fault since kmdb will have saved the stale ldt selector
390 * from wine and will try to restore it but we are no longer in
391 * the context of the wine process and do not have our
392 * ldtr register pointing to the private ldt.
393 */
394 reset_sregs();
395 #endif
396
397 ldt_unload();
398 cpu_fast_syscall_enable(NULL);
399 }
400
401 static void
402 ldt_restorectx(proc_t *p)
403 {
404 ASSERT(p->p_ldt != NULL);
405 ASSERT(p == curproc);
406
407 ldt_load();
408 cpu_fast_syscall_disable(NULL);
409 }
410
411 /*
412 * When a process with a private LDT execs, fast syscalls must be enabled for
413 * the new process image.
414 */
415 /* ARGSUSED */
416 static void
417 ldt_freectx(proc_t *p, int isexec)
418 {
419 ASSERT(p->p_ldt);
420
421 if (isexec) {
422 kpreempt_disable();
423 cpu_fast_syscall_enable(NULL);
424 kpreempt_enable();
425 }
426
427 /*
428 * ldt_free() will free the memory used by the private LDT, reset the
429 * process's descriptor, and re-program the LDTR.
430 */
431 ldt_free(p);
432 }
433
434 /*
435 * Install ctx op that ensures syscall/sysenter are disabled.
436 * See comments below.
437 *
438 * When a thread with a private LDT forks, the new process
439 * must have the LDT context ops installed.
440 */
441 /* ARGSUSED */
442 static void
443 ldt_installctx(proc_t *p, proc_t *cp)
444 {
445 proc_t *targ = p;
446 kthread_t *t;
447
448 /*
449 * If this is a fork, operate on the child process.
450 */
451 if (cp != NULL) {
452 targ = cp;
453 ldt_dup(p, cp);
454 }
455
456 /*
457 * The process context ops expect the target process as their argument.
458 */
459 ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
460 ldt_installctx, ldt_savectx, ldt_freectx) == 0);
461
462 installpctx(targ, targ, ldt_savectx, ldt_restorectx,
463 ldt_installctx, ldt_savectx, ldt_freectx);
464
465 /*
466 * We've just disabled fast system call and return instructions; take
467 * the slow path out to make sure we don't try to use one to return
468 * back to user. We must set t_post_sys for every thread in the
469 * process to make sure none of them escape out via fast return.
470 */
471
472 mutex_enter(&targ->p_lock);
473 t = targ->p_tlist;
474 do {
475 t->t_post_sys = 1;
476 } while ((t = t->t_forw) != targ->p_tlist);
477 mutex_exit(&targ->p_lock);
478 }
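/*
 * A rough sketch of how the ops installed above are used (descriptive
 * only): the save op (ldt_savectx) runs when the dispatcher switches away
 * from this process and the restore op (ldt_restorectx) runs when it
 * switches back; the fork op (ldt_installctx itself) propagates the LDT
 * and these ops to the child on fork(); the exit op (ldt_savectx again)
 * runs when the process exits, and the free op (ldt_freectx) releases the
 * private LDT, with isexec set when the process execs.
 */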
479
480 int
481 setdscr(struct ssd *ssd)
482 {
483 ushort_t seli; /* selector index */
484 user_desc_t *ldp; /* descriptor pointer */
485 user_desc_t ndesc; /* new descriptor */
486 proc_t *pp = ttoproc(curthread);
487 int rc = 0;
488
489 /*
490 * LDT segments: executable and data at DPL 3 only.
491 */
492 if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
493 return (EINVAL);
494
495 /*
496 * check the selector index.
497 */
498 seli = SELTOIDX(ssd->sel);
499 if (seli >= MAXNLDT || seli < LDT_UDBASE)
500 return (EINVAL);
501
502 ndesc = null_udesc;
503 mutex_enter(&pp->p_ldtlock);
504
505 /*
506 * If this is the first time for this process then setup a
507 * private LDT for it.
508 */
509 if (pp->p_ldt == NULL) {
510 ldt_alloc(pp, seli);
511
512 /*
513 * Now that this process has a private LDT, the use of
514 * the syscall/sysret and sysenter/sysexit instructions
515 * is forbidden for this process because they destroy
516 * the contents of %cs and %ss segment registers.
517 *
518 * Explicitly disable them here and add a context handler
519 * to the process. Note that disabling
520 * them here means we can't use sysret or sysexit on
521 * the way out of this system call - so we force this
522 * thread to take the slow path (which doesn't make use
523 * of sysenter or sysexit) back out.
524 */
525 kpreempt_disable();
526 ldt_installctx(pp, NULL);
527 cpu_fast_syscall_disable(NULL);
528 ASSERT(curthread->t_post_sys != 0);
529 kpreempt_enable();
530
531 } else if (seli > pp->p_ldtlimit) {
532
533 /*
534 * Increase size of ldt to include seli.
535 */
536 ldt_grow(pp, seli);
537 }
538
539 ASSERT(seli <= pp->p_ldtlimit);
540 ldp = &pp->p_ldt[seli];
541
542 /*
543 * On the 64-bit kernel, this is where things get more subtle.
544 * Recall that in the 64-bit kernel, when we enter the kernel we
545 * deliberately -don't- reload the segment selectors we came in on
546 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
547 * and the underlying descriptors are essentially ignored by the
548 * hardware in long mode - except for the base that we override with
549 * the gsbase MSRs.
550 *
551 * However, there's one unfortunate issue with this rosy picture --
552 * a descriptor that's not marked as 'present' will still generate
553 * an #np when loading a segment register.
554 *
555 * Consider this case. An lwp creates a harmless LDT entry, points
556 * one of its segment registers at it, then tells the kernel (here)
557 * to delete it. In the 32-bit kernel, the #np will happen on the
558 * way back to userland where we reload the segment registers, and be
559 * handled in kern_gpfault(). In the 64-bit kernel, the same thing
560 * will happen in the normal case too. However, if we're trying to
561 * use a debugger that wants to save and restore the segment registers,
562 * and the debugger thinks that we have valid segment registers, we
563 * have the problem that the debugger will try to restore the
564 * segment register that points at the now 'not present' descriptor
565 * and will take a #np right there.
566 *
567 * We should obviously fix the debugger to be paranoid about
568 * -not- restoring segment registers that point to bad descriptors;
569 * however we can prevent the problem here if we check to see if any
570 * of the segment registers are still pointing at the thing we're
571 * destroying; if they are, return an error instead. (That also seems
572 * like a much better failure mode than SIGKILL and a core file
573 * from kern_gpfault().)
574 */
575 if (SI86SSD_PRES(ssd) == 0) {
576 kthread_t *t;
577 int bad = 0;
578
579 /*
580 * Look carefully at the segment registers of every lwp
581 * in the process (they're all stopped by our caller).
582 * If we're about to invalidate a descriptor that's still
583 * being referenced by *any* of them, return an error,
584 * rather than having them #gp on their way out of the kernel.
585 */
586 ASSERT(pp->p_lwprcnt == 1);
587
588 mutex_enter(&pp->p_lock);
589 t = pp->p_tlist;
590 do {
591 klwp_t *lwp = ttolwp(t);
592 struct regs *rp = lwp->lwp_regs;
593 #if defined(__amd64)
594 pcb_t *pcb = &lwp->lwp_pcb;
595 #endif
596
597 if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
598 bad = 1;
599 break;
600 }
601
602 #if defined(__amd64)
603 if (pcb->pcb_rupdate == 1) {
604 if (ssd->sel == pcb->pcb_ds ||
605 ssd->sel == pcb->pcb_es ||
606 ssd->sel == pcb->pcb_fs ||
607 ssd->sel == pcb->pcb_gs) {
608 bad = 1;
609 break;
610 }
611 } else
612 #endif
613 {
614 if (ssd->sel == rp->r_ds ||
615 ssd->sel == rp->r_es ||
616 ssd->sel == rp->r_fs ||
617 ssd->sel == rp->r_gs) {
618 bad = 1;
619 break;
620 }
621 }
622
623 } while ((t = t->t_forw) != pp->p_tlist);
624 mutex_exit(&pp->p_lock);
625
626 if (bad) {
627 mutex_exit(&pp->p_ldtlock);
628 return (EBUSY);
629 }
630 }
631
632 /*
633 * If acc1 is zero, clear the descriptor (including the 'present' bit)
634 */
635 if (ssd->acc1 == 0) {
636 rc = ldt_update_segd(ldp, &null_udesc);
637 mutex_exit(&pp->p_ldtlock);
638 return (rc);
639 }
640
641 /*
642 * Check segment type, allow segment not present and
643 * only user DPL (3).
644 */
645 if (SI86SSD_DPL(ssd) != SEL_UPL) {
646 mutex_exit(&pp->p_ldtlock);
647 return (EINVAL);
648 }
649
650 #if defined(__amd64)
651 /*
652 * Do not allow 32-bit applications to create 64-bit mode code
653 * segments.
654 */
655 if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
656 SI86SSD_ISLONG(ssd)) {
657 mutex_exit(&pp->p_ldtlock);
658 return (EINVAL);
659 }
660 #endif /* __amd64 */
661
662 /*
663 * Set up a code or data user segment descriptor.
664 */
665 if (SI86SSD_ISUSEG(ssd)) {
666 ssd_to_usd(ssd, &ndesc);
667 rc = ldt_update_segd(ldp, &ndesc);
668 mutex_exit(&pp->p_ldtlock);
669 return (rc);
670 }
671
672 #if defined(__i386)
673 /*
674 * Allow a call gate only if the destination is in the LDT
675 * and the system is running in 32-bit legacy mode.
676 *
677 * In long mode 32-bit call gates are redefined as 64-bit call
678 * gates and the hw enforces that the target code selector
679 * of the call gate must be a 64-bit selector; a #gp fault is
680 * generated otherwise. Since we do not allow 32-bit processes
681 * to switch themselves to 64-bit mode, we never allow call gates
682 * on a 64-bit system.
683 */
684 if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
685
686
687 ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
688 rc = ldt_update_segd(ldp, &ndesc);
689 mutex_exit(&pp->p_ldtlock);
690 return (rc);
691 }
692 #endif /* __i386 */
693
694 mutex_exit(&pp->p_ldtlock);
695 return (EINVAL);
696 }
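/*
 * Worked example of the selector checks at the top of setdscr()
 * (illustrative values): an x86 selector is (index << 3) | TI | RPL,
 * where TI is 0x4 for the LDT and RPL 3 is user privilege. For LDT
 * index 6 the selector is (6 << 3) | 0x4 | 0x3 = 0x37; SELISLDT() and
 * SELISUPL() both accept it, and SELTOIDX() recovers the index 6, which
 * must then fall in the [LDT_UDBASE, MAXNLDT) range checked above.
 */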
697
698 /*
699 * Allocate new LDT for process just large enough to contain seli.
700 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
701 * to simplify the implementation and because on the hypervisor it's
702 * required, since the LDT must live on pages that have PROT_WRITE
703 * removed and which are given to the hypervisor.
704 */
705 static void
706 ldt_alloc(proc_t *pp, uint_t seli)
707 {
708 user_desc_t *ldt;
709 size_t ldtsz;
710 uint_t nsels;
711
712 ASSERT(MUTEX_HELD(&pp->p_ldtlock));
713 ASSERT(pp->p_ldt == NULL);
714 ASSERT(pp->p_ldtlimit == 0);
715
716 /*
717 * Allocate new LDT just large enough to contain seli.
718 */
719 ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
720 nsels = ldtsz / sizeof (user_desc_t);
721 ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
722
723 ldt = kmem_zalloc(ldtsz, KM_SLEEP);
724 ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));
725
726 #if defined(__xpv)
727 if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
728 panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
729 #endif
730
731 pp->p_ldt = ldt;
732 pp->p_ldtlimit = nsels - 1;
733 set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);
734
735 if (pp == curproc) {
736 kpreempt_disable();
737 ldt_load();
738 kpreempt_enable();
739 }
740 }
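/*
 * Worked example of the sizing above (illustrative numbers, assuming
 * 8-byte descriptors and a 4K PAGESIZE): for seli = 100,
 * (100 + 1) * sizeof (user_desc_t) = 808 bytes, which P2ROUNDUP()
 * rounds up to 4096; nsels = 4096 / 8 = 512, so p_ldtlimit becomes 511
 * and the LDT occupies exactly one page-aligned page.
 */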
741
742 static void
743 ldt_free(proc_t *pp)
744 {
745 user_desc_t *ldt;
746 size_t ldtsz;
747
748 ASSERT(pp->p_ldt != NULL);
749
750 mutex_enter(&pp->p_ldtlock);
751 ldt = pp->p_ldt;
752 ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
753
754 ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));
755
756 pp->p_ldt = NULL;
757 pp->p_ldtlimit = 0;
758 pp->p_ldt_desc = null_sdesc;
759 mutex_exit(&pp->p_ldtlock);
760
761 if (pp == curproc) {
762 kpreempt_disable();
763 ldt_unload();
764 kpreempt_enable();
765 }
766
767 #if defined(__xpv)
768 /*
769 * We are not allowed to make the ldt writable until after
770 * we tell the hypervisor to unload it.
771 */
772 if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
773 panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
774 #endif
775
776 kmem_free(ldt, ldtsz);
777 }
778
779 /*
780 * On fork copy new ldt for child.
781 */
782 static void
783 ldt_dup(proc_t *pp, proc_t *cp)
784 {
785 size_t ldtsz;
786
787 ASSERT(pp->p_ldt != NULL);
788 ASSERT(cp != curproc);
789
790 /*
791 * I assume the parent's ldt can't increase since we're in a fork.
792 */
793 mutex_enter(&pp->p_ldtlock);
794 mutex_enter(&cp->p_ldtlock);
795
796 ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
797
798 ldt_alloc(cp, pp->p_ldtlimit);
799
800 #if defined(__xpv)
801 /*
802 * Make child's ldt writable so it can be copied into from
803 * parent's ldt. This works because ldt_alloc() above did not
804 * load the ldt, since it's for the child process. If we tried
805 * to make an LDT that is loaded in hw writable, the setprot
806 * operation would fail.
807 */
808 if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
809 panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
810 #endif
811
812 bcopy(pp->p_ldt, cp->p_ldt, ldtsz);
813
814 #if defined(__xpv)
815 if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
816 panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
817 #endif
818 mutex_exit(&cp->p_ldtlock);
819 mutex_exit(&pp->p_ldtlock);
820
821 }
822
823 static void
824 ldt_grow(proc_t *pp, uint_t seli)
825 {
826 user_desc_t *oldt, *nldt;
827 uint_t nsels;
828 size_t oldtsz, nldtsz;
829
830 ASSERT(MUTEX_HELD(&pp->p_ldtlock));
831 ASSERT(pp->p_ldt != NULL);
832 ASSERT(pp->p_ldtlimit != 0);
833
834 /*
835 * Allocate larger LDT just large enough to contain seli.
836 */
837 nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
838 nsels = nldtsz / sizeof (user_desc_t);
839 ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
840 ASSERT(nsels > pp->p_ldtlimit);
841
842 oldt = pp->p_ldt;
843 oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
844
845 nldt = kmem_zalloc(nldtsz, KM_SLEEP);
846 ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));
847
848 bcopy(oldt, nldt, oldtsz);
849
850 /*
851 * unload old ldt.
852 */
853 kpreempt_disable();
854 ldt_unload();
855 kpreempt_enable();
856
857 #if defined(__xpv)
858
859 /*
860 * Make old ldt writable and new ldt read only.
861 */
862 if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
863 panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
864
865 if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
866 panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
867 #endif
868
869 pp->p_ldt = nldt;
870 pp->p_ldtlimit = nsels - 1;
871
872 /*
873 * write new ldt segment descriptor.
874 */
875 set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);
876
877 /*
878 * load the new ldt.
879 */
880 kpreempt_disable();
881 ldt_load();
882 kpreempt_enable();
883
884 kmem_free(oldt, oldtsz);
885 }
886