xref: /titanic_51/usr/src/uts/intel/ia32/os/sysi86.c (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
32 /*	  All Rights Reserved	*/
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/sysmacros.h>
39 #include <sys/systm.h>
40 #include <sys/signal.h>
41 #include <sys/errno.h>
42 #include <sys/fault.h>
43 #include <sys/syscall.h>
44 #include <sys/cpuvar.h>
45 #include <sys/sysi86.h>
46 #include <sys/psw.h>
47 #include <sys/cred.h>
48 #include <sys/policy.h>
49 #include <sys/thread.h>
50 #include <sys/debug.h>
51 #include <sys/ontrap.h>
52 #include <sys/privregs.h>
53 #include <sys/x86_archext.h>
54 #include <sys/vmem.h>
55 #include <sys/kmem.h>
56 #include <sys/mman.h>
57 #include <sys/archsystm.h>
58 #include <vm/hat.h>
59 #include <vm/as.h>
60 #include <vm/seg.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/faultcode.h>
63 #include <sys/fp.h>
64 #include <sys/cmn_err.h>
65 
66 static int setdscr(caddr_t ap);
67 static void setup_ldt(proc_t *pp);
68 static void *ldt_map(proc_t *pp, uint_t seli);
69 static void ldt_free(proc_t *pp);
70 
71 extern void rtcsync(void);
72 extern long ggmtl(void);
73 extern void sgmtl(long);
74 
75 /*
76  * sysi86 System Call
77  */
78 
79 /* ARGSUSED */
80 int
81 sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
82 {
83 	int error = 0;
84 	int c;
85 	proc_t *pp = curproc;
86 
87 	switch (cmd) {
88 
89 	/*
90 	 * The SI86V86 subsystem call of the SYSI86 system call
91 	 * supports only one subcode -- V86SC_IOPL.
92 	 */
93 	case SI86V86:
94 		if (arg1 == V86SC_IOPL) {
95 			struct regs *rp = lwptoregs(ttolwp(curthread));
96 			greg_t oldpl = rp->r_ps & PS_IOPL;
97 			greg_t newpl = arg2 & PS_IOPL;
98 
99 			/*
100 			 * Must be privileged to run this system call
101 			 * if giving more io privilege.
102 			 */
103 			if (newpl > oldpl && (error =
104 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
105 				return (set_errno(error));
106 			rp->r_ps ^= oldpl ^ newpl;
107 		} else
108 			error = EINVAL;
109 		break;
110 
111 	/*
112 	 * Set a segment descriptor
113 	 */
114 	case SI86DSCR:
115 		/*
116 		 * There are considerable problems here manipulating
117 		 * resources shared by many running lwps.  Get everyone
118 		 * into a safe state before changing the LDT.
119 		 */
120 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
121 			error = EINTR;
122 			break;
123 		}
124 		error = setdscr((caddr_t)arg1);
125 		mutex_enter(&pp->p_lock);
126 		if (curthread != pp->p_agenttp)
127 			continuelwps(pp);
128 		mutex_exit(&pp->p_lock);
129 		break;
130 
131 	case SI86FPHW:
132 		c = fp_kind & 0xff;
133 		if (suword32((void *)arg1, c) == -1)
134 			error = EFAULT;
135 		break;
136 
137 	case SI86FPSTART:
138 		/*
139 		 * arg1 is the address of _fp_hw
140 		 * arg2 is the desired x87 FCW value
141 		 * arg3 is the desired SSE MXCSR value
142 		 * a return value of one means SSE hardware, else none.
143 		 */
144 		c = fp_kind & 0xff;
145 		if (suword32((void *)arg1, c) == -1) {
146 			error = EFAULT;
147 			break;
148 		}
149 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
150 		return (fp_kind == __FP_SSE ? 1 : 0);
151 
152 	/* real time clock management commands */
153 
154 	case WTODC:
155 		if ((error = secpolicy_settime(CRED())) == 0) {
156 			timestruc_t ts;
157 			mutex_enter(&tod_lock);
158 			gethrestime(&ts);
159 			tod_set(ts);
160 			mutex_exit(&tod_lock);
161 		}
162 		break;
163 
164 /* Give some timezone playing room */
165 #define	ONEWEEK	(7 * 24 * 60 * 60)
166 
167 	case SGMTL:
168 		/*
169 		 * Called from 32 bit land, negative values
170 		 * are not sign extended, so we do that here
171 		 * by casting it to an int and back.  We also
172 		 * clamp the value to within reason and detect
173 		 * when a 64 bit call overflows an int.
174 		 */
175 		if ((error = secpolicy_settime(CRED())) == 0) {
176 			int newlag = (int)arg1;
177 
178 #ifdef _SYSCALL32_IMPL
179 			if (get_udatamodel() == DATAMODEL_NATIVE &&
180 			    (long)newlag != (long)arg1) {
181 				error = EOVERFLOW;
182 			} else
183 #endif
184 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
185 				sgmtl(newlag);
186 			else
187 				error = EOVERFLOW;
188 		}
189 		break;
190 
191 	case GGMTL:
192 		if (get_udatamodel() == DATAMODEL_NATIVE) {
193 			if (sulword((void *)arg1, ggmtl()) == -1)
194 				error = EFAULT;
195 #ifdef _SYSCALL32_IMPL
196 		} else {
197 			time_t gmtl;
198 
199 			if ((gmtl = ggmtl()) > INT32_MAX) {
200 				/*
201 				 * Since gmt_lag can at most be
202 				 * +/- 12 hours, something is
203 				 * *seriously* messed up here.
204 				 */
205 				error = EOVERFLOW;
206 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
207 				error = EFAULT;
208 #endif
209 		}
210 		break;
211 
212 	case RTCSYNC:
213 		if ((error = secpolicy_settime(CRED())) == 0)
214 			rtcsync();
215 		break;
216 
217 	/* END OF real time clock management commands */
218 
219 	default:
220 		error = EINVAL;
221 		break;
222 	}
223 	return (error == 0 ? 0 : set_errno(error));
224 }
225 
226 void
227 usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
228 {
229 	ssd->bo = USEGD_GETBASE(usd);
230 	ssd->ls = USEGD_GETLIMIT(usd);
231 	ssd->sel = sel;
232 
233 	/*
234 	 * set type, dpl and present bits.
235 	 */
236 	ssd->acc1 = usd->usd_type;
237 	ssd->acc1 |= usd->usd_dpl << 5;
238 	ssd->acc1 |= usd->usd_p << (5 + 2);
239 
240 	/*
241 	 * set avl, DB and granularity bits.
242 	 */
243 	ssd->acc2 = usd->usd_avl;
244 
245 #if defined(__amd64)
246 	ssd->acc2 |= usd->usd_long << 1;
247 #else
248 	ssd->acc2 |= usd->usd_reserved << 1;
249 #endif
250 
251 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
252 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
253 }
254 
255 static void
256 ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
257 {
258 
259 	USEGD_SETBASE(usd, ssd->bo);
260 	USEGD_SETLIMIT(usd, ssd->ls);
261 
262 	/*
263 	 * set type, dpl and present bits.
264 	 */
265 	usd->usd_type = ssd->acc1;
266 	usd->usd_dpl = ssd->acc1 >> 5;
267 	usd->usd_p = ssd->acc1 >> (5 + 2);
268 
269 	ASSERT(usd->usd_type >= SDT_MEMRO);
270 	ASSERT(usd->usd_dpl == SEL_UPL);
271 
272 	/*
273 	 * set avl, DB and granularity bits.
274 	 */
275 	usd->usd_avl = ssd->acc2;
276 
277 #if defined(__amd64)
278 	usd->usd_long = ssd->acc2 >> 1;
279 #else
280 	usd->usd_reserved = ssd->acc2 >> 1;
281 #endif
282 
283 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
284 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
285 }
286 
287 static void
288 ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
289 {
290 
291 	sgd->sgd_looffset = ssd->bo;
292 	sgd->sgd_hioffset = ssd->bo >> 16;
293 
294 	sgd->sgd_selector = ssd->ls;
295 	/*
296 	 * set type, dpl and present bits.
297 	 */
298 	sgd->sgd_type = ssd->acc1;
299 	sgd->sgd_dpl = ssd->acc1 >> 5;
300 	sgd->sgd_p = ssd->acc1 >> 7;
301 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
302 	ASSERT(sgd->sgd_dpl == SEL_UPL);
303 
304 #if defined(__i386)	/* reserved, ignored in amd64 */
305 	sgd->sgd_stkcpy = 0;
306 #endif
307 }
308 
309 /*
310  * Load LDT register with the current process's LDT.
311  */
312 void
313 ldt_load(void)
314 {
315 	/*
316 	 */
317 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
318 	wr_ldtr(ULDT_SEL);
319 }
320 
321 /*
322  * Store a NULL selector in the LDTR. All subsequent illegal references to
323  * the LDT will result in a #gp.
324  */
325 void
326 ldt_unload(void)
327 {
328 	CPU->cpu_gdt[GDT_LDT] = zero_udesc;
329 	wr_ldtr(0);
330 }
331 
332 /*ARGSUSED*/
333 static void
334 ldt_savectx(proc_t *p)
335 {
336 	ASSERT(p->p_ldt != NULL);
337 	ASSERT(p == curproc);
338 
339 #if defined(__amd64)
340 	/*
341 	 * The 64-bit kernel must be sure to clear any stale ldt
342 	 * selectors when context switching away from a process that
343 	 * has a private ldt. Consider the following example:
344 	 *
345 	 * 	Wine creats a ldt descriptor and points a segment register
346 	 * 	to it.
347 	 *
348 	 *	We then context switch away from wine lwp to kernel
349 	 *	thread and hit breakpoint in kernel with kmdb
350 	 *
351 	 *	When we continue and resume from kmdb we will #gp
352 	 * 	fault since kmdb will have saved the stale ldt selector
353 	 *	from wine and will try to restore it but we are no longer in
354 	 *	the context of the wine process and do not have our
355 	 *	ldtr register pointing to the private ldt.
356 	 */
357 	clr_ldt_sregs();
358 #endif
359 
360 	ldt_unload();
361 	cpu_fast_syscall_enable(NULL);
362 }
363 
364 static void
365 ldt_restorectx(proc_t *p)
366 {
367 	ASSERT(p->p_ldt != NULL);
368 	ASSERT(p == curproc);
369 
370 	ldt_load();
371 	cpu_fast_syscall_disable(NULL);
372 }
373 
374 /*
375  * When a process with a private LDT execs, fast syscalls must be enabled for
376  * the new process image.
377  */
378 /* ARGSUSED */
379 static void
380 ldt_freectx(proc_t *p, int isexec)
381 {
382 	ASSERT(p->p_ldt);
383 
384 	if (isexec) {
385 		kpreempt_disable();
386 		cpu_fast_syscall_enable(NULL);
387 		kpreempt_enable();
388 	}
389 
390 	/*
391 	 * ldt_free() will free the memory used by the private LDT, reset the
392 	 * process's descriptor, and re-program the LDTR.
393 	 */
394 	ldt_free(p);
395 }
396 
397 /*
398  * Install ctx op that ensures syscall/sysenter are disabled.
399  * See comments below.
400  *
401  * When a thread with a private LDT forks, the new process
402  * must have the LDT context ops installed.
403  */
404 /* ARGSUSED */
405 static void
406 ldt_installctx(proc_t *p, proc_t *cp)
407 {
408 	proc_t		*targ = p;
409 	kthread_t	*t;
410 
411 	/*
412 	 * If this is a fork, operate on the child process.
413 	 */
414 	if (cp != NULL) {
415 		targ = cp;
416 		ldt_dup(p, cp);
417 	}
418 
419 	/*
420 	 * The process context ops expect the target process as their argument.
421 	 */
422 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
423 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
424 
425 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
426 	    ldt_installctx, ldt_savectx, ldt_freectx);
427 
428 	/*
429 	 * We've just disabled fast system call and return instructions; take
430 	 * the slow path out to make sure we don't try to use one to return
431 	 * back to user. We must set t_post_sys for every thread in the
432 	 * process to make sure none of them escape out via fast return.
433 	 */
434 
435 	mutex_enter(&targ->p_lock);
436 	t = targ->p_tlist;
437 	do {
438 		t->t_post_sys = 1;
439 	} while ((t = t->t_forw) != targ->p_tlist);
440 	mutex_exit(&targ->p_lock);
441 }
442 
443 static int
444 setdscr(caddr_t ap)
445 {
446 	struct ssd ssd;		/* request structure buffer */
447 	ushort_t seli; 		/* selector index */
448 	user_desc_t *dscrp;	/* descriptor pointer */
449 	proc_t	*pp = ttoproc(curthread);
450 
451 	if (get_udatamodel() == DATAMODEL_LP64)
452 		return (EINVAL);
453 
454 	if (copyin(ap, &ssd, sizeof (ssd)) < 0)
455 		return (EFAULT);
456 
457 	/*
458 	 * LDT segments: executable and data at DPL 3 only.
459 	 */
460 	if (!SELISLDT(ssd.sel) || !SELISUPL(ssd.sel))
461 		return (EINVAL);
462 
463 	/*
464 	 * check the selector index.
465 	 */
466 	seli = SELTOIDX(ssd.sel);
467 	if (seli >= MAXNLDT || seli < LDT_UDBASE)
468 		return (EINVAL);
469 
470 	mutex_enter(&pp->p_ldtlock);
471 
472 	/*
473 	 * If this is the first time for this process then setup a
474 	 * private LDT for it.
475 	 */
476 	if (pp->p_ldt == NULL) {
477 		kpreempt_disable();
478 		setup_ldt(pp);
479 
480 		/*
481 		 * Now that this process has a private LDT, the use of
482 		 * the syscall/sysret and sysenter/sysexit instructions
483 		 * is forbidden for this processes because they destroy
484 		 * the contents of %cs and %ss segment registers.
485 		 *
486 		 * Explicity disable them here and add a context handler
487 		 * to the process. Note that disabling
488 		 * them here means we can't use sysret or sysexit on
489 		 * the way out of this system call - so we force this
490 		 * thread to take the slow path (which doesn't make use
491 		 * of sysenter or sysexit) back out.
492 		 */
493 
494 		ldt_installctx(pp, NULL);
495 
496 		cpu_fast_syscall_disable(NULL);
497 
498 		ASSERT(curthread->t_post_sys != 0);
499 		wr_ldtr(ULDT_SEL);
500 		kpreempt_enable();
501 	}
502 
503 	if (ldt_map(pp, seli) == NULL) {
504 		mutex_exit(&pp->p_ldtlock);
505 		return (ENOMEM);
506 	}
507 
508 	ASSERT(seli <= pp->p_ldtlimit);
509 	dscrp = &pp->p_ldt[seli];
510 
511 	/*
512 	 * On the 64-bit kernel, this is where things get more subtle.
513 	 * Recall that in the 64-bit kernel, when we enter the kernel we
514 	 * deliberately -don't- reload the segment selectors we came in on
515 	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
516 	 * and the underlying descriptors are essentially ignored by the
517 	 * hardware in long mode - except for the base that we override with
518 	 * the gsbase MSRs.
519 	 *
520 	 * However, there's one unfortunate issue with this rosy picture --
521 	 * a descriptor that's not marked as 'present' will still generate
522 	 * an #np when loading a segment register.
523 	 *
524 	 * Consider this case.  An lwp creates a harmless LDT entry, points
525 	 * one of it's segment registers at it, then tells the kernel (here)
526 	 * to delete it.  In the 32-bit kernel, the #np will happen on the
527 	 * way back to userland where we reload the segment registers, and be
528 	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
529 	 * will happen in the normal case too.  However, if we're trying to
530 	 * use a debugger that wants to save and restore the segment registers,
531 	 * and the debugger things that we have valid segment registers, we
532 	 * have the problem that the debugger will try and restore the
533 	 * segment register that points at the now 'not present' descriptor
534 	 * and will take a #np right there.
535 	 *
536 	 * We should obviously fix the debugger to be paranoid about
537 	 * -not- restoring segment registers that point to bad descriptors;
538 	 * however we can prevent the problem here if we check to see if any
539 	 * of the segment registers are still pointing at the thing we're
540 	 * destroying; if they are, return an error instead. (That also seems
541 	 * a lot better failure mode than SIGKILL and a core file
542 	 * from kern_gpfault() too.)
543 	 */
544 	if (SI86SSD_PRES(&ssd) == 0) {
545 		kthread_t *t;
546 		int bad = 0;
547 
548 		/*
549 		 * Look carefully at the segment registers of every lwp
550 		 * in the process (they're all stopped by our caller).
551 		 * If we're about to invalidate a descriptor that's still
552 		 * being referenced by *any* of them, return an error,
553 		 * rather than having them #gp on their way out of the kernel.
554 		 */
555 		ASSERT(pp->p_lwprcnt == 1);
556 
557 		mutex_enter(&pp->p_lock);
558 		t = pp->p_tlist;
559 		do {
560 			klwp_t *lwp = ttolwp(t);
561 			struct regs *rp = lwp->lwp_regs;
562 #if defined(__amd64)
563 			pcb_t *pcb = &lwp->lwp_pcb;
564 #endif
565 
566 			if (ssd.sel == rp->r_cs || ssd.sel == rp->r_ss) {
567 				bad = 1;
568 				break;
569 			}
570 
571 #if defined(__amd64)
572 			if (pcb->pcb_flags & RUPDATE_PENDING) {
573 				if (ssd.sel == pcb->pcb_ds ||
574 				    ssd.sel == pcb->pcb_es ||
575 				    ssd.sel == pcb->pcb_fs ||
576 				    ssd.sel == pcb->pcb_gs) {
577 					bad = 1;
578 					break;
579 				}
580 			} else
581 #endif
582 			{
583 				if (ssd.sel == rp->r_ds ||
584 				    ssd.sel == rp->r_es ||
585 				    ssd.sel == rp->r_fs ||
586 				    ssd.sel == rp->r_gs) {
587 					bad = 1;
588 					break;
589 				}
590 			}
591 
592 		} while ((t = t->t_forw) != pp->p_tlist);
593 		mutex_exit(&pp->p_lock);
594 
595 		if (bad) {
596 			mutex_exit(&pp->p_ldtlock);
597 			return (EBUSY);
598 		}
599 	}
600 
601 	/*
602 	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
603 	 */
604 	if (ssd.acc1 == 0) {
605 		bzero(dscrp, sizeof (*dscrp));
606 		mutex_exit(&pp->p_ldtlock);
607 		return (0);
608 	}
609 
610 	/*
611 	 * Check segment type, allow segment not present and
612 	 * only user DPL (3).
613 	 */
614 	if (SI86SSD_DPL(&ssd) != SEL_UPL) {
615 		mutex_exit(&pp->p_ldtlock);
616 		return (EINVAL);
617 	}
618 
619 #if defined(__amd64)
620 	/*
621 	 * Do not allow 32-bit applications to create 64-bit mode code segments.
622 	 */
623 	if (SI86SSD_ISUSEG(&ssd) && ((SI86SSD_TYPE(&ssd) >> 3) & 1) == 1 &&
624 	    SI86SSD_ISLONG(&ssd)) {
625 		mutex_exit(&pp->p_ldtlock);
626 		return (EINVAL);
627 	}
628 #endif /* __amd64 */
629 
630 	/*
631 	 * Set up a code or data user segment descriptor.
632 	 */
633 	if (SI86SSD_ISUSEG(&ssd)) {
634 		ssd_to_usd(&ssd, dscrp);
635 		mutex_exit(&pp->p_ldtlock);
636 		return (0);
637 	}
638 
639 	/*
640 	 * Allow a call gate only if the destination is in the LDT.
641 	 */
642 	if (SI86SSD_TYPE(&ssd) == SDT_SYSCGT && SELISLDT(ssd.ls)) {
643 		ssd_to_sgd(&ssd, (gate_desc_t *)dscrp);
644 		mutex_exit(&pp->p_ldtlock);
645 		return (0);
646 	}
647 
648 	mutex_exit(&pp->p_ldtlock);
649 	return (EINVAL);
650 }
651 
652 /*
653  * Allocate a private LDT for this process and initialize it with the
654  * default entries.
655  */
656 void
657 setup_ldt(proc_t *pp)
658 {
659 	user_desc_t *ldtp;	/* descriptor pointer */
660 	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));
661 
662 	/*
663 	 * Allocate maximum virtual space we need for this LDT.
664 	 */
665 	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);
666 
667 	/*
668 	 * Allocate the minimum number of physical pages for LDT.
669 	 */
670 	(void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
671 	    VM_SLEEP, 0, segkmem_page_create, NULL);
672 
673 	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));
674 
675 	kpreempt_disable();
676 
677 	/* Update proc structure. XXX - need any locks here??? */
678 
679 	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
680 	    SDT_SYSLDT, SEL_KPL);
681 
682 	pp->p_ldtlimit = MINNLDT - 1;
683 	pp->p_ldt = ldtp;
684 	if (pp == curproc)
685 		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;
686 
687 	kpreempt_enable();
688 }
689 
690 /*
691  * Map the page corresponding to the selector entry. If the page is
692  * already mapped then it simply returns with the pointer to the entry.
693  * Otherwise it allocates a physical page for it and returns the pointer
694  * to the entry.  Returns 0 for errors.
695  */
696 static void *
697 ldt_map(proc_t *pp, uint_t seli)
698 {
699 	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
700 	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
701 	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
702 	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
703 	on_trap_data_t otd;
704 
705 	ASSERT(pp->p_ldt != NULL);
706 
707 	if (seli <= pp->p_ldtlimit)
708 		return (ent_addr);
709 
710 	/*
711 	 * We are increasing the size of the process's LDT.
712 	 * Make sure this and all intervening pages are mapped.
713 	 */
714 	while (page <= epage) {
715 		if (!on_trap(&otd, OT_DATA_ACCESS))
716 			(void) *(volatile int *)page;	/* peek at the page */
717 		else {		/* Allocate a physical page */
718 			(void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
719 			    segkmem_page_create, NULL);
720 			bzero(page, PAGESIZE);
721 		}
722 		no_trap();
723 		page += PAGESIZE;
724 	}
725 
726 	/* XXX - need any locks to update proc_t or gdt ??? */
727 
728 	ASSERT(curproc == pp);
729 
730 	kpreempt_disable();
731 	pp->p_ldtlimit = seli;
732 	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);
733 
734 	ldt_load();
735 	kpreempt_enable();
736 
737 	return (ent_addr);
738 }
739 
740 /*
741  * Free up the kernel memory used for LDT of this process.
742  */
743 static void
744 ldt_free(proc_t *pp)
745 {
746 	on_trap_data_t otd;
747 	caddr_t start, end;
748 	volatile caddr_t addr;
749 
750 	ASSERT(pp->p_ldt != NULL);
751 
752 	mutex_enter(&pp->p_ldtlock);
753 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
754 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
755 
756 	/* Free the physical page(s) used for mapping LDT */
757 	for (addr = start; addr <= end; addr += PAGESIZE) {
758 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
759 			/* peek at the address */
760 			(void) *(volatile int *)addr;
761 			segkmem_free(NULL, addr, PAGESIZE);
762 		}
763 	}
764 	no_trap();
765 
766 	/* Free up the virtual address space used for this LDT */
767 	vmem_free(heap_arena, pp->p_ldt,
768 	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));
769 	kpreempt_disable();
770 	pp->p_ldt = NULL;
771 	pp->p_ldt_desc = zero_sdesc;
772 	pp->p_ldtlimit = 0;
773 
774 	if (pp == curproc)
775 		ldt_unload();
776 	kpreempt_enable();
777 	mutex_exit(&pp->p_ldtlock);
778 }
779 
780 /*
781  * On fork copy new ldt for child.
782  */
783 void
784 ldt_dup(proc_t *pp, proc_t *cp)
785 {
786 	on_trap_data_t otd;
787 	caddr_t start, end;
788 	volatile caddr_t addr, caddr;
789 	int	minsize;
790 
791 	ASSERT(pp->p_ldt);
792 
793 	setup_ldt(cp);
794 
795 	mutex_enter(&pp->p_ldtlock);
796 	cp->p_ldtlimit = pp->p_ldtlimit;
797 	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
798 	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
799 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
800 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
801 	caddr = (caddr_t)cp->p_ldt; /* child LDT start */
802 
803 	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
804 	/* Walk thru the physical page(s) used for parent's LDT */
805 	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
806 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
807 			(void) *(volatile int *)addr; /* peek at the address */
808 			/* allocate a page if necessary */
809 			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
810 				(void) segkmem_xalloc(NULL, caddr, PAGESIZE,
811 				    VM_SLEEP, 0, segkmem_page_create, NULL);
812 			}
813 			bcopy(addr, caddr, PAGESIZE);
814 		}
815 	}
816 	no_trap();
817 	mutex_exit(&pp->p_ldtlock);
818 }
819