xref: /titanic_50/usr/src/uts/intel/ia32/os/sysi86.c (revision e099bf07784b9aadc4cc8655e69d462397e99860)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/signal.h>
40 #include <sys/errno.h>
41 #include <sys/fault.h>
42 #include <sys/syscall.h>
43 #include <sys/cpuvar.h>
44 #include <sys/sysi86.h>
45 #include <sys/psw.h>
46 #include <sys/cred.h>
47 #include <sys/policy.h>
48 #include <sys/thread.h>
49 #include <sys/debug.h>
50 #include <sys/ontrap.h>
51 #include <sys/privregs.h>
52 #include <sys/x86_archext.h>
53 #include <sys/vmem.h>
54 #include <sys/kmem.h>
55 #include <sys/mman.h>
56 #include <sys/archsystm.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/seg.h>
60 #include <vm/seg_kmem.h>
61 #include <vm/faultcode.h>
62 #include <sys/fp.h>
63 #include <sys/cmn_err.h>
64 #include <sys/segments.h>
65 #include <sys/clock.h>
66 
67 static void setup_ldt(proc_t *pp);
68 static void *ldt_map(proc_t *pp, uint_t seli);
69 static void ldt_free(proc_t *pp);
70 
71 /*
72  * sysi86 System Call
73  */
74 
75 /* ARGSUSED */
76 int
77 sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
78 {
79 	struct ssd ssd;
80 	int error = 0;
81 	int c;
82 	proc_t *pp = curproc;
83 
84 	switch (cmd) {
85 
86 	/*
87 	 * The SI86V86 subsystem call of the SYSI86 system call
88 	 * supports only one subcode -- V86SC_IOPL.
89 	 */
90 	case SI86V86:
91 		if (arg1 == V86SC_IOPL) {
92 			struct regs *rp = lwptoregs(ttolwp(curthread));
93 			greg_t oldpl = rp->r_ps & PS_IOPL;
94 			greg_t newpl = arg2 & PS_IOPL;
95 
96 			/*
97 			 * Must be privileged to run this system call
98 			 * if giving more io privilege.
99 			 */
100 			if (newpl > oldpl && (error =
101 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
102 				return (set_errno(error));
103 			rp->r_ps ^= oldpl ^ newpl;
104 		} else
105 			error = EINVAL;
106 		break;
107 
108 	/*
109 	 * Set a segment descriptor
110 	 */
111 	case SI86DSCR:
112 		/*
113 		 * There are considerable problems here manipulating
114 		 * resources shared by many running lwps.  Get everyone
115 		 * into a safe state before changing the LDT.
116 		 */
117 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
118 			error = EINTR;
119 			break;
120 		}
121 
122 		if (get_udatamodel() == DATAMODEL_LP64) {
123 			error = EINVAL;
124 			break;
125 		}
126 
127 		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
128 			error = EFAULT;
129 			break;
130 		}
131 
132 		error = setdscr(&ssd);
133 
134 		mutex_enter(&pp->p_lock);
135 		if (curthread != pp->p_agenttp)
136 			continuelwps(pp);
137 		mutex_exit(&pp->p_lock);
138 		break;
139 
140 	case SI86FPHW:
141 		c = fp_kind & 0xff;
142 		if (suword32((void *)arg1, c) == -1)
143 			error = EFAULT;
144 		break;
145 
146 	case SI86FPSTART:
147 		/*
148 		 * arg1 is the address of _fp_hw
149 		 * arg2 is the desired x87 FCW value
150 		 * arg3 is the desired SSE MXCSR value
151 		 * a return value of one means SSE hardware, else none.
152 		 */
153 		c = fp_kind & 0xff;
154 		if (suword32((void *)arg1, c) == -1) {
155 			error = EFAULT;
156 			break;
157 		}
158 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
159 		return (fp_kind == __FP_SSE ? 1 : 0);
160 
161 	/* real time clock management commands */
162 
163 	case WTODC:
164 		if ((error = secpolicy_settime(CRED())) == 0) {
165 			timestruc_t ts;
166 			mutex_enter(&tod_lock);
167 			gethrestime(&ts);
168 			tod_set(ts);
169 			mutex_exit(&tod_lock);
170 		}
171 		break;
172 
173 /* Give some timezone playing room */
174 #define	ONEWEEK	(7 * 24 * 60 * 60)
175 
176 	case SGMTL:
177 		/*
178 		 * Called from 32 bit land, negative values
179 		 * are not sign extended, so we do that here
180 		 * by casting it to an int and back.  We also
181 		 * clamp the value to within reason and detect
182 		 * when a 64 bit call overflows an int.
183 		 */
184 		if ((error = secpolicy_settime(CRED())) == 0) {
185 			int newlag = (int)arg1;
186 
187 #ifdef _SYSCALL32_IMPL
188 			if (get_udatamodel() == DATAMODEL_NATIVE &&
189 			    (long)newlag != (long)arg1) {
190 				error = EOVERFLOW;
191 			} else
192 #endif
193 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
194 				sgmtl(newlag);
195 			else
196 				error = EOVERFLOW;
197 		}
198 		break;
199 
200 	case GGMTL:
201 		if (get_udatamodel() == DATAMODEL_NATIVE) {
202 			if (sulword((void *)arg1, ggmtl()) == -1)
203 				error = EFAULT;
204 #ifdef _SYSCALL32_IMPL
205 		} else {
206 			time_t gmtl;
207 
208 			if ((gmtl = ggmtl()) > INT32_MAX) {
209 				/*
210 				 * Since gmt_lag can at most be
211 				 * +/- 12 hours, something is
212 				 * *seriously* messed up here.
213 				 */
214 				error = EOVERFLOW;
215 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
216 				error = EFAULT;
217 #endif
218 		}
219 		break;
220 
221 	case RTCSYNC:
222 		if ((error = secpolicy_settime(CRED())) == 0)
223 			rtcsync();
224 		break;
225 
226 	/* END OF real time clock management commands */
227 
228 	default:
229 		error = EINVAL;
230 		break;
231 	}
232 	return (error == 0 ? 0 : set_errno(error));
233 }
234 
235 void
236 usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
237 {
238 	ssd->bo = USEGD_GETBASE(usd);
239 	ssd->ls = USEGD_GETLIMIT(usd);
240 	ssd->sel = sel;
241 
242 	/*
243 	 * set type, dpl and present bits.
244 	 */
245 	ssd->acc1 = usd->usd_type;
246 	ssd->acc1 |= usd->usd_dpl << 5;
247 	ssd->acc1 |= usd->usd_p << (5 + 2);
248 
249 	/*
250 	 * set avl, DB and granularity bits.
251 	 */
252 	ssd->acc2 = usd->usd_avl;
253 
254 #if defined(__amd64)
255 	ssd->acc2 |= usd->usd_long << 1;
256 #else
257 	ssd->acc2 |= usd->usd_reserved << 1;
258 #endif
259 
260 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
261 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
262 }
263 
264 static void
265 ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
266 {
267 
268 	USEGD_SETBASE(usd, ssd->bo);
269 	USEGD_SETLIMIT(usd, ssd->ls);
270 
271 	/*
272 	 * set type, dpl and present bits.
273 	 */
274 	usd->usd_type = ssd->acc1;
275 	usd->usd_dpl = ssd->acc1 >> 5;
276 	usd->usd_p = ssd->acc1 >> (5 + 2);
277 
278 	ASSERT(usd->usd_type >= SDT_MEMRO);
279 	ASSERT(usd->usd_dpl == SEL_UPL);
280 
281 	/*
282 	 * set avl, DB and granularity bits.
283 	 */
284 	usd->usd_avl = ssd->acc2;
285 
286 #if defined(__amd64)
287 	usd->usd_long = ssd->acc2 >> 1;
288 #else
289 	usd->usd_reserved = ssd->acc2 >> 1;
290 #endif
291 
292 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
293 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
294 }
295 
296 static void
297 ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
298 {
299 
300 	sgd->sgd_looffset = ssd->bo;
301 	sgd->sgd_hioffset = ssd->bo >> 16;
302 
303 	sgd->sgd_selector = ssd->ls;
304 	/*
305 	 * set type, dpl and present bits.
306 	 */
307 	sgd->sgd_type = ssd->acc1;
308 	sgd->sgd_dpl = ssd->acc1 >> 5;
309 	sgd->sgd_p = ssd->acc1 >> 7;
310 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
311 	ASSERT(sgd->sgd_dpl == SEL_UPL);
312 
313 #if defined(__i386)	/* reserved, ignored in amd64 */
314 	sgd->sgd_stkcpy = 0;
315 #endif
316 }
317 
318 /*
319  * Load LDT register with the current process's LDT.
320  */
321 void
322 ldt_load(void)
323 {
324 	/*
325 	 */
326 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
327 	wr_ldtr(ULDT_SEL);
328 }
329 
330 /*
331  * Store a NULL selector in the LDTR. All subsequent illegal references to
332  * the LDT will result in a #gp.
333  */
334 void
335 ldt_unload(void)
336 {
337 	CPU->cpu_gdt[GDT_LDT] = zero_udesc;
338 	wr_ldtr(0);
339 }
340 
341 /*ARGSUSED*/
342 static void
343 ldt_savectx(proc_t *p)
344 {
345 	ASSERT(p->p_ldt != NULL);
346 	ASSERT(p == curproc);
347 
348 #if defined(__amd64)
349 	/*
350 	 * The 64-bit kernel must be sure to clear any stale ldt
351 	 * selectors when context switching away from a process that
352 	 * has a private ldt. Consider the following example:
353 	 *
354 	 * 	Wine creats a ldt descriptor and points a segment register
355 	 * 	to it.
356 	 *
357 	 *	We then context switch away from wine lwp to kernel
358 	 *	thread and hit breakpoint in kernel with kmdb
359 	 *
360 	 *	When we continue and resume from kmdb we will #gp
361 	 * 	fault since kmdb will have saved the stale ldt selector
362 	 *	from wine and will try to restore it but we are no longer in
363 	 *	the context of the wine process and do not have our
364 	 *	ldtr register pointing to the private ldt.
365 	 */
366 	clr_ldt_sregs();
367 #endif
368 
369 	ldt_unload();
370 	cpu_fast_syscall_enable(NULL);
371 }
372 
373 static void
374 ldt_restorectx(proc_t *p)
375 {
376 	ASSERT(p->p_ldt != NULL);
377 	ASSERT(p == curproc);
378 
379 	ldt_load();
380 	cpu_fast_syscall_disable(NULL);
381 }
382 
383 /*
384  * When a process with a private LDT execs, fast syscalls must be enabled for
385  * the new process image.
386  */
387 /* ARGSUSED */
388 static void
389 ldt_freectx(proc_t *p, int isexec)
390 {
391 	ASSERT(p->p_ldt);
392 
393 	if (isexec) {
394 		kpreempt_disable();
395 		cpu_fast_syscall_enable(NULL);
396 		kpreempt_enable();
397 	}
398 
399 	/*
400 	 * ldt_free() will free the memory used by the private LDT, reset the
401 	 * process's descriptor, and re-program the LDTR.
402 	 */
403 	ldt_free(p);
404 }
405 
406 /*
407  * Install ctx op that ensures syscall/sysenter are disabled.
408  * See comments below.
409  *
410  * When a thread with a private LDT forks, the new process
411  * must have the LDT context ops installed.
412  */
413 /* ARGSUSED */
414 static void
415 ldt_installctx(proc_t *p, proc_t *cp)
416 {
417 	proc_t		*targ = p;
418 	kthread_t	*t;
419 
420 	/*
421 	 * If this is a fork, operate on the child process.
422 	 */
423 	if (cp != NULL) {
424 		targ = cp;
425 		ldt_dup(p, cp);
426 	}
427 
428 	/*
429 	 * The process context ops expect the target process as their argument.
430 	 */
431 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
432 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
433 
434 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
435 	    ldt_installctx, ldt_savectx, ldt_freectx);
436 
437 	/*
438 	 * We've just disabled fast system call and return instructions; take
439 	 * the slow path out to make sure we don't try to use one to return
440 	 * back to user. We must set t_post_sys for every thread in the
441 	 * process to make sure none of them escape out via fast return.
442 	 */
443 
444 	mutex_enter(&targ->p_lock);
445 	t = targ->p_tlist;
446 	do {
447 		t->t_post_sys = 1;
448 	} while ((t = t->t_forw) != targ->p_tlist);
449 	mutex_exit(&targ->p_lock);
450 }
451 
452 int
453 setdscr(struct ssd *ssd)
454 {
455 	ushort_t seli; 		/* selector index */
456 	user_desc_t *dscrp;	/* descriptor pointer */
457 	proc_t	*pp = ttoproc(curthread);
458 
459 	/*
460 	 * LDT segments: executable and data at DPL 3 only.
461 	 */
462 	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
463 		return (EINVAL);
464 
465 	/*
466 	 * check the selector index.
467 	 */
468 	seli = SELTOIDX(ssd->sel);
469 	if (seli >= MAXNLDT || seli < LDT_UDBASE)
470 		return (EINVAL);
471 
472 	mutex_enter(&pp->p_ldtlock);
473 
474 	/*
475 	 * If this is the first time for this process then setup a
476 	 * private LDT for it.
477 	 */
478 	if (pp->p_ldt == NULL) {
479 		kpreempt_disable();
480 		setup_ldt(pp);
481 
482 		/*
483 		 * Now that this process has a private LDT, the use of
484 		 * the syscall/sysret and sysenter/sysexit instructions
485 		 * is forbidden for this processes because they destroy
486 		 * the contents of %cs and %ss segment registers.
487 		 *
488 		 * Explicity disable them here and add a context handler
489 		 * to the process. Note that disabling
490 		 * them here means we can't use sysret or sysexit on
491 		 * the way out of this system call - so we force this
492 		 * thread to take the slow path (which doesn't make use
493 		 * of sysenter or sysexit) back out.
494 		 */
495 
496 		ldt_installctx(pp, NULL);
497 
498 		cpu_fast_syscall_disable(NULL);
499 
500 		ASSERT(curthread->t_post_sys != 0);
501 		wr_ldtr(ULDT_SEL);
502 		kpreempt_enable();
503 	}
504 
505 	if (ldt_map(pp, seli) == NULL) {
506 		mutex_exit(&pp->p_ldtlock);
507 		return (ENOMEM);
508 	}
509 
510 	ASSERT(seli <= pp->p_ldtlimit);
511 	dscrp = &pp->p_ldt[seli];
512 
513 	/*
514 	 * On the 64-bit kernel, this is where things get more subtle.
515 	 * Recall that in the 64-bit kernel, when we enter the kernel we
516 	 * deliberately -don't- reload the segment selectors we came in on
517 	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
518 	 * and the underlying descriptors are essentially ignored by the
519 	 * hardware in long mode - except for the base that we override with
520 	 * the gsbase MSRs.
521 	 *
522 	 * However, there's one unfortunate issue with this rosy picture --
523 	 * a descriptor that's not marked as 'present' will still generate
524 	 * an #np when loading a segment register.
525 	 *
526 	 * Consider this case.  An lwp creates a harmless LDT entry, points
527 	 * one of it's segment registers at it, then tells the kernel (here)
528 	 * to delete it.  In the 32-bit kernel, the #np will happen on the
529 	 * way back to userland where we reload the segment registers, and be
530 	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
531 	 * will happen in the normal case too.  However, if we're trying to
532 	 * use a debugger that wants to save and restore the segment registers,
533 	 * and the debugger things that we have valid segment registers, we
534 	 * have the problem that the debugger will try and restore the
535 	 * segment register that points at the now 'not present' descriptor
536 	 * and will take a #np right there.
537 	 *
538 	 * We should obviously fix the debugger to be paranoid about
539 	 * -not- restoring segment registers that point to bad descriptors;
540 	 * however we can prevent the problem here if we check to see if any
541 	 * of the segment registers are still pointing at the thing we're
542 	 * destroying; if they are, return an error instead. (That also seems
543 	 * a lot better failure mode than SIGKILL and a core file
544 	 * from kern_gpfault() too.)
545 	 */
546 	if (SI86SSD_PRES(ssd) == 0) {
547 		kthread_t *t;
548 		int bad = 0;
549 
550 		/*
551 		 * Look carefully at the segment registers of every lwp
552 		 * in the process (they're all stopped by our caller).
553 		 * If we're about to invalidate a descriptor that's still
554 		 * being referenced by *any* of them, return an error,
555 		 * rather than having them #gp on their way out of the kernel.
556 		 */
557 		ASSERT(pp->p_lwprcnt == 1);
558 
559 		mutex_enter(&pp->p_lock);
560 		t = pp->p_tlist;
561 		do {
562 			klwp_t *lwp = ttolwp(t);
563 			struct regs *rp = lwp->lwp_regs;
564 #if defined(__amd64)
565 			pcb_t *pcb = &lwp->lwp_pcb;
566 #endif
567 
568 			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
569 				bad = 1;
570 				break;
571 			}
572 
573 #if defined(__amd64)
574 			if (pcb->pcb_flags & RUPDATE_PENDING) {
575 				if (ssd->sel == pcb->pcb_ds ||
576 				    ssd->sel == pcb->pcb_es ||
577 				    ssd->sel == pcb->pcb_fs ||
578 				    ssd->sel == pcb->pcb_gs) {
579 					bad = 1;
580 					break;
581 				}
582 			} else
583 #endif
584 			{
585 				if (ssd->sel == rp->r_ds ||
586 				    ssd->sel == rp->r_es ||
587 				    ssd->sel == rp->r_fs ||
588 				    ssd->sel == rp->r_gs) {
589 					bad = 1;
590 					break;
591 				}
592 			}
593 
594 		} while ((t = t->t_forw) != pp->p_tlist);
595 		mutex_exit(&pp->p_lock);
596 
597 		if (bad) {
598 			mutex_exit(&pp->p_ldtlock);
599 			return (EBUSY);
600 		}
601 	}
602 
603 	/*
604 	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
605 	 */
606 	if (ssd->acc1 == 0) {
607 		bzero(dscrp, sizeof (*dscrp));
608 		mutex_exit(&pp->p_ldtlock);
609 		return (0);
610 	}
611 
612 	/*
613 	 * Check segment type, allow segment not present and
614 	 * only user DPL (3).
615 	 */
616 	if (SI86SSD_DPL(ssd) != SEL_UPL) {
617 		mutex_exit(&pp->p_ldtlock);
618 		return (EINVAL);
619 	}
620 
621 #if defined(__amd64)
622 	/*
623 	 * Do not allow 32-bit applications to create 64-bit mode code
624 	 * segments.
625 	 */
626 	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
627 	    SI86SSD_ISLONG(ssd)) {
628 		mutex_exit(&pp->p_ldtlock);
629 		return (EINVAL);
630 	}
631 #endif /* __amd64 */
632 
633 	/*
634 	 * Set up a code or data user segment descriptor.
635 	 */
636 	if (SI86SSD_ISUSEG(ssd)) {
637 		ssd_to_usd(ssd, dscrp);
638 		mutex_exit(&pp->p_ldtlock);
639 		return (0);
640 	}
641 
642 	/*
643 	 * Allow a call gate only if the destination is in the LDT.
644 	 */
645 	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
646 		ssd_to_sgd(ssd, (gate_desc_t *)dscrp);
647 		mutex_exit(&pp->p_ldtlock);
648 		return (0);
649 	}
650 
651 	mutex_exit(&pp->p_ldtlock);
652 	return (EINVAL);
653 }
654 
655 /*
656  * Allocate a private LDT for this process and initialize it with the
657  * default entries.
658  */
659 static void
660 setup_ldt(proc_t *pp)
661 {
662 	user_desc_t *ldtp;	/* descriptor pointer */
663 	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));
664 
665 	/*
666 	 * Allocate maximum virtual space we need for this LDT.
667 	 */
668 	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);
669 
670 	/*
671 	 * Allocate the minimum number of physical pages for LDT.
672 	 */
673 	(void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
674 	    VM_SLEEP, 0, segkmem_page_create, NULL);
675 
676 	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));
677 
678 	kpreempt_disable();
679 
680 	/* Update proc structure. XXX - need any locks here??? */
681 
682 	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
683 	    SDT_SYSLDT, SEL_KPL);
684 
685 	pp->p_ldtlimit = MINNLDT - 1;
686 	pp->p_ldt = ldtp;
687 	if (pp == curproc)
688 		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;
689 
690 	kpreempt_enable();
691 }
692 
693 /*
694  * Map the page corresponding to the selector entry. If the page is
695  * already mapped then it simply returns with the pointer to the entry.
696  * Otherwise it allocates a physical page for it and returns the pointer
697  * to the entry.  Returns 0 for errors.
698  */
699 static void *
700 ldt_map(proc_t *pp, uint_t seli)
701 {
702 	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
703 	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
704 	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
705 	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
706 	on_trap_data_t otd;
707 
708 	ASSERT(pp->p_ldt != NULL);
709 
710 	if (seli <= pp->p_ldtlimit)
711 		return (ent_addr);
712 
713 	/*
714 	 * We are increasing the size of the process's LDT.
715 	 * Make sure this and all intervening pages are mapped.
716 	 */
717 	while (page <= epage) {
718 		if (!on_trap(&otd, OT_DATA_ACCESS))
719 			(void) *(volatile int *)page;	/* peek at the page */
720 		else {		/* Allocate a physical page */
721 			(void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
722 			    segkmem_page_create, NULL);
723 			bzero(page, PAGESIZE);
724 		}
725 		no_trap();
726 		page += PAGESIZE;
727 	}
728 
729 	/* XXX - need any locks to update proc_t or gdt ??? */
730 
731 	ASSERT(curproc == pp);
732 
733 	kpreempt_disable();
734 	pp->p_ldtlimit = seli;
735 	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);
736 
737 	ldt_load();
738 	kpreempt_enable();
739 
740 	return (ent_addr);
741 }
742 
743 /*
744  * Free up the kernel memory used for LDT of this process.
745  */
746 static void
747 ldt_free(proc_t *pp)
748 {
749 	on_trap_data_t otd;
750 	caddr_t start, end;
751 	volatile caddr_t addr;
752 
753 	ASSERT(pp->p_ldt != NULL);
754 
755 	mutex_enter(&pp->p_ldtlock);
756 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
757 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
758 
759 	/* Free the physical page(s) used for mapping LDT */
760 	for (addr = start; addr <= end; addr += PAGESIZE) {
761 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
762 			/* peek at the address */
763 			(void) *(volatile int *)addr;
764 			segkmem_free(NULL, addr, PAGESIZE);
765 		}
766 	}
767 	no_trap();
768 
769 	/* Free up the virtual address space used for this LDT */
770 	vmem_free(heap_arena, pp->p_ldt,
771 	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));
772 	kpreempt_disable();
773 	pp->p_ldt = NULL;
774 	pp->p_ldt_desc = zero_sdesc;
775 	pp->p_ldtlimit = 0;
776 
777 	if (pp == curproc)
778 		ldt_unload();
779 	kpreempt_enable();
780 	mutex_exit(&pp->p_ldtlock);
781 }
782 
783 /*
784  * On fork copy new ldt for child.
785  */
786 void
787 ldt_dup(proc_t *pp, proc_t *cp)
788 {
789 	on_trap_data_t otd;
790 	caddr_t start, end;
791 	volatile caddr_t addr, caddr;
792 	int	minsize;
793 
794 	ASSERT(pp->p_ldt);
795 
796 	setup_ldt(cp);
797 
798 	mutex_enter(&pp->p_ldtlock);
799 	cp->p_ldtlimit = pp->p_ldtlimit;
800 	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
801 	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
802 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
803 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
804 	caddr = (caddr_t)cp->p_ldt; /* child LDT start */
805 
806 	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
807 	/* Walk thru the physical page(s) used for parent's LDT */
808 	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
809 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
810 			(void) *(volatile int *)addr; /* peek at the address */
811 			/* allocate a page if necessary */
812 			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
813 				(void) segkmem_xalloc(NULL, caddr, PAGESIZE,
814 				    VM_SLEEP, 0, segkmem_page_create, NULL);
815 			}
816 			bcopy(addr, caddr, PAGESIZE);
817 		}
818 	}
819 	no_trap();
820 	mutex_exit(&pp->p_ldtlock);
821 }
822