xref: /titanic_41/usr/src/uts/intel/ia32/os/sysi86.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
32 /*	  All Rights Reserved	*/
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/sysmacros.h>
39 #include <sys/systm.h>
40 #include <sys/signal.h>
41 #include <sys/errno.h>
42 #include <sys/fault.h>
43 #include <sys/syscall.h>
44 #include <sys/cpuvar.h>
45 #include <sys/sysi86.h>
46 #include <sys/psw.h>
47 #include <sys/cred.h>
48 #include <sys/policy.h>
49 #include <sys/thread.h>
50 #include <sys/debug.h>
51 #include <sys/ontrap.h>
52 #include <sys/privregs.h>
53 #include <sys/x86_archext.h>
54 #include <sys/vmem.h>
55 #include <sys/kmem.h>
56 #include <sys/mman.h>
57 #include <sys/archsystm.h>
58 #include <vm/hat.h>
59 #include <vm/as.h>
60 #include <vm/seg.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/faultcode.h>
63 #include <sys/fp.h>
64 #include <sys/cmn_err.h>
65 
66 static int setdscr(caddr_t ap);
67 static void *setup_ldt(proc_t *pp);
68 static void *ldt_map(proc_t *pp, uint_t seli);
69 
70 extern void rtcsync(void);
71 extern long ggmtl(void);
72 extern void sgmtl(long);
73 
74 /*
75  * sysi86 System Call
76  */
77 
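/*
 * Illustrative userland usage (a sketch, not part of this file): a program
 * such as an X server that needs full I/O privilege might issue something
 * like
 *
 *	if (sysi86(SI86V86, V86SC_IOPL, PS_IOPL) == -1)
 *		perror("sysi86");
 *
 * Raising IOPL above its current value must pass the secpolicy_sys_config()
 * check in the SI86V86 case below.
 */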
78 /* ARGSUSED */
79 int
80 sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
81 {
82 	int error = 0;
83 	int c;
84 	proc_t *pp = curproc;
85 
86 	switch (cmd) {
87 
88 	/*
89 	 * The SI86V86 subsystem call of the SYSI86 system call
90 	 * supports only one subcode -- V86SC_IOPL.
91 	 */
92 	case SI86V86:
93 		if (arg1 == V86SC_IOPL) {
94 			struct regs *rp = lwptoregs(ttolwp(curthread));
95 			greg_t oldpl = rp->r_ps & PS_IOPL;
96 			greg_t newpl = arg2 & PS_IOPL;
97 
98 			/*
99 			 * Must be privileged to run this system call
100 			 * when granting more I/O privilege.
101 			 */
102 			if (newpl > oldpl && (error =
103 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
104 				return (set_errno(error));
105 			rp->r_ps ^= oldpl ^ newpl;
106 		} else
107 			error = EINVAL;
108 		break;
109 
110 	/*
111 	 * Set a segment descriptor
112 	 */
113 	case SI86DSCR:
114 		/*
115 		 * There are considerable problems here manipulating
116 		 * resources shared by many running lwps.  Get everyone
117 		 * into a safe state before changing the LDT.
118 		 */
119 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
120 			error = EINTR;
121 			break;
122 		}
123 		error = setdscr((caddr_t)arg1);
124 		mutex_enter(&pp->p_lock);
125 		if (curthread != pp->p_agenttp)
126 			continuelwps(pp);
127 		mutex_exit(&pp->p_lock);
128 		break;
129 
130 	case SI86FPHW:
131 		c = fp_kind & 0xff;
132 		if (suword32((void *)arg1, c) == -1)
133 			error = EFAULT;
134 		break;
135 
136 	case SI86FPSTART:
137 		/*
138 		 * arg1 is the address of _fp_hw
139 		 * arg2 is the desired x87 FCW value
140 		 * arg3 is the desired SSE MXCSR value
141 		 * a return value of one means SSE hardware, else none.
142 		 */
143 		c = fp_kind & 0xff;
144 		if (suword32((void *)arg1, c) == -1) {
145 			error = EFAULT;
146 			break;
147 		}
148 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
149 		return (fp_kind == __FP_SSE ? 1 : 0);
150 
151 	/* real time clock management commands */
152 
153 	case WTODC:
154 		if ((error = secpolicy_settime(CRED())) == 0) {
155 			timestruc_t ts;
156 			mutex_enter(&tod_lock);
157 			gethrestime(&ts);
158 			tod_set(ts);
159 			mutex_exit(&tod_lock);
160 		}
161 		break;
162 
163 /* Give some timezone playing room */
164 #define	ONEWEEK	(7 * 24 * 60 * 60)
165 
166 	case SGMTL:
167 		/*
168 		 * When called from 32-bit land, negative values
169 		 * are not sign-extended, so we do that here
170 		 * by casting to an int and back.  We also
171 		 * clamp the value to within reason and detect
172 		 * when a 64-bit call overflows an int.
173 		 */
174 		if ((error = secpolicy_settime(CRED())) == 0) {
175 			int newlag = (int)arg1;
176 
177 #ifdef _SYSCALL32_IMPL
178 			if (get_udatamodel() == DATAMODEL_NATIVE &&
179 			    (long)newlag != (long)arg1) {
180 				error = EOVERFLOW;
181 			} else
182 #endif
183 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
184 				sgmtl(newlag);
185 			else
186 				error = EOVERFLOW;
187 		}
188 		break;
189 
190 	case GGMTL:
191 		if (get_udatamodel() == DATAMODEL_NATIVE) {
192 			if (sulword((void *)arg1, ggmtl()) == -1)
193 				error = EFAULT;
194 #ifdef _SYSCALL32_IMPL
195 		} else {
196 			time_t gmtl;
197 
198 			if ((gmtl = ggmtl()) > INT32_MAX) {
199 				/*
200 				 * Since gmt_lag can be at most
201 				 * +/- 12 hours, something is
202 				 * *seriously* messed up here.
203 				 */
204 				error = EOVERFLOW;
205 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
206 				error = EFAULT;
207 #endif
208 		}
209 		break;
210 
211 	case RTCSYNC:
212 		if ((error = secpolicy_settime(CRED())) == 0)
213 			rtcsync();
214 		break;
215 
216 	/* END OF real time clock management commands */
217 
218 	default:
219 		error = EINVAL;
220 		break;
221 	}
222 	return (error == 0 ? 0 : set_errno(error));
223 }
224 
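/*
 * Convert a hardware user segment descriptor into its struct ssd
 * representation (base, limit, selector and access bits).
 */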
225 void
226 usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
227 {
228 	ssd->bo = USEGD_GETBASE(usd);
229 	ssd->ls = USEGD_GETLIMIT(usd);
230 	ssd->sel = sel;
231 
232 	/*
233 	 * set type, dpl and present bits.
234 	 */
235 	ssd->acc1 = usd->usd_type;
236 	ssd->acc1 |= usd->usd_dpl << 5;
237 	ssd->acc1 |= usd->usd_p << (5 + 2);
238 
239 	/*
240 	 * set avl, DB and granularity bits.
241 	 */
242 	ssd->acc2 = usd->usd_avl;
243 
244 #if defined(__amd64)
245 	ssd->acc2 |= usd->usd_long << 1;
246 #else
247 	ssd->acc2 |= usd->usd_reserved << 1;
248 #endif
249 
250 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
251 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
252 }
253 
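/*
 * Convert a struct ssd supplied by userland into a hardware user segment
 * descriptor.  The caller is expected to have validated the request.
 */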
254 static void
255 ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
256 {
257 
258 	USEGD_SETBASE(usd, ssd->bo);
259 	USEGD_SETLIMIT(usd, ssd->ls);
260 
261 	/*
262 	 * set type, dpl and present bits.
263 	 */
264 	usd->usd_type = ssd->acc1;
265 	usd->usd_dpl = ssd->acc1 >> 5;
266 	usd->usd_p = ssd->acc1 >> (5 + 2);
267 
268 	ASSERT(usd->usd_type >= SDT_MEMRO);
269 	ASSERT(usd->usd_dpl == SEL_UPL);
270 
271 	/*
272 	 * set avl, DB and granularity bits.
273 	 */
274 	usd->usd_avl = ssd->acc2;
275 
276 #if defined(__amd64)
277 	usd->usd_long = ssd->acc2 >> 1;
278 #else
279 	usd->usd_reserved = ssd->acc2 >> 1;
280 #endif
281 
282 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
283 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
284 }
285 
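/*
 * Convert a struct ssd describing a call gate into a hardware gate
 * descriptor.
 */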
286 static void
287 ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
288 {
289 
290 	sgd->sgd_looffset = ssd->bo;
291 	sgd->sgd_hioffset = ssd->bo >> 16;
292 
293 	sgd->sgd_selector = ssd->ls;
294 	/*
295 	 * set type, dpl and present bits.
296 	 */
297 	sgd->sgd_type = ssd->acc1;
298 	sgd->sgd_dpl = ssd->acc1 >> 5;
299 	sgd->sgd_p = ssd->acc1 >> 7;
300 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
301 	ASSERT(sgd->sgd_dpl == SEL_UPL);
302 
303 #if defined(__i386)	/* reserved, ignored in amd64 */
304 	sgd->sgd_stkcpy = 0;
305 #endif
306 }
307 
308 static void ldt_installctx(kthread_t *, kthread_t *);
309 
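/*
 * Context-save operation installed (via ldt_installctx) on every thread of
 * a process with a private LDT: clear any stale LDT segment selectors on
 * the 64-bit kernel, and re-enable fast syscalls on this CPU while the
 * thread is switched out.
 */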
310 /*ARGSUSED*/
311 static void
312 ldt_savectx(kthread_t *t)
313 {
314 #if defined(__amd64)
315 	/*
316 	 * The 64-bit kernel must be sure to clear any stale ldt
317 	 * selectors when context switching away from a process that
318 	 * has a private ldt. Consider the following example:
319 	 *
320 	 * 	Wine creates an LDT descriptor and points a segment register
321 	 * 	to it.
322 	 *
323 	 *	We then context switch away from the wine lwp to a kernel
324 	 *	thread and hit a breakpoint in the kernel with kmdb.
325 	 *
326 	 *	When we continue and resume from kmdb we will #gp
327 	 * 	fault since kmdb will have saved the stale ldt selector
328 	 *	from wine and will try to restore it but we are no longer in
329 	 *	the context of the wine process and do not have our
330 	 *	ldtr register pointing to the private ldt.
331 	 */
332 	clr_ldt_sregs();
333 #endif
334 
335 	cpu_fast_syscall_enable(NULL);
336 }
337 
338 /*
339  * Free operation for the LDT context ops.  When a thread with a private LDT
340  * execs, fast syscalls must be re-enabled for the new process image.
341  */
342 /* ARGSUSED */
343 static void
344 ldt_freectx(kthread_t *t, int isexec)
345 {
346 	if (isexec) {
347 		kpreempt_disable();
348 		cpu_fast_syscall_enable(NULL);
349 		kpreempt_enable();
350 	}
351 }
352 
353 /*
354  * Install ctx op that ensures syscall/sysenter are disabled.
355  * See comments below.
356  *
357  * When a thread with a private LDT creates a new LWP or forks, the new LWP
358  * must have the LDT context ops installed.
359  */
360 /* ARGSUSED */
361 static void
362 ldt_installctx(kthread_t *t, kthread_t *ct)
363 {
364 	kthread_t *targ = t;
365 
366 	/*
367 	 * If this is a fork or an lwp_create, operate on the child thread.
368 	 */
369 	if (ct != NULL)
370 		targ = ct;
371 
372 	ASSERT(removectx(targ, NULL, ldt_savectx, cpu_fast_syscall_disable,
373 	    ldt_installctx, ldt_installctx, cpu_fast_syscall_enable,
374 	    ldt_freectx) == 0);
375 
376 	installctx(targ, NULL, ldt_savectx, cpu_fast_syscall_disable,
377 	    ldt_installctx, ldt_installctx, cpu_fast_syscall_enable,
378 	    ldt_freectx);
379 
380 	/*
381 	 * We've just disabled fast system call and return instructions; take
382 	 * the slow path out to make sure we don't try to use one to return
383 	 * back to user.
384 	 */
385 	targ->t_post_sys = 1;
386 }
387 
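/*
 * Handle the SI86DSCR subcommand: copy a struct ssd in from userland and
 * install (or, when ssd.acc1 is zero, clear) the corresponding entry in the
 * process's private LDT, creating the LDT on first use.
 *
 * Illustrative userland usage (a sketch, not part of this file):
 *
 *	struct ssd ssd;
 *
 *	ssd.sel = ...;		(an LDT selector at DPL 3)
 *	ssd.bo = ...;		(segment base)
 *	ssd.ls = ...;		(segment limit)
 *	ssd.acc1 = ...;		(type, dpl, present)
 *	ssd.acc2 = ...;		(avl, db, granularity)
 *	if (sysi86(SI86DSCR, &ssd) == -1)
 *		perror("sysi86");
 */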
388 static int
389 setdscr(caddr_t ap)
390 {
391 	struct ssd ssd;		/* request structure buffer */
392 	ushort_t seli; 		/* selector index */
393 	user_desc_t *dscrp;	/* descriptor pointer */
394 	proc_t	*pp = ttoproc(curthread);
395 	kthread_t *t;
396 
397 	if (get_udatamodel() == DATAMODEL_LP64)
398 		return (EINVAL);
399 
400 	if (copyin(ap, &ssd, sizeof (ssd)) < 0)
401 		return (EFAULT);
402 
403 	/*
404 	 * LDT segments: executable and data at DPL 3 only.
405 	 */
406 	if (!SELISLDT(ssd.sel) || !SELISUPL(ssd.sel))
407 		return (EINVAL);
408 
409 	/*
410 	 * check the selector index.
411 	 */
412 	seli = SELTOIDX(ssd.sel);
413 	if (seli >= MAXNLDT || seli <= LDT_UDBASE)
414 		return (EINVAL);
415 
416 	mutex_enter(&pp->p_ldtlock);
417 
418 	/*
419 	 * If this is the first time for this process then setup a
420 	 * private LDT for it.
421 	 */
422 	if (pp->p_ldt == NULL) {
423 		if (setup_ldt(pp) == NULL) {
424 			mutex_exit(&pp->p_ldtlock);
425 			return (ENOMEM);
426 		}
427 
428 		/*
429 		 * Now that this process has a private LDT, the use of
430 		 * the syscall/sysret and sysenter/sysexit instructions
431 		 * is forbidden for this process because they destroy
432 		 * the contents of %cs and %ss segment registers.
433 		 *
434 		 * Explicitly disable them here and add context handlers
435 		 * to all lwps in the process. Note that disabling
436 		 * them here means we can't use sysret or sysexit on
437 		 * the way out of this system call - so we force this
438 		 * thread to take the slow path (which doesn't make use
439 		 * of sysenter or sysexit) back out.
440 		 */
441 
442 		mutex_enter(&pp->p_lock);
443 		t = pp->p_tlist;
444 		do {
445 			ldt_installctx(t, NULL);
446 		} while ((t = t->t_forw) != pp->p_tlist);
447 		mutex_exit(&pp->p_lock);
448 
449 		kpreempt_disable();
450 		cpu_fast_syscall_disable(NULL);
451 		kpreempt_enable();
452 		ASSERT(curthread->t_post_sys != 0);
453 		wr_ldtr(ULDT_SEL);
454 	}
455 
456 	if (ldt_map(pp, seli) == NULL) {
457 		mutex_exit(&pp->p_ldtlock);
458 		return (ENOMEM);
459 	}
460 
461 	ASSERT(seli <= pp->p_ldtlimit);
462 	dscrp = &pp->p_ldt[seli];
463 
464 	/*
465 	 * On the 64-bit kernel, this is where things get more subtle.
466 	 * Recall that in the 64-bit kernel, when we enter the kernel we
467 	 * deliberately -don't- reload the segment selectors we came in on
468 	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
469 	 * and the underlying descriptors are essentially ignored by the
470 	 * hardware in long mode - except for the base that we override with
471 	 * the gsbase MSRs.
472 	 *
473 	 * However, there's one unfortunate issue with this rosy picture --
474 	 * a descriptor that's not marked as 'present' will still generate
475 	 * an #np when loading a segment register.
476 	 *
477 	 * Consider this case.  An lwp creates a harmless LDT entry, points
478 	 * one of its segment registers at it, then tells the kernel (here)
479 	 * to delete it.  In the 32-bit kernel, the #np will happen on the
480 	 * way back to userland where we reload the segment registers, and be
481 	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
482 	 * will happen in the normal case too.  However, if we're trying to
483 	 * use a debugger that wants to save and restore the segment registers,
484 	 * and the debugger thinks that we have valid segment registers, we
485 	 * have the problem that the debugger will try to restore the
486 	 * segment register that points at the now 'not present' descriptor
487 	 * and will take a #np right there.
488 	 *
489 	 * We should obviously fix the debugger to be paranoid about
490 	 * -not- restoring segment registers that point to bad descriptors;
491 	 * however we can prevent the problem here if we check to see if any
492 	 * of the segment registers are still pointing at the thing we're
493 	 * destroying; if they are, return an error instead. (That also seems
494 	 * a far better failure mode than SIGKILL and a core file
495 	 * from kern_gpfault().)
496 	 */
497 	if (SI86SSD_PRES(&ssd) == 0) {
498 		kthread_t *t;
499 		int bad = 0;
500 
501 		/*
502 		 * Look carefully at the segment registers of every lwp
503 		 * in the process (they're all stopped by our caller).
504 		 * If we're about to invalidate a descriptor that's still
505 		 * being referenced by *any* of them, return an error,
506 		 * rather than having them #gp on their way out of the kernel.
507 		 */
508 		ASSERT(pp->p_lwprcnt == 1);
509 
510 		mutex_enter(&pp->p_lock);
511 		t = pp->p_tlist;
512 		do {
513 			klwp_t *lwp = ttolwp(t);
514 			struct regs *rp = lwp->lwp_regs;
515 #if defined(__amd64)
516 			pcb_t *pcb = &lwp->lwp_pcb;
517 #endif
518 
519 			if (ssd.sel == rp->r_cs || ssd.sel == rp->r_ss) {
520 				bad = 1;
521 				break;
522 			}
523 
524 #if defined(__amd64)
525 			if (pcb->pcb_flags & RUPDATE_PENDING) {
526 				if (ssd.sel == pcb->pcb_ds ||
527 				    ssd.sel == pcb->pcb_es ||
528 				    ssd.sel == pcb->pcb_fs ||
529 				    ssd.sel == pcb->pcb_gs) {
530 					bad = 1;
531 					break;
532 				}
533 			} else
534 #endif
535 			{
536 				if (ssd.sel == rp->r_ds ||
537 				    ssd.sel == rp->r_es ||
538 				    ssd.sel == rp->r_fs ||
539 				    ssd.sel == rp->r_gs) {
540 					bad = 1;
541 					break;
542 				}
543 			}
544 
545 		} while ((t = t->t_forw) != pp->p_tlist);
546 		mutex_exit(&pp->p_lock);
547 
548 		if (bad) {
549 			mutex_exit(&pp->p_ldtlock);
550 			return (EBUSY);
551 		}
552 	}
553 
554 	/*
555 	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
556 	 */
557 	if (ssd.acc1 == 0) {
558 		bzero(dscrp, sizeof (*dscrp));
559 		mutex_exit(&pp->p_ldtlock);
560 		return (0);
561 	}
562 
563 	/*
564 	 * Check the segment descriptor: allow not-present segments,
565 	 * but only at user DPL (3).
566 	 */
567 	if (SI86SSD_DPL(&ssd) != SEL_UPL) {
568 		mutex_exit(&pp->p_ldtlock);
569 		return (EINVAL);
570 	}
571 
572 #if defined(__amd64)
573 	/*
574 	 * Do not allow 32-bit applications to create 64-bit mode code segments.
575 	 */
576 	if (SI86SSD_ISUSEG(&ssd) && ((SI86SSD_TYPE(&ssd) >> 3) & 1) == 1 &&
577 	    SI86SSD_ISLONG(&ssd)) {
578 		mutex_exit(&pp->p_ldtlock);
579 		return (EINVAL);
580 	}
581 #endif /* __amd64 */
582 
583 	/*
584 	 * Set up a code or data user segment descriptor.
585 	 */
586 	if (SI86SSD_ISUSEG(&ssd)) {
587 		ssd_to_usd(&ssd, dscrp);
588 		mutex_exit(&pp->p_ldtlock);
589 		return (0);
590 	}
591 
592 	/*
593 	 * Allow a call gate only if the destination is in the LDT.
594 	 */
595 	if (SI86SSD_TYPE(&ssd) == SDT_SYSCGT && SELISLDT(ssd.ls)) {
596 		ssd_to_sgd(&ssd, (gate_desc_t *)dscrp);
597 		mutex_exit(&pp->p_ldtlock);
598 		return (0);
599 	}
600 
601 	mutex_exit(&pp->p_ldtlock);
602 	return (EINVAL);
603 }
604 
605 /*
606  * Allocate a private LDT for this process and initialize it with the
607  * default entries. Returns NULL on error, a pointer to the LDT on success.
608  */
609 static void *
610 setup_ldt(proc_t *pp)
611 {
612 	user_desc_t *ldtp;	/* descriptor pointer */
613 	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));
614 
615 	/*
616 	 * Allocate maximum virtual space we need for this LDT.
617 	 */
618 	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);
619 
620 	/*
621 	 * Allocate the minimum number of physical pages for LDT.
622 	 */
623 	if (segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
624 	    VM_SLEEP, 0, segkmem_page_create, NULL) == NULL) {
625 		vmem_free(heap_arena, ldtp, ptob(npages));
626 		return (0);
627 	}
628 	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));
629 
630 	/*
631 	 * Copy the default LDT entries into the new table.
632 	 */
633 	bcopy(ldt0_default, ldtp, MINNLDT * sizeof (user_desc_t));
634 
635 	kpreempt_disable();
636 
637 	/* Update proc structure. XXX - need any locks here??? */
638 
639 	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
640 	    SDT_SYSLDT, SEL_KPL);
641 
642 	pp->p_ldtlimit = MINNLDT - 1;
643 	pp->p_ldt = ldtp;
644 	if (pp == curproc)
645 		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;
646 
647 	kpreempt_enable();
648 
649 	return (ldtp);
650 }
651 
652 /*
653  * Load LDT register with the current process's LDT.
654  */
655 void
656 ldt_load(void)
657 {
658 	proc_t *p = curthread->t_procp;
659 
660 	ASSERT(curthread->t_preempt != 0);
661 
662 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = p->p_ldt_desc;
663 	wr_ldtr(ULDT_SEL);
664 }
665 
666 /*
667  * Map the page corresponding to the selector entry. If the page is
668  * already mapped, it simply returns a pointer to the entry.
669  * Otherwise it allocates a physical page for it and returns a pointer
670  * to the entry.  Returns NULL on error.
671  */
672 static void *
673 ldt_map(proc_t *pp, uint_t seli)
674 {
675 	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
676 	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
677 	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
678 	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
679 	on_trap_data_t otd;
680 
681 	ASSERT(pp->p_ldt != NULL);
682 
683 	if (seli <= pp->p_ldtlimit)
684 		return (ent_addr);
685 
686 	/*
687 	 * We are increasing the size of the process's LDT.
688 	 * Make sure this and all intervening pages are mapped.
689 	 */
690 	while (page <= epage) {
691 		if (!on_trap(&otd, OT_DATA_ACCESS))
692 			(void) *(volatile int *)page;	/* peek at the page */
693 		else {		/* Allocate a physical page */
694 			if (segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
695 			    segkmem_page_create, NULL) == NULL) {
696 				no_trap();
697 				return (NULL);
698 			}
699 			bzero(page, PAGESIZE);
700 		}
701 		no_trap();
702 		page += PAGESIZE;
703 	}
704 
705 	/* XXX - need any locks to update proc_t or gdt ??? */
706 
707 	ASSERT(curproc == pp);
708 
709 	kpreempt_disable();
710 	pp->p_ldtlimit = seli;
711 	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);
712 
713 	ldt_load();
714 	kpreempt_enable();
715 
716 	return (ent_addr);
717 }
718 
719 /*
720  * Free up the kernel memory used for this process's LDT.
721  */
722 void
723 ldt_free(proc_t *pp)
724 {
725 	on_trap_data_t otd;
726 	caddr_t start, end;
727 	volatile caddr_t addr;
728 
729 	ASSERT(pp->p_ldt != NULL);
730 
731 	mutex_enter(&pp->p_ldtlock);
732 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
733 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
734 
735 	/* Free the physical page(s) used for mapping LDT */
736 	for (addr = start; addr <= end; addr += PAGESIZE) {
737 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
738 			/* peek at the address */
739 			(void) *(volatile int *)addr;
740 			segkmem_free(NULL, addr, PAGESIZE);
741 		}
742 	}
743 	no_trap();
744 
745 	/* Free up the virtual address space used for this LDT */
746 	vmem_free(heap_arena, pp->p_ldt,
747 	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));
748 	kpreempt_disable();
749 	pp->p_ldt = NULL;
750 	pp->p_ldt_desc = ldt0_default_desc;
751 	if (pp == curproc)
752 		ldt_load();
753 	kpreempt_enable();
754 	mutex_exit(&pp->p_ldtlock);
755 }
756 
757 /*
758  * On fork, copy the parent's LDT for the child.
759  */
760 int
761 ldt_dup(proc_t *pp, proc_t *cp)
762 {
763 	on_trap_data_t otd;
764 	caddr_t start, end;
765 	volatile caddr_t addr, caddr;
766 	int	minsize;
767 
768 	if (pp->p_ldt == NULL) {
769 		cp->p_ldt_desc = ldt0_default_desc;
770 		return (0);
771 	}
772 
773 	if (setup_ldt(cp) == NULL) {
774 		return (ENOMEM);
775 	}
776 
777 	mutex_enter(&pp->p_ldtlock);
778 	cp->p_ldtlimit = pp->p_ldtlimit;
779 	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
780 	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
781 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
782 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
783 	caddr = (caddr_t)cp->p_ldt; /* child LDT start */
784 
785 	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
786 	/* Walk through the physical page(s) used for the parent's LDT */
787 	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
788 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
789 			(void) *(volatile int *)addr; /* peek at the address */
790 			/* allocate a page if necessary */
791 			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
792 				if (segkmem_xalloc(NULL, caddr, PAGESIZE,
793 				    VM_SLEEP, 0, segkmem_page_create, NULL) ==
794 				    NULL) {
795 					no_trap();
796 					ldt_free(cp);
797 					mutex_exit(&pp->p_ldtlock);
798 					return (ENOMEM);
799 				}
800 			}
801 			bcopy(addr, caddr, PAGESIZE);
802 		}
803 	}
804 	no_trap();
805 	mutex_exit(&pp->p_ldtlock);
806 	return (0);
807 }
808