xref: /titanic_52/usr/src/uts/intel/ia32/os/sysi86.c (revision ef292b7fad311e62bc65379b1190c4ab7a898668)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/signal.h>
40 #include <sys/errno.h>
41 #include <sys/fault.h>
42 #include <sys/syscall.h>
43 #include <sys/cpuvar.h>
44 #include <sys/sysi86.h>
45 #include <sys/psw.h>
46 #include <sys/cred.h>
47 #include <sys/policy.h>
48 #include <sys/thread.h>
49 #include <sys/debug.h>
50 #include <sys/ontrap.h>
51 #include <sys/privregs.h>
52 #include <sys/x86_archext.h>
53 #include <sys/vmem.h>
54 #include <sys/kmem.h>
55 #include <sys/mman.h>
56 #include <sys/archsystm.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/seg.h>
60 #include <vm/seg_kmem.h>
61 #include <vm/faultcode.h>
62 #include <sys/fp.h>
63 #include <sys/cmn_err.h>
64 
/*
 * Private LDT management helpers; their definitions appear later in
 * this file.
 */
static void setup_ldt(proc_t *pp);
static void *ldt_map(proc_t *pp, uint_t seli);
static void ldt_free(proc_t *pp);

/*
 * Helpers defined outside this file, used by the real time clock
 * subcommands of sysi86() (RTCSYNC, GGMTL and SGMTL respectively).
 */
extern void rtcsync(void);
extern long ggmtl(void);
extern void sgmtl(long);
72 
73 /*
74  * sysi86 System Call
75  */
76 
77 /* ARGSUSED */
78 int
79 sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
80 {
81 	struct ssd ssd;
82 	int error = 0;
83 	int c;
84 	proc_t *pp = curproc;
85 
86 	switch (cmd) {
87 
88 	/*
89 	 * The SI86V86 subsystem call of the SYSI86 system call
90 	 * supports only one subcode -- V86SC_IOPL.
91 	 */
92 	case SI86V86:
93 		if (arg1 == V86SC_IOPL) {
94 			struct regs *rp = lwptoregs(ttolwp(curthread));
95 			greg_t oldpl = rp->r_ps & PS_IOPL;
96 			greg_t newpl = arg2 & PS_IOPL;
97 
98 			/*
99 			 * Must be privileged to run this system call
100 			 * if giving more io privilege.
101 			 */
102 			if (newpl > oldpl && (error =
103 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
104 				return (set_errno(error));
105 			rp->r_ps ^= oldpl ^ newpl;
106 		} else
107 			error = EINVAL;
108 		break;
109 
110 	/*
111 	 * Set a segment descriptor
112 	 */
113 	case SI86DSCR:
114 		/*
115 		 * There are considerable problems here manipulating
116 		 * resources shared by many running lwps.  Get everyone
117 		 * into a safe state before changing the LDT.
118 		 */
119 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
120 			error = EINTR;
121 			break;
122 		}
123 
124 		if (get_udatamodel() == DATAMODEL_LP64) {
125 			error = EINVAL;
126 			break;
127 		}
128 
129 		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
130 			error = EFAULT;
131 			break;
132 		}
133 
134 		error = setdscr(&ssd);
135 
136 		mutex_enter(&pp->p_lock);
137 		if (curthread != pp->p_agenttp)
138 			continuelwps(pp);
139 		mutex_exit(&pp->p_lock);
140 		break;
141 
142 	case SI86FPHW:
143 		c = fp_kind & 0xff;
144 		if (suword32((void *)arg1, c) == -1)
145 			error = EFAULT;
146 		break;
147 
148 	case SI86FPSTART:
149 		/*
150 		 * arg1 is the address of _fp_hw
151 		 * arg2 is the desired x87 FCW value
152 		 * arg3 is the desired SSE MXCSR value
153 		 * a return value of one means SSE hardware, else none.
154 		 */
155 		c = fp_kind & 0xff;
156 		if (suword32((void *)arg1, c) == -1) {
157 			error = EFAULT;
158 			break;
159 		}
160 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
161 		return (fp_kind == __FP_SSE ? 1 : 0);
162 
163 	/* real time clock management commands */
164 
165 	case WTODC:
166 		if ((error = secpolicy_settime(CRED())) == 0) {
167 			timestruc_t ts;
168 			mutex_enter(&tod_lock);
169 			gethrestime(&ts);
170 			tod_set(ts);
171 			mutex_exit(&tod_lock);
172 		}
173 		break;
174 
175 /* Give some timezone playing room */
176 #define	ONEWEEK	(7 * 24 * 60 * 60)
177 
178 	case SGMTL:
179 		/*
180 		 * Called from 32 bit land, negative values
181 		 * are not sign extended, so we do that here
182 		 * by casting it to an int and back.  We also
183 		 * clamp the value to within reason and detect
184 		 * when a 64 bit call overflows an int.
185 		 */
186 		if ((error = secpolicy_settime(CRED())) == 0) {
187 			int newlag = (int)arg1;
188 
189 #ifdef _SYSCALL32_IMPL
190 			if (get_udatamodel() == DATAMODEL_NATIVE &&
191 			    (long)newlag != (long)arg1) {
192 				error = EOVERFLOW;
193 			} else
194 #endif
195 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
196 				sgmtl(newlag);
197 			else
198 				error = EOVERFLOW;
199 		}
200 		break;
201 
202 	case GGMTL:
203 		if (get_udatamodel() == DATAMODEL_NATIVE) {
204 			if (sulword((void *)arg1, ggmtl()) == -1)
205 				error = EFAULT;
206 #ifdef _SYSCALL32_IMPL
207 		} else {
208 			time_t gmtl;
209 
210 			if ((gmtl = ggmtl()) > INT32_MAX) {
211 				/*
212 				 * Since gmt_lag can at most be
213 				 * +/- 12 hours, something is
214 				 * *seriously* messed up here.
215 				 */
216 				error = EOVERFLOW;
217 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
218 				error = EFAULT;
219 #endif
220 		}
221 		break;
222 
223 	case RTCSYNC:
224 		if ((error = secpolicy_settime(CRED())) == 0)
225 			rtcsync();
226 		break;
227 
228 	/* END OF real time clock management commands */
229 
230 	default:
231 		error = EINVAL;
232 		break;
233 	}
234 	return (error == 0 ? 0 : set_errno(error));
235 }
236 
237 void
238 usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
239 {
240 	ssd->bo = USEGD_GETBASE(usd);
241 	ssd->ls = USEGD_GETLIMIT(usd);
242 	ssd->sel = sel;
243 
244 	/*
245 	 * set type, dpl and present bits.
246 	 */
247 	ssd->acc1 = usd->usd_type;
248 	ssd->acc1 |= usd->usd_dpl << 5;
249 	ssd->acc1 |= usd->usd_p << (5 + 2);
250 
251 	/*
252 	 * set avl, DB and granularity bits.
253 	 */
254 	ssd->acc2 = usd->usd_avl;
255 
256 #if defined(__amd64)
257 	ssd->acc2 |= usd->usd_long << 1;
258 #else
259 	ssd->acc2 |= usd->usd_reserved << 1;
260 #endif
261 
262 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
263 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
264 }
265 
266 static void
267 ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
268 {
269 
270 	USEGD_SETBASE(usd, ssd->bo);
271 	USEGD_SETLIMIT(usd, ssd->ls);
272 
273 	/*
274 	 * set type, dpl and present bits.
275 	 */
276 	usd->usd_type = ssd->acc1;
277 	usd->usd_dpl = ssd->acc1 >> 5;
278 	usd->usd_p = ssd->acc1 >> (5 + 2);
279 
280 	ASSERT(usd->usd_type >= SDT_MEMRO);
281 	ASSERT(usd->usd_dpl == SEL_UPL);
282 
283 	/*
284 	 * set avl, DB and granularity bits.
285 	 */
286 	usd->usd_avl = ssd->acc2;
287 
288 #if defined(__amd64)
289 	usd->usd_long = ssd->acc2 >> 1;
290 #else
291 	usd->usd_reserved = ssd->acc2 >> 1;
292 #endif
293 
294 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
295 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
296 }
297 
298 static void
299 ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
300 {
301 
302 	sgd->sgd_looffset = ssd->bo;
303 	sgd->sgd_hioffset = ssd->bo >> 16;
304 
305 	sgd->sgd_selector = ssd->ls;
306 	/*
307 	 * set type, dpl and present bits.
308 	 */
309 	sgd->sgd_type = ssd->acc1;
310 	sgd->sgd_dpl = ssd->acc1 >> 5;
311 	sgd->sgd_p = ssd->acc1 >> 7;
312 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
313 	ASSERT(sgd->sgd_dpl == SEL_UPL);
314 
315 #if defined(__i386)	/* reserved, ignored in amd64 */
316 	sgd->sgd_stkcpy = 0;
317 #endif
318 }
319 
320 /*
321  * Load LDT register with the current process's LDT.
322  */
323 void
324 ldt_load(void)
325 {
326 	/*
327 	 */
328 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
329 	wr_ldtr(ULDT_SEL);
330 }
331 
/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will result in a #gp.
 *
 * The GDT_LDT slot of this CPU's GDT is cleared as well, so no stale
 * LDT descriptor is left behind in the GDT.
 */
void
ldt_unload(void)
{
	CPU->cpu_gdt[GDT_LDT] = zero_udesc;
	wr_ldtr(0);
}
342 
/*
 * Context op for a process with a private LDT; ldt_installctx()
 * registers it in two of the installpctx() handler slots.  It unloads
 * the LDT from this CPU and re-enables the fast system call
 * instructions.
 */
/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 *	Wine creates a ldt descriptor and points a segment register
	 *	to it.
	 *
	 *	We then context switch away from wine lwp to kernel
	 *	thread and hit breakpoint in kernel with kmdb
	 *
	 *	When we continue and resume from kmdb we will #gp
	 *	fault since kmdb will have saved the stale ldt selector
	 *	from wine and will try to restore it but we are no longer in
	 *	the context of the wine process and do not have our
	 *	ldtr register pointing to the private ldt.
	 */
	clr_ldt_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}
374 
/*
 * Restore context op for a process with a private LDT; the counterpart
 * of ldt_savectx().  It reloads the process's LDT on this CPU and
 * disables the fast system call instructions again.
 */
static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}
384 
385 /*
386  * When a process with a private LDT execs, fast syscalls must be enabled for
387  * the new process image.
388  */
389 /* ARGSUSED */
390 static void
391 ldt_freectx(proc_t *p, int isexec)
392 {
393 	ASSERT(p->p_ldt);
394 
395 	if (isexec) {
396 		kpreempt_disable();
397 		cpu_fast_syscall_enable(NULL);
398 		kpreempt_enable();
399 	}
400 
401 	/*
402 	 * ldt_free() will free the memory used by the private LDT, reset the
403 	 * process's descriptor, and re-program the LDTR.
404 	 */
405 	ldt_free(p);
406 }
407 
408 /*
409  * Install ctx op that ensures syscall/sysenter are disabled.
410  * See comments below.
411  *
412  * When a thread with a private LDT forks, the new process
413  * must have the LDT context ops installed.
414  */
415 /* ARGSUSED */
416 static void
417 ldt_installctx(proc_t *p, proc_t *cp)
418 {
419 	proc_t		*targ = p;
420 	kthread_t	*t;
421 
422 	/*
423 	 * If this is a fork, operate on the child process.
424 	 */
425 	if (cp != NULL) {
426 		targ = cp;
427 		ldt_dup(p, cp);
428 	}
429 
430 	/*
431 	 * The process context ops expect the target process as their argument.
432 	 */
433 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
434 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
435 
436 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
437 	    ldt_installctx, ldt_savectx, ldt_freectx);
438 
439 	/*
440 	 * We've just disabled fast system call and return instructions; take
441 	 * the slow path out to make sure we don't try to use one to return
442 	 * back to user. We must set t_post_sys for every thread in the
443 	 * process to make sure none of them escape out via fast return.
444 	 */
445 
446 	mutex_enter(&targ->p_lock);
447 	t = targ->p_tlist;
448 	do {
449 		t->t_post_sys = 1;
450 	} while ((t = t->t_forw) != targ->p_tlist);
451 	mutex_exit(&targ->p_lock);
452 }
453 
/*
 * Install, replace or clear one descriptor in the calling process's
 * private LDT, creating that LDT on first use.
 *
 * ssd describes the selector and the descriptor contents.  Returns 0 on
 * success, or an errno value:
 *	EINVAL	selector is not a user-level LDT selector, index is out of
 *		range, DPL is not user, a 64-bit code segment is requested
 *		(amd64), or the descriptor type is not supported
 *	ENOMEM	ldt_map() returned NULL
 *	EBUSY	a not-present descriptor would replace one whose selector
 *		is still in a segment register of some lwp in the process
 *
 * sysi86() holds the other lwps of the process before calling this.
 */
int
setdscr(struct ssd *ssd)
{
	ushort_t seli; 		/* selector index */
	user_desc_t *dscrp;	/* descriptor pointer */
	proc_t	*pp = ttoproc(curthread);

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 *
	 * NOTE(review): setup_ldt() performs VM_SLEEP allocations, yet it
	 * is called here after kpreempt_disable() -- confirm whether
	 * blocking with preemption disabled is acceptable on this path.
	 */
	if (pp->p_ldt == NULL) {
		kpreempt_disable();
		setup_ldt(pp);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */

		ldt_installctx(pp, NULL);

		cpu_fast_syscall_disable(NULL);

		/* ldt_installctx() set t_post_sys on every thread. */
		ASSERT(curthread->t_post_sys != 0);
		wr_ldtr(ULDT_SEL);
		kpreempt_enable();
	}

	/* Make sure the LDT is large enough to hold entry seli. */
	if (ldt_map(pp, seli) == NULL) {
		mutex_exit(&pp->p_ldtlock);
		return (ENOMEM);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	dscrp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of its segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a lot better failure mode than SIGKILL and a core file
	 * from kern_gpfault() too.)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			/*
			 * With RUPDATE_PENDING set, the data segment
			 * selectors are checked in the pcb rather than in
			 * the saved register frame.
			 */
			if (pcb->pcb_flags & RUPDATE_PENDING) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		bzero(dscrp, sizeof (*dscrp));
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.  Bit 3 of the type is tested to pick out code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Allow a call gate only if the destination is in the LDT.
	 * (For a gate, ssd->ls holds the destination selector.)
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
		ssd_to_sgd(ssd, (gate_desc_t *)dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/* Any other descriptor type is refused. */
	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
656 
657 /*
658  * Allocate a private LDT for this process and initialize it with the
659  * default entries.
660  */
661 static void
662 setup_ldt(proc_t *pp)
663 {
664 	user_desc_t *ldtp;	/* descriptor pointer */
665 	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));
666 
667 	/*
668 	 * Allocate maximum virtual space we need for this LDT.
669 	 */
670 	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);
671 
672 	/*
673 	 * Allocate the minimum number of physical pages for LDT.
674 	 */
675 	(void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
676 	    VM_SLEEP, 0, segkmem_page_create, NULL);
677 
678 	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));
679 
680 	kpreempt_disable();
681 
682 	/* Update proc structure. XXX - need any locks here??? */
683 
684 	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
685 	    SDT_SYSLDT, SEL_KPL);
686 
687 	pp->p_ldtlimit = MINNLDT - 1;
688 	pp->p_ldt = ldtp;
689 	if (pp == curproc)
690 		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;
691 
692 	kpreempt_enable();
693 }
694 
/*
 * Map the page corresponding to the selector entry. If the page is
 * already mapped then it simply returns with the pointer to the entry.
 * Otherwise it allocates a physical page for it and returns the pointer
 * to the entry.  Returns 0 for errors.
 *
 * NOTE(review): no path in this function returns 0 as written; the
 * allocations use VM_SLEEP and their results are not checked.  The
 * caller's NULL check is therefore presumably defensive -- confirm.
 *
 * When seli lies beyond the current limit, p_ldtlimit and the LDT
 * descriptor limit are raised to cover it and the LDT is reloaded on
 * this CPU.
 */
static void *
ldt_map(proc_t *pp, uint_t seli)
{
	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
	/* volatile: page is modified across on_trap() returns */
	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
	on_trap_data_t otd;

	ASSERT(pp->p_ldt != NULL);

	/* Entry already lies within the current LDT; nothing to grow. */
	if (seli <= pp->p_ldtlimit)
		return (ent_addr);

	/*
	 * We are increasing the size of the process's LDT.
	 * Make sure this and all intervening pages are mapped.
	 * Each page is probed under on_trap() protection; when the probe
	 * traps, the else branch backs the page and zeroes it.
	 */
	while (page <= epage) {
		if (!on_trap(&otd, OT_DATA_ACCESS))
			(void) *(volatile int *)page;	/* peek at the page */
		else {		/* Allocate a physical page */
			(void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
			    segkmem_page_create, NULL);
			bzero(page, PAGESIZE);
		}
		no_trap();
		page += PAGESIZE;
	}

	/* XXX - need any locks to update proc_t or gdt ??? */

	ASSERT(curproc == pp);

	/*
	 * Publish the new limit and reload the LDT with preemption
	 * disabled, since ldt_load() writes the current CPU's GDT.
	 */
	kpreempt_disable();
	pp->p_ldtlimit = seli;
	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);

	ldt_load();
	kpreempt_enable();

	return (ent_addr);
}
744 
/*
 * Free up the kernel memory used for LDT of this process.
 *
 * Under p_ldtlock, every backed page of the LDT is released, then the
 * virtual range reserved by setup_ldt() is returned to heap_arena.
 * The process's LDT pointer, descriptor and limit are reset, and if pp
 * is the current process the LDT is also unloaded from this CPU.
 */
static void
ldt_free(proc_t *pp)
{
	on_trap_data_t otd;
	caddr_t start, end;
	volatile caddr_t addr;	/* volatile: live across on_trap() returns */

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));

	/*
	 * Free the physical page(s) used for mapping LDT.  Each page is
	 * probed under on_trap() protection; a page whose probe traps is
	 * skipped.
	 */
	for (addr = start; addr <= end; addr += PAGESIZE) {
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			/* peek at the address */
			(void) *(volatile int *)addr;
			segkmem_free(NULL, addr, PAGESIZE);
		}
	}
	no_trap();

	/* Free up the virtual address space used for this LDT */
	vmem_free(heap_arena, pp->p_ldt,
	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));
	kpreempt_disable();
	pp->p_ldt = NULL;
	pp->p_ldt_desc = zero_sdesc;
	pp->p_ldtlimit = 0;

	if (pp == curproc)
		ldt_unload();
	kpreempt_enable();
	mutex_exit(&pp->p_ldtlock);
}
784 
785 /*
786  * On fork copy new ldt for child.
787  */
788 void
789 ldt_dup(proc_t *pp, proc_t *cp)
790 {
791 	on_trap_data_t otd;
792 	caddr_t start, end;
793 	volatile caddr_t addr, caddr;
794 	int	minsize;
795 
796 	ASSERT(pp->p_ldt);
797 
798 	setup_ldt(cp);
799 
800 	mutex_enter(&pp->p_ldtlock);
801 	cp->p_ldtlimit = pp->p_ldtlimit;
802 	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
803 	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
804 	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
805 	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
806 	caddr = (caddr_t)cp->p_ldt; /* child LDT start */
807 
808 	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
809 	/* Walk thru the physical page(s) used for parent's LDT */
810 	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
811 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
812 			(void) *(volatile int *)addr; /* peek at the address */
813 			/* allocate a page if necessary */
814 			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
815 				(void) segkmem_xalloc(NULL, caddr, PAGESIZE,
816 				    VM_SLEEP, 0, segkmem_page_create, NULL);
817 			}
818 			bcopy(addr, caddr, PAGESIZE);
819 		}
820 	}
821 	no_trap();
822 	mutex_exit(&pp->p_ldtlock);
823 }
824