xref: /titanic_50/usr/src/uts/intel/ia32/os/sysi86.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
59acbbeafSnn35248  * Common Development and Distribution License (the "License").
69acbbeafSnn35248  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*7af88ac7SKuriakose Kuruvilla  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
277c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
307c478bd9Sstevel@tonic-gate /*	  All Rights Reserved	*/
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate #include <sys/param.h>
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
357c478bd9Sstevel@tonic-gate #include <sys/systm.h>
367c478bd9Sstevel@tonic-gate #include <sys/signal.h>
377c478bd9Sstevel@tonic-gate #include <sys/errno.h>
387c478bd9Sstevel@tonic-gate #include <sys/fault.h>
397c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
407c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
417c478bd9Sstevel@tonic-gate #include <sys/sysi86.h>
427c478bd9Sstevel@tonic-gate #include <sys/psw.h>
437c478bd9Sstevel@tonic-gate #include <sys/cred.h>
447c478bd9Sstevel@tonic-gate #include <sys/policy.h>
457c478bd9Sstevel@tonic-gate #include <sys/thread.h>
467c478bd9Sstevel@tonic-gate #include <sys/debug.h>
477c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
487c478bd9Sstevel@tonic-gate #include <sys/privregs.h>
497c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
507c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
517c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
527c478bd9Sstevel@tonic-gate #include <sys/mman.h>
537c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
547c478bd9Sstevel@tonic-gate #include <vm/hat.h>
557c478bd9Sstevel@tonic-gate #include <vm/as.h>
567c478bd9Sstevel@tonic-gate #include <vm/seg.h>
577c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
587c478bd9Sstevel@tonic-gate #include <vm/faultcode.h>
597c478bd9Sstevel@tonic-gate #include <sys/fp.h>
607c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
61ae115bc7Smrj #include <sys/segments.h>
62ae115bc7Smrj #include <sys/clock.h>
63843e1988Sjohnlev #if defined(__xpv)
64843e1988Sjohnlev #include <sys/hypervisor.h>
65843e1988Sjohnlev #include <sys/note.h>
66843e1988Sjohnlev #endif
677c478bd9Sstevel@tonic-gate 
68843e1988Sjohnlev static void ldt_alloc(proc_t *, uint_t);
69843e1988Sjohnlev static void ldt_free(proc_t *);
70843e1988Sjohnlev static void ldt_dup(proc_t *, proc_t *);
71843e1988Sjohnlev static void ldt_grow(proc_t *, uint_t);
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate /*
747c478bd9Sstevel@tonic-gate  * sysi86 System Call
757c478bd9Sstevel@tonic-gate  */
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate /* ARGSUSED */
787c478bd9Sstevel@tonic-gate int
sysi86(short cmd,uintptr_t arg1,uintptr_t arg2,uintptr_t arg3)797c478bd9Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
807c478bd9Sstevel@tonic-gate {
819acbbeafSnn35248 	struct ssd ssd;
827c478bd9Sstevel@tonic-gate 	int error = 0;
837c478bd9Sstevel@tonic-gate 	int c;
847c478bd9Sstevel@tonic-gate 	proc_t *pp = curproc;
857c478bd9Sstevel@tonic-gate 
867c478bd9Sstevel@tonic-gate 	switch (cmd) {
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate 	/*
897c478bd9Sstevel@tonic-gate 	 * The SI86V86 subsystem call of the SYSI86 system call
907c478bd9Sstevel@tonic-gate 	 * supports only one subcode -- V86SC_IOPL.
917c478bd9Sstevel@tonic-gate 	 */
927c478bd9Sstevel@tonic-gate 	case SI86V86:
937c478bd9Sstevel@tonic-gate 		if (arg1 == V86SC_IOPL) {
947c478bd9Sstevel@tonic-gate 			struct regs *rp = lwptoregs(ttolwp(curthread));
957c478bd9Sstevel@tonic-gate 			greg_t oldpl = rp->r_ps & PS_IOPL;
967c478bd9Sstevel@tonic-gate 			greg_t newpl = arg2 & PS_IOPL;
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 			/*
997c478bd9Sstevel@tonic-gate 			 * Must be privileged to run this system call
1007c478bd9Sstevel@tonic-gate 			 * if giving more io privilege.
1017c478bd9Sstevel@tonic-gate 			 */
1027c478bd9Sstevel@tonic-gate 			if (newpl > oldpl && (error =
1037c478bd9Sstevel@tonic-gate 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
1047c478bd9Sstevel@tonic-gate 				return (set_errno(error));
105843e1988Sjohnlev #if defined(__xpv)
106843e1988Sjohnlev 			kpreempt_disable();
107843e1988Sjohnlev 			installctx(curthread, NULL, xen_disable_user_iopl,
108843e1988Sjohnlev 			    xen_enable_user_iopl, NULL, NULL,
109843e1988Sjohnlev 			    xen_disable_user_iopl, NULL);
110843e1988Sjohnlev 			xen_enable_user_iopl();
111843e1988Sjohnlev 			kpreempt_enable();
112843e1988Sjohnlev #else
1137c478bd9Sstevel@tonic-gate 			rp->r_ps ^= oldpl ^ newpl;
114843e1988Sjohnlev #endif
1157c478bd9Sstevel@tonic-gate 		} else
1167c478bd9Sstevel@tonic-gate 			error = EINVAL;
1177c478bd9Sstevel@tonic-gate 		break;
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate 	/*
1207c478bd9Sstevel@tonic-gate 	 * Set a segment descriptor
1217c478bd9Sstevel@tonic-gate 	 */
1227c478bd9Sstevel@tonic-gate 	case SI86DSCR:
1237c478bd9Sstevel@tonic-gate 		/*
1247c478bd9Sstevel@tonic-gate 		 * There are considerable problems here manipulating
1257c478bd9Sstevel@tonic-gate 		 * resources shared by many running lwps.  Get everyone
1267c478bd9Sstevel@tonic-gate 		 * into a safe state before changing the LDT.
1277c478bd9Sstevel@tonic-gate 		 */
1287c478bd9Sstevel@tonic-gate 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
1297c478bd9Sstevel@tonic-gate 			error = EINTR;
1307c478bd9Sstevel@tonic-gate 			break;
1317c478bd9Sstevel@tonic-gate 		}
1329acbbeafSnn35248 
1339acbbeafSnn35248 		if (get_udatamodel() == DATAMODEL_LP64) {
1349acbbeafSnn35248 			error = EINVAL;
1359acbbeafSnn35248 			break;
1369acbbeafSnn35248 		}
1379acbbeafSnn35248 
1389acbbeafSnn35248 		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
1399acbbeafSnn35248 			error = EFAULT;
1409acbbeafSnn35248 			break;
1419acbbeafSnn35248 		}
1429acbbeafSnn35248 
1439acbbeafSnn35248 		error = setdscr(&ssd);
1449acbbeafSnn35248 
1457c478bd9Sstevel@tonic-gate 		mutex_enter(&pp->p_lock);
1467c478bd9Sstevel@tonic-gate 		if (curthread != pp->p_agenttp)
1477c478bd9Sstevel@tonic-gate 			continuelwps(pp);
1487c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
1497c478bd9Sstevel@tonic-gate 		break;
1507c478bd9Sstevel@tonic-gate 
1517c478bd9Sstevel@tonic-gate 	case SI86FPHW:
1527c478bd9Sstevel@tonic-gate 		c = fp_kind & 0xff;
1537c478bd9Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1)
1547c478bd9Sstevel@tonic-gate 			error = EFAULT;
1557c478bd9Sstevel@tonic-gate 		break;
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate 	case SI86FPSTART:
1587c478bd9Sstevel@tonic-gate 		/*
1597c478bd9Sstevel@tonic-gate 		 * arg1 is the address of _fp_hw
1607c478bd9Sstevel@tonic-gate 		 * arg2 is the desired x87 FCW value
1617c478bd9Sstevel@tonic-gate 		 * arg3 is the desired SSE MXCSR value
1627c478bd9Sstevel@tonic-gate 		 * a return value of one means SSE hardware, else none.
1637c478bd9Sstevel@tonic-gate 		 */
1647c478bd9Sstevel@tonic-gate 		c = fp_kind & 0xff;
1657c478bd9Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1) {
1667c478bd9Sstevel@tonic-gate 			error = EFAULT;
1677c478bd9Sstevel@tonic-gate 			break;
1687c478bd9Sstevel@tonic-gate 		}
1697c478bd9Sstevel@tonic-gate 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
170*7af88ac7SKuriakose Kuruvilla 		return ((fp_kind & __FP_SSE) ? 1 : 0);
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate 	/* real time clock management commands */
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate 	case WTODC:
1757c478bd9Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1767c478bd9Sstevel@tonic-gate 			timestruc_t ts;
1777c478bd9Sstevel@tonic-gate 			mutex_enter(&tod_lock);
1787c478bd9Sstevel@tonic-gate 			gethrestime(&ts);
1797c478bd9Sstevel@tonic-gate 			tod_set(ts);
1807c478bd9Sstevel@tonic-gate 			mutex_exit(&tod_lock);
1817c478bd9Sstevel@tonic-gate 		}
1827c478bd9Sstevel@tonic-gate 		break;
1837c478bd9Sstevel@tonic-gate 
1847c478bd9Sstevel@tonic-gate /* Give some timezone playing room */
1857c478bd9Sstevel@tonic-gate #define	ONEWEEK	(7 * 24 * 60 * 60)
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 	case SGMTL:
1887c478bd9Sstevel@tonic-gate 		/*
1897c478bd9Sstevel@tonic-gate 		 * Called from 32 bit land, negative values
1907c478bd9Sstevel@tonic-gate 		 * are not sign extended, so we do that here
1917c478bd9Sstevel@tonic-gate 		 * by casting it to an int and back.  We also
1927c478bd9Sstevel@tonic-gate 		 * clamp the value to within reason and detect
1937c478bd9Sstevel@tonic-gate 		 * when a 64 bit call overflows an int.
1947c478bd9Sstevel@tonic-gate 		 */
1957c478bd9Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1967c478bd9Sstevel@tonic-gate 			int newlag = (int)arg1;
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
1997c478bd9Sstevel@tonic-gate 			if (get_udatamodel() == DATAMODEL_NATIVE &&
2007c478bd9Sstevel@tonic-gate 			    (long)newlag != (long)arg1) {
2017c478bd9Sstevel@tonic-gate 				error = EOVERFLOW;
2027c478bd9Sstevel@tonic-gate 			} else
2037c478bd9Sstevel@tonic-gate #endif
2047c478bd9Sstevel@tonic-gate 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
2057c478bd9Sstevel@tonic-gate 				sgmtl(newlag);
2067c478bd9Sstevel@tonic-gate 			else
2077c478bd9Sstevel@tonic-gate 				error = EOVERFLOW;
2087c478bd9Sstevel@tonic-gate 		}
2097c478bd9Sstevel@tonic-gate 		break;
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 	case GGMTL:
2127c478bd9Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
2137c478bd9Sstevel@tonic-gate 			if (sulword((void *)arg1, ggmtl()) == -1)
2147c478bd9Sstevel@tonic-gate 				error = EFAULT;
2157c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2167c478bd9Sstevel@tonic-gate 		} else {
2177c478bd9Sstevel@tonic-gate 			time_t gmtl;
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 			if ((gmtl = ggmtl()) > INT32_MAX) {
2207c478bd9Sstevel@tonic-gate 				/*
2217c478bd9Sstevel@tonic-gate 				 * Since gmt_lag can at most be
2227c478bd9Sstevel@tonic-gate 				 * +/- 12 hours, something is
2237c478bd9Sstevel@tonic-gate 				 * *seriously* messed up here.
2247c478bd9Sstevel@tonic-gate 				 */
2257c478bd9Sstevel@tonic-gate 				error = EOVERFLOW;
2267c478bd9Sstevel@tonic-gate 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
2277c478bd9Sstevel@tonic-gate 				error = EFAULT;
2287c478bd9Sstevel@tonic-gate #endif
2297c478bd9Sstevel@tonic-gate 		}
2307c478bd9Sstevel@tonic-gate 		break;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	case RTCSYNC:
2337c478bd9Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0)
2347c478bd9Sstevel@tonic-gate 			rtcsync();
2357c478bd9Sstevel@tonic-gate 		break;
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	/* END OF real time clock management commands */
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 	default:
2407c478bd9Sstevel@tonic-gate 		error = EINVAL;
2417c478bd9Sstevel@tonic-gate 		break;
2427c478bd9Sstevel@tonic-gate 	}
2437c478bd9Sstevel@tonic-gate 	return (error == 0 ? 0 : set_errno(error));
2447c478bd9Sstevel@tonic-gate }
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate void
usd_to_ssd(user_desc_t * usd,struct ssd * ssd,selector_t sel)2477c478bd9Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
2487c478bd9Sstevel@tonic-gate {
2497c478bd9Sstevel@tonic-gate 	ssd->bo = USEGD_GETBASE(usd);
2507c478bd9Sstevel@tonic-gate 	ssd->ls = USEGD_GETLIMIT(usd);
2517c478bd9Sstevel@tonic-gate 	ssd->sel = sel;
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	/*
2547c478bd9Sstevel@tonic-gate 	 * set type, dpl and present bits.
2557c478bd9Sstevel@tonic-gate 	 */
2567c478bd9Sstevel@tonic-gate 	ssd->acc1 = usd->usd_type;
2577c478bd9Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_dpl << 5;
2587c478bd9Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_p << (5 + 2);
2597c478bd9Sstevel@tonic-gate 
2607c478bd9Sstevel@tonic-gate 	/*
2617c478bd9Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
2627c478bd9Sstevel@tonic-gate 	 */
2637c478bd9Sstevel@tonic-gate 	ssd->acc2 = usd->usd_avl;
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate #if defined(__amd64)
2667c478bd9Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_long << 1;
2677c478bd9Sstevel@tonic-gate #else
2687c478bd9Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_reserved << 1;
2697c478bd9Sstevel@tonic-gate #endif
2707c478bd9Sstevel@tonic-gate 
2717c478bd9Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
2727c478bd9Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
2737c478bd9Sstevel@tonic-gate }
2747c478bd9Sstevel@tonic-gate 
2757c478bd9Sstevel@tonic-gate static void
ssd_to_usd(struct ssd * ssd,user_desc_t * usd)2767c478bd9Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
2777c478bd9Sstevel@tonic-gate {
2787c478bd9Sstevel@tonic-gate 
279843e1988Sjohnlev 	ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);
280843e1988Sjohnlev 
2817c478bd9Sstevel@tonic-gate 	USEGD_SETBASE(usd, ssd->bo);
2827c478bd9Sstevel@tonic-gate 	USEGD_SETLIMIT(usd, ssd->ls);
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate 	/*
2857c478bd9Sstevel@tonic-gate 	 * set type, dpl and present bits.
2867c478bd9Sstevel@tonic-gate 	 */
2877c478bd9Sstevel@tonic-gate 	usd->usd_type = ssd->acc1;
2887c478bd9Sstevel@tonic-gate 	usd->usd_dpl = ssd->acc1 >> 5;
2897c478bd9Sstevel@tonic-gate 	usd->usd_p = ssd->acc1 >> (5 + 2);
2907c478bd9Sstevel@tonic-gate 
2917c478bd9Sstevel@tonic-gate 	ASSERT(usd->usd_type >= SDT_MEMRO);
2927c478bd9Sstevel@tonic-gate 	ASSERT(usd->usd_dpl == SEL_UPL);
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate 	/*
295843e1988Sjohnlev 	 * 64-bit code selectors are never allowed in the LDT.
296843e1988Sjohnlev 	 * Reserved bit is always 0 on 32-bit sytems.
297843e1988Sjohnlev 	 */
298843e1988Sjohnlev #if defined(__amd64)
299843e1988Sjohnlev 	usd->usd_long = 0;
300843e1988Sjohnlev #else
301843e1988Sjohnlev 	usd->usd_reserved = 0;
302843e1988Sjohnlev #endif
303843e1988Sjohnlev 
304843e1988Sjohnlev 	/*
3057c478bd9Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
3067c478bd9Sstevel@tonic-gate 	 */
3077c478bd9Sstevel@tonic-gate 	usd->usd_avl = ssd->acc2;
3087c478bd9Sstevel@tonic-gate 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
3097c478bd9Sstevel@tonic-gate 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
3107c478bd9Sstevel@tonic-gate }
3117c478bd9Sstevel@tonic-gate 
312843e1988Sjohnlev 
313843e1988Sjohnlev #if defined(__i386)
314843e1988Sjohnlev 
3157c478bd9Sstevel@tonic-gate static void
ssd_to_sgd(struct ssd * ssd,gate_desc_t * sgd)3167c478bd9Sstevel@tonic-gate ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
3177c478bd9Sstevel@tonic-gate {
3187c478bd9Sstevel@tonic-gate 
319843e1988Sjohnlev 	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);
320843e1988Sjohnlev 
3217c478bd9Sstevel@tonic-gate 	sgd->sgd_looffset = ssd->bo;
3227c478bd9Sstevel@tonic-gate 	sgd->sgd_hioffset = ssd->bo >> 16;
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	sgd->sgd_selector = ssd->ls;
325843e1988Sjohnlev 
3267c478bd9Sstevel@tonic-gate 	/*
3277c478bd9Sstevel@tonic-gate 	 * set type, dpl and present bits.
3287c478bd9Sstevel@tonic-gate 	 */
3297c478bd9Sstevel@tonic-gate 	sgd->sgd_type = ssd->acc1;
3307c478bd9Sstevel@tonic-gate 	sgd->sgd_dpl = ssd->acc1 >> 5;
3317c478bd9Sstevel@tonic-gate 	sgd->sgd_p = ssd->acc1 >> 7;
3327c478bd9Sstevel@tonic-gate 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
3337c478bd9Sstevel@tonic-gate 	ASSERT(sgd->sgd_dpl == SEL_UPL);
3347c478bd9Sstevel@tonic-gate 	sgd->sgd_stkcpy = 0;
3357c478bd9Sstevel@tonic-gate }
3367c478bd9Sstevel@tonic-gate 
337843e1988Sjohnlev #endif	/* __i386 */
338843e1988Sjohnlev 
3390baeff3dSrab /*
3400baeff3dSrab  * Load LDT register with the current process's LDT.
3410baeff3dSrab  */
342843e1988Sjohnlev static void
ldt_load(void)3430baeff3dSrab ldt_load(void)
3440baeff3dSrab {
345843e1988Sjohnlev #if defined(__xpv)
346843e1988Sjohnlev 	xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
347843e1988Sjohnlev 	    curproc->p_ldtlimit + 1);
348843e1988Sjohnlev #else
3490baeff3dSrab 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
3500baeff3dSrab 	wr_ldtr(ULDT_SEL);
351843e1988Sjohnlev #endif
3520baeff3dSrab }
3530baeff3dSrab 
3540baeff3dSrab /*
3550baeff3dSrab  * Store a NULL selector in the LDTR. All subsequent illegal references to
3560baeff3dSrab  * the LDT will result in a #gp.
3570baeff3dSrab  */
3580baeff3dSrab void
ldt_unload(void)3590baeff3dSrab ldt_unload(void)
3600baeff3dSrab {
361843e1988Sjohnlev #if defined(__xpv)
362843e1988Sjohnlev 	xen_set_ldt(NULL, 0);
363843e1988Sjohnlev #else
364843e1988Sjohnlev 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
3650baeff3dSrab 	wr_ldtr(0);
366843e1988Sjohnlev #endif
3670baeff3dSrab }
3687c478bd9Sstevel@tonic-gate 
3697c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3707c478bd9Sstevel@tonic-gate static void
ldt_savectx(proc_t * p)3710baeff3dSrab ldt_savectx(proc_t *p)
3727c478bd9Sstevel@tonic-gate {
3730baeff3dSrab 	ASSERT(p->p_ldt != NULL);
3740baeff3dSrab 	ASSERT(p == curproc);
3750baeff3dSrab 
3767c478bd9Sstevel@tonic-gate #if defined(__amd64)
3777c478bd9Sstevel@tonic-gate 	/*
3787c478bd9Sstevel@tonic-gate 	 * The 64-bit kernel must be sure to clear any stale ldt
3797c478bd9Sstevel@tonic-gate 	 * selectors when context switching away from a process that
3807c478bd9Sstevel@tonic-gate 	 * has a private ldt. Consider the following example:
3817c478bd9Sstevel@tonic-gate 	 *
3827c478bd9Sstevel@tonic-gate 	 * 	Wine creats a ldt descriptor and points a segment register
3837c478bd9Sstevel@tonic-gate 	 * 	to it.
3847c478bd9Sstevel@tonic-gate 	 *
3857c478bd9Sstevel@tonic-gate 	 *	We then context switch away from wine lwp to kernel
3867c478bd9Sstevel@tonic-gate 	 *	thread and hit breakpoint in kernel with kmdb
3877c478bd9Sstevel@tonic-gate 	 *
3887c478bd9Sstevel@tonic-gate 	 *	When we continue and resume from kmdb we will #gp
3897c478bd9Sstevel@tonic-gate 	 * 	fault since kmdb will have saved the stale ldt selector
3907c478bd9Sstevel@tonic-gate 	 *	from wine and will try to restore it but we are no longer in
3917c478bd9Sstevel@tonic-gate 	 *	the context of the wine process and do not have our
3927c478bd9Sstevel@tonic-gate 	 *	ldtr register pointing to the private ldt.
3937c478bd9Sstevel@tonic-gate 	 */
394843e1988Sjohnlev 	reset_sregs();
3957c478bd9Sstevel@tonic-gate #endif
3967c478bd9Sstevel@tonic-gate 
3970baeff3dSrab 	ldt_unload();
3987c478bd9Sstevel@tonic-gate 	cpu_fast_syscall_enable(NULL);
3997c478bd9Sstevel@tonic-gate }
4007c478bd9Sstevel@tonic-gate 
4010baeff3dSrab static void
ldt_restorectx(proc_t * p)4020baeff3dSrab ldt_restorectx(proc_t *p)
4030baeff3dSrab {
4040baeff3dSrab 	ASSERT(p->p_ldt != NULL);
4050baeff3dSrab 	ASSERT(p == curproc);
4060baeff3dSrab 
4070baeff3dSrab 	ldt_load();
4080baeff3dSrab 	cpu_fast_syscall_disable(NULL);
4090baeff3dSrab }
4100baeff3dSrab 
4117c478bd9Sstevel@tonic-gate /*
4120baeff3dSrab  * When a process with a private LDT execs, fast syscalls must be enabled for
4130baeff3dSrab  * the new process image.
4147c478bd9Sstevel@tonic-gate  */
4157c478bd9Sstevel@tonic-gate /* ARGSUSED */
4167c478bd9Sstevel@tonic-gate static void
ldt_freectx(proc_t * p,int isexec)4170baeff3dSrab ldt_freectx(proc_t *p, int isexec)
4187c478bd9Sstevel@tonic-gate {
4190baeff3dSrab 	ASSERT(p->p_ldt);
4200baeff3dSrab 
4217c478bd9Sstevel@tonic-gate 	if (isexec) {
4227c478bd9Sstevel@tonic-gate 		kpreempt_disable();
4237c478bd9Sstevel@tonic-gate 		cpu_fast_syscall_enable(NULL);
4247c478bd9Sstevel@tonic-gate 		kpreempt_enable();
4257c478bd9Sstevel@tonic-gate 	}
4260baeff3dSrab 
4270baeff3dSrab 	/*
4280baeff3dSrab 	 * ldt_free() will free the memory used by the private LDT, reset the
4290baeff3dSrab 	 * process's descriptor, and re-program the LDTR.
4300baeff3dSrab 	 */
4310baeff3dSrab 	ldt_free(p);
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate /*
4357c478bd9Sstevel@tonic-gate  * Install ctx op that ensures syscall/sysenter are disabled.
4367c478bd9Sstevel@tonic-gate  * See comments below.
4377c478bd9Sstevel@tonic-gate  *
4380baeff3dSrab  * When a thread with a private LDT forks, the new process
4397c478bd9Sstevel@tonic-gate  * must have the LDT context ops installed.
4407c478bd9Sstevel@tonic-gate  */
4417c478bd9Sstevel@tonic-gate /* ARGSUSED */
4427c478bd9Sstevel@tonic-gate static void
ldt_installctx(proc_t * p,proc_t * cp)4430baeff3dSrab ldt_installctx(proc_t *p, proc_t *cp)
4447c478bd9Sstevel@tonic-gate {
4450baeff3dSrab 	proc_t		*targ = p;
4460baeff3dSrab 	kthread_t	*t;
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate 	/*
4490baeff3dSrab 	 * If this is a fork, operate on the child process.
4507c478bd9Sstevel@tonic-gate 	 */
4510baeff3dSrab 	if (cp != NULL) {
4520baeff3dSrab 		targ = cp;
4530baeff3dSrab 		ldt_dup(p, cp);
4540baeff3dSrab 	}
4557c478bd9Sstevel@tonic-gate 
4560baeff3dSrab 	/*
4570baeff3dSrab 	 * The process context ops expect the target process as their argument.
4580baeff3dSrab 	 */
4590baeff3dSrab 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
4600baeff3dSrab 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
4617c478bd9Sstevel@tonic-gate 
4620baeff3dSrab 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
4630baeff3dSrab 	    ldt_installctx, ldt_savectx, ldt_freectx);
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate 	/*
4667c478bd9Sstevel@tonic-gate 	 * We've just disabled fast system call and return instructions; take
4677c478bd9Sstevel@tonic-gate 	 * the slow path out to make sure we don't try to use one to return
4680baeff3dSrab 	 * back to user. We must set t_post_sys for every thread in the
4690baeff3dSrab 	 * process to make sure none of them escape out via fast return.
4707c478bd9Sstevel@tonic-gate 	 */
4710baeff3dSrab 
4720baeff3dSrab 	mutex_enter(&targ->p_lock);
4730baeff3dSrab 	t = targ->p_tlist;
4740baeff3dSrab 	do {
4750baeff3dSrab 		t->t_post_sys = 1;
4760baeff3dSrab 	} while ((t = t->t_forw) != targ->p_tlist);
4770baeff3dSrab 	mutex_exit(&targ->p_lock);
4787c478bd9Sstevel@tonic-gate }
4797c478bd9Sstevel@tonic-gate 
4809acbbeafSnn35248 int
setdscr(struct ssd * ssd)4819acbbeafSnn35248 setdscr(struct ssd *ssd)
4827c478bd9Sstevel@tonic-gate {
4837c478bd9Sstevel@tonic-gate 	ushort_t seli; 		/* selector index */
484843e1988Sjohnlev 	user_desc_t *ldp;	/* descriptor pointer */
485843e1988Sjohnlev 	user_desc_t ndesc;	/* new descriptor */
4867c478bd9Sstevel@tonic-gate 	proc_t	*pp = ttoproc(curthread);
487843e1988Sjohnlev 	int	rc = 0;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	/*
4907c478bd9Sstevel@tonic-gate 	 * LDT segments: executable and data at DPL 3 only.
4917c478bd9Sstevel@tonic-gate 	 */
4929acbbeafSnn35248 	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
4937c478bd9Sstevel@tonic-gate 		return (EINVAL);
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	/*
4967c478bd9Sstevel@tonic-gate 	 * check the selector index.
4977c478bd9Sstevel@tonic-gate 	 */
4989acbbeafSnn35248 	seli = SELTOIDX(ssd->sel);
4990baeff3dSrab 	if (seli >= MAXNLDT || seli < LDT_UDBASE)
5007c478bd9Sstevel@tonic-gate 		return (EINVAL);
5017c478bd9Sstevel@tonic-gate 
502843e1988Sjohnlev 	ndesc = null_udesc;
5037c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_ldtlock);
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	/*
5067c478bd9Sstevel@tonic-gate 	 * If this is the first time for this process then setup a
5077c478bd9Sstevel@tonic-gate 	 * private LDT for it.
5087c478bd9Sstevel@tonic-gate 	 */
5097c478bd9Sstevel@tonic-gate 	if (pp->p_ldt == NULL) {
510843e1988Sjohnlev 		ldt_alloc(pp, seli);
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 		/*
5137c478bd9Sstevel@tonic-gate 		 * Now that this process has a private LDT, the use of
5147c478bd9Sstevel@tonic-gate 		 * the syscall/sysret and sysenter/sysexit instructions
5157c478bd9Sstevel@tonic-gate 		 * is forbidden for this process because they destroy
5167c478bd9Sstevel@tonic-gate 		 * the contents of %cs and %ss segment registers.
5177c478bd9Sstevel@tonic-gate 		 *
5180baeff3dSrab 		 * Explicitly disable them here and add a context handler
5190baeff3dSrab 		 * to the process. Note that disabling
5207c478bd9Sstevel@tonic-gate 		 * them here means we can't use sysret or sysexit on
5217c478bd9Sstevel@tonic-gate 		 * the way out of this system call - so we force this
5227c478bd9Sstevel@tonic-gate 		 * thread to take the slow path (which doesn't make use
5237c478bd9Sstevel@tonic-gate 		 * of sysenter or sysexit) back out.
5247c478bd9Sstevel@tonic-gate 		 */
525843e1988Sjohnlev 		kpreempt_disable();
5260baeff3dSrab 		ldt_installctx(pp, NULL);
5277c478bd9Sstevel@tonic-gate 		cpu_fast_syscall_disable(NULL);
5287c478bd9Sstevel@tonic-gate 		ASSERT(curthread->t_post_sys != 0);
5290baeff3dSrab 		kpreempt_enable();
5307c478bd9Sstevel@tonic-gate 
531843e1988Sjohnlev 	} else if (seli > pp->p_ldtlimit) {
532843e1988Sjohnlev 
533843e1988Sjohnlev 		/*
534843e1988Sjohnlev 		 * Increase size of ldt to include seli.
535843e1988Sjohnlev 		 */
536843e1988Sjohnlev 		ldt_grow(pp, seli);
5377c478bd9Sstevel@tonic-gate 	}
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	ASSERT(seli <= pp->p_ldtlimit);
540843e1988Sjohnlev 	ldp = &pp->p_ldt[seli];
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 	/*
5437c478bd9Sstevel@tonic-gate 	 * On the 64-bit kernel, this is where things get more subtle.
5447c478bd9Sstevel@tonic-gate 	 * Recall that in the 64-bit kernel, when we enter the kernel we
5457c478bd9Sstevel@tonic-gate 	 * deliberately -don't- reload the segment selectors we came in on
5467c478bd9Sstevel@tonic-gate 	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
5477c478bd9Sstevel@tonic-gate 	 * and the underlying descriptors are essentially ignored by the
5487c478bd9Sstevel@tonic-gate 	 * hardware in long mode - except for the base that we override with
5497c478bd9Sstevel@tonic-gate 	 * the gsbase MSRs.
5507c478bd9Sstevel@tonic-gate 	 *
5517c478bd9Sstevel@tonic-gate 	 * However, there's one unfortunate issue with this rosy picture --
5527c478bd9Sstevel@tonic-gate 	 * a descriptor that's not marked as 'present' will still generate
5537c478bd9Sstevel@tonic-gate 	 * an #np when loading a segment register.
5547c478bd9Sstevel@tonic-gate 	 *
5557c478bd9Sstevel@tonic-gate 	 * Consider this case.  An lwp creates a harmless LDT entry, points
5567c478bd9Sstevel@tonic-gate 	 * one of its segment registers at it, then tells the kernel (here)
5577c478bd9Sstevel@tonic-gate 	 * to delete it.  In the 32-bit kernel, the #np will happen on the
5587c478bd9Sstevel@tonic-gate 	 * way back to userland where we reload the segment registers, and be
5597c478bd9Sstevel@tonic-gate 	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
5607c478bd9Sstevel@tonic-gate 	 * will happen in the normal case too.  However, if we're trying to
5617c478bd9Sstevel@tonic-gate 	 * use a debugger that wants to save and restore the segment registers,
5627c478bd9Sstevel@tonic-gate 	 * and the debugger thinks that we have valid segment registers, we
5637c478bd9Sstevel@tonic-gate 	 * have the problem that the debugger will try and restore the
5647c478bd9Sstevel@tonic-gate 	 * segment register that points at the now 'not present' descriptor
5657c478bd9Sstevel@tonic-gate 	 * and will take a #np right there.
5667c478bd9Sstevel@tonic-gate 	 *
5677c478bd9Sstevel@tonic-gate 	 * We should obviously fix the debugger to be paranoid about
5687c478bd9Sstevel@tonic-gate 	 * -not- restoring segment registers that point to bad descriptors;
5697c478bd9Sstevel@tonic-gate 	 * however we can prevent the problem here if we check to see if any
5707c478bd9Sstevel@tonic-gate 	 * of the segment registers are still pointing at the thing we're
5717c478bd9Sstevel@tonic-gate 	 * destroying; if they are, return an error instead. (That also seems
5727c478bd9Sstevel@tonic-gate 	 * a lot better failure mode than SIGKILL and a core file
5737c478bd9Sstevel@tonic-gate 	 * from kern_gpfault() too.)
5747c478bd9Sstevel@tonic-gate 	 */
5759acbbeafSnn35248 	if (SI86SSD_PRES(ssd) == 0) {
5767c478bd9Sstevel@tonic-gate 		kthread_t *t;
5777c478bd9Sstevel@tonic-gate 		int bad = 0;
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate 		/*
5807c478bd9Sstevel@tonic-gate 		 * Look carefully at the segment registers of every lwp
5817c478bd9Sstevel@tonic-gate 		 * in the process (they're all stopped by our caller).
5827c478bd9Sstevel@tonic-gate 		 * If we're about to invalidate a descriptor that's still
5837c478bd9Sstevel@tonic-gate 		 * being referenced by *any* of them, return an error,
5847c478bd9Sstevel@tonic-gate 		 * rather than having them #gp on their way out of the kernel.
5857c478bd9Sstevel@tonic-gate 		 */
5867c478bd9Sstevel@tonic-gate 		ASSERT(pp->p_lwprcnt == 1);
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate 		mutex_enter(&pp->p_lock);
5897c478bd9Sstevel@tonic-gate 		t = pp->p_tlist;
5907c478bd9Sstevel@tonic-gate 		do {
5917c478bd9Sstevel@tonic-gate 			klwp_t *lwp = ttolwp(t);
5927c478bd9Sstevel@tonic-gate 			struct regs *rp = lwp->lwp_regs;
5937c478bd9Sstevel@tonic-gate #if defined(__amd64)
5947c478bd9Sstevel@tonic-gate 			pcb_t *pcb = &lwp->lwp_pcb;
5957c478bd9Sstevel@tonic-gate #endif
5967c478bd9Sstevel@tonic-gate 
5979acbbeafSnn35248 			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
5987c478bd9Sstevel@tonic-gate 				bad = 1;
5997c478bd9Sstevel@tonic-gate 				break;
6007c478bd9Sstevel@tonic-gate 			}
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate #if defined(__amd64)
6037712e92cSsudheer 			if (pcb->pcb_rupdate == 1) {
6049acbbeafSnn35248 				if (ssd->sel == pcb->pcb_ds ||
6059acbbeafSnn35248 				    ssd->sel == pcb->pcb_es ||
6069acbbeafSnn35248 				    ssd->sel == pcb->pcb_fs ||
6079acbbeafSnn35248 				    ssd->sel == pcb->pcb_gs) {
6087c478bd9Sstevel@tonic-gate 					bad = 1;
6097c478bd9Sstevel@tonic-gate 					break;
6107c478bd9Sstevel@tonic-gate 				}
6117c478bd9Sstevel@tonic-gate 			} else
6127c478bd9Sstevel@tonic-gate #endif
6137c478bd9Sstevel@tonic-gate 			{
6149acbbeafSnn35248 				if (ssd->sel == rp->r_ds ||
6159acbbeafSnn35248 				    ssd->sel == rp->r_es ||
6169acbbeafSnn35248 				    ssd->sel == rp->r_fs ||
6179acbbeafSnn35248 				    ssd->sel == rp->r_gs) {
6187c478bd9Sstevel@tonic-gate 					bad = 1;
6197c478bd9Sstevel@tonic-gate 					break;
6207c478bd9Sstevel@tonic-gate 				}
6217c478bd9Sstevel@tonic-gate 			}
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 		} while ((t = t->t_forw) != pp->p_tlist);
6247c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
6257c478bd9Sstevel@tonic-gate 
6267c478bd9Sstevel@tonic-gate 		if (bad) {
6277c478bd9Sstevel@tonic-gate 			mutex_exit(&pp->p_ldtlock);
6287c478bd9Sstevel@tonic-gate 			return (EBUSY);
6297c478bd9Sstevel@tonic-gate 		}
6307c478bd9Sstevel@tonic-gate 	}
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 	/*
6337c478bd9Sstevel@tonic-gate 	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
6347c478bd9Sstevel@tonic-gate 	 */
6359acbbeafSnn35248 	if (ssd->acc1 == 0) {
636843e1988Sjohnlev 		rc  = ldt_update_segd(ldp, &null_udesc);
6377c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_ldtlock);
638843e1988Sjohnlev 		return (rc);
6397c478bd9Sstevel@tonic-gate 	}
6407c478bd9Sstevel@tonic-gate 
6417c478bd9Sstevel@tonic-gate 	/*
6427c478bd9Sstevel@tonic-gate 	 * Check segment type, allow segment not present and
6437c478bd9Sstevel@tonic-gate 	 * only user DPL (3).
6447c478bd9Sstevel@tonic-gate 	 */
6459acbbeafSnn35248 	if (SI86SSD_DPL(ssd) != SEL_UPL) {
6467c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_ldtlock);
6477c478bd9Sstevel@tonic-gate 		return (EINVAL);
6487c478bd9Sstevel@tonic-gate 	}
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate #if defined(__amd64)
6517c478bd9Sstevel@tonic-gate 	/*
6529acbbeafSnn35248 	 * Do not allow 32-bit applications to create 64-bit mode code
6539acbbeafSnn35248 	 * segments.
6547c478bd9Sstevel@tonic-gate 	 */
6559acbbeafSnn35248 	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
6569acbbeafSnn35248 	    SI86SSD_ISLONG(ssd)) {
6577c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_ldtlock);
6587c478bd9Sstevel@tonic-gate 		return (EINVAL);
6597c478bd9Sstevel@tonic-gate 	}
6607c478bd9Sstevel@tonic-gate #endif /* __amd64 */
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 	/*
6637c478bd9Sstevel@tonic-gate 	 * Set up a code or data user segment descriptor.
6647c478bd9Sstevel@tonic-gate 	 */
6659acbbeafSnn35248 	if (SI86SSD_ISUSEG(ssd)) {
666843e1988Sjohnlev 		ssd_to_usd(ssd, &ndesc);
667843e1988Sjohnlev 		rc = ldt_update_segd(ldp, &ndesc);
6687c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_ldtlock);
669843e1988Sjohnlev 		return (rc);
6707c478bd9Sstevel@tonic-gate 	}
6717c478bd9Sstevel@tonic-gate 
672843e1988Sjohnlev #if defined(__i386)
6737c478bd9Sstevel@tonic-gate 	/*
674843e1988Sjohnlev 	 * Allow a call gate only if the destination is in the LDT
675843e1988Sjohnlev 	 * and the system is running in 32-bit legacy mode.
676843e1988Sjohnlev 	 *
677843e1988Sjohnlev 	 * In long mode 32-bit call gates are redefined as 64-bit call
678843e1988Sjohnlev 	 * gates and the hw enforces that the target code selector
679843e1988Sjohnlev 	 * of the call gate must be 64-bit selector. A #gp fault is
680843e1988Sjohnlev 	 * generated if otherwise. Since we do not allow 32-bit processes
681843e1988Sjohnlev 	 * to switch themselves to 64-bits we never allow call gates
682843e1988Sjohnlev 	 * on 64-bit systems.
6837c478bd9Sstevel@tonic-gate 	 */
6849acbbeafSnn35248 	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
685843e1988Sjohnlev 
686843e1988Sjohnlev 
687843e1988Sjohnlev 		ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
688843e1988Sjohnlev 		rc = ldt_update_segd(ldp, &ndesc);
6897c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_ldtlock);
690843e1988Sjohnlev 		return (rc);
6917c478bd9Sstevel@tonic-gate 	}
692843e1988Sjohnlev #endif	/* __i386 */
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_ldtlock);
6957c478bd9Sstevel@tonic-gate 	return (EINVAL);
6967c478bd9Sstevel@tonic-gate }
6977c478bd9Sstevel@tonic-gate 
6987c478bd9Sstevel@tonic-gate /*
699843e1988Sjohnlev  * Allocate new LDT for process just large enough to contain seli.
700843e1988Sjohnlev  * Note we allocate and grow LDT in PAGESIZE chunks. We do this
701843e1988Sjohnlev  * to simplify the implementation and because on the hypervisor it's
702843e1988Sjohnlev  * required, since the LDT must live on pages that have PROT_WRITE
703843e1988Sjohnlev  * removed and which are given to the hypervisor.
7047c478bd9Sstevel@tonic-gate  */
7059acbbeafSnn35248 static void
ldt_alloc(proc_t * pp,uint_t seli)706843e1988Sjohnlev ldt_alloc(proc_t *pp, uint_t seli)
7077c478bd9Sstevel@tonic-gate {
708843e1988Sjohnlev 	user_desc_t	*ldt;
709843e1988Sjohnlev 	size_t		ldtsz;
710843e1988Sjohnlev 	uint_t		nsels;
711843e1988Sjohnlev 
712843e1988Sjohnlev 	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
713843e1988Sjohnlev 	ASSERT(pp->p_ldt == NULL);
714843e1988Sjohnlev 	ASSERT(pp->p_ldtlimit == 0);
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	/*
717843e1988Sjohnlev 	 * Allocate new LDT just large enough to contain seli.
7187c478bd9Sstevel@tonic-gate 	 */
719843e1988Sjohnlev 	ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
720843e1988Sjohnlev 	nsels = ldtsz / sizeof (user_desc_t);
721843e1988Sjohnlev 	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
7227c478bd9Sstevel@tonic-gate 
723843e1988Sjohnlev 	ldt = kmem_zalloc(ldtsz, KM_SLEEP);
724843e1988Sjohnlev 	ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));
7257c478bd9Sstevel@tonic-gate 
726843e1988Sjohnlev #if defined(__xpv)
727843e1988Sjohnlev 	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
728843e1988Sjohnlev 		panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
729843e1988Sjohnlev #endif
7307c478bd9Sstevel@tonic-gate 
731843e1988Sjohnlev 	pp->p_ldt = ldt;
732843e1988Sjohnlev 	pp->p_ldtlimit = nsels - 1;
733843e1988Sjohnlev 	set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);
734843e1988Sjohnlev 
735843e1988Sjohnlev 	if (pp == curproc) {
7367c478bd9Sstevel@tonic-gate 		kpreempt_disable();
7377c478bd9Sstevel@tonic-gate 		ldt_load();
7387c478bd9Sstevel@tonic-gate 		kpreempt_enable();
739843e1988Sjohnlev 	}
7407c478bd9Sstevel@tonic-gate }
7417c478bd9Sstevel@tonic-gate 
7420baeff3dSrab static void
ldt_free(proc_t * pp)7437c478bd9Sstevel@tonic-gate ldt_free(proc_t *pp)
7447c478bd9Sstevel@tonic-gate {
745843e1988Sjohnlev 	user_desc_t	*ldt;
746843e1988Sjohnlev 	size_t		ldtsz;
7477c478bd9Sstevel@tonic-gate 
7487c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_ldt != NULL);
7497c478bd9Sstevel@tonic-gate 
7507c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_ldtlock);
751843e1988Sjohnlev 	ldt = pp->p_ldt;
752843e1988Sjohnlev 	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
7537c478bd9Sstevel@tonic-gate 
754843e1988Sjohnlev 	ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));
7557c478bd9Sstevel@tonic-gate 
7567c478bd9Sstevel@tonic-gate 	pp->p_ldt = NULL;
7570baeff3dSrab 	pp->p_ldtlimit = 0;
758843e1988Sjohnlev 	pp->p_ldt_desc = null_sdesc;
759843e1988Sjohnlev 	mutex_exit(&pp->p_ldtlock);
7600baeff3dSrab 
761843e1988Sjohnlev 	if (pp == curproc) {
762843e1988Sjohnlev 		kpreempt_disable();
7630baeff3dSrab 		ldt_unload();
7647c478bd9Sstevel@tonic-gate 		kpreempt_enable();
765843e1988Sjohnlev 	}
766843e1988Sjohnlev 
767843e1988Sjohnlev #if defined(__xpv)
768843e1988Sjohnlev 	/*
769843e1988Sjohnlev 	 * We are not allowed to make the ldt writable until after
770843e1988Sjohnlev 	 * we tell the hypervisor to unload it.
771843e1988Sjohnlev 	 */
772843e1988Sjohnlev 	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
773843e1988Sjohnlev 		panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
774843e1988Sjohnlev #endif
775843e1988Sjohnlev 
776843e1988Sjohnlev 	kmem_free(ldt, ldtsz);
7777c478bd9Sstevel@tonic-gate }
7787c478bd9Sstevel@tonic-gate 
7797c478bd9Sstevel@tonic-gate /*
7807c478bd9Sstevel@tonic-gate  * On fork copy new ldt for child.
7817c478bd9Sstevel@tonic-gate  */
782843e1988Sjohnlev static void
ldt_dup(proc_t * pp,proc_t * cp)7837c478bd9Sstevel@tonic-gate ldt_dup(proc_t *pp, proc_t *cp)
7847c478bd9Sstevel@tonic-gate {
785843e1988Sjohnlev 	size_t	ldtsz;
7867c478bd9Sstevel@tonic-gate 
787843e1988Sjohnlev 	ASSERT(pp->p_ldt != NULL);
788843e1988Sjohnlev 	ASSERT(cp != curproc);
7897c478bd9Sstevel@tonic-gate 
790843e1988Sjohnlev 	/*
791843e1988Sjohnlev 	 * I assume the parent's ldt can't increase since we're in a fork.
792843e1988Sjohnlev 	 */
7937c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_ldtlock);
794843e1988Sjohnlev 	mutex_enter(&cp->p_ldtlock);
7957c478bd9Sstevel@tonic-gate 
796843e1988Sjohnlev 	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
797843e1988Sjohnlev 
798843e1988Sjohnlev 	ldt_alloc(cp, pp->p_ldtlimit);
799843e1988Sjohnlev 
800843e1988Sjohnlev #if defined(__xpv)
801843e1988Sjohnlev 	/*
802843e1988Sjohnlev 	 * Make child's ldt writable so it can be copied into from
803843e1988Sjohnlev 	 * parent's ldt. This works since ldt_alloc above did not load
804843e1988Sjohnlev 	 * the ldt since its for the child process. If we tried to make
805843e1988Sjohnlev 	 * an LDT writable that is loaded in hw the setprot operation
806843e1988Sjohnlev 	 * would fail.
807843e1988Sjohnlev 	 */
808843e1988Sjohnlev 	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
809843e1988Sjohnlev 		panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
810843e1988Sjohnlev #endif
811843e1988Sjohnlev 
812843e1988Sjohnlev 	bcopy(pp->p_ldt, cp->p_ldt, ldtsz);
813843e1988Sjohnlev 
814843e1988Sjohnlev #if defined(__xpv)
815843e1988Sjohnlev 	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
816843e1988Sjohnlev 		panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
817843e1988Sjohnlev #endif
818843e1988Sjohnlev 	mutex_exit(&cp->p_ldtlock);
8197c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_ldtlock);
820843e1988Sjohnlev 
821843e1988Sjohnlev }
822843e1988Sjohnlev 
823843e1988Sjohnlev static void
ldt_grow(proc_t * pp,uint_t seli)824843e1988Sjohnlev ldt_grow(proc_t *pp, uint_t seli)
825843e1988Sjohnlev {
826843e1988Sjohnlev 	user_desc_t	*oldt, *nldt;
827843e1988Sjohnlev 	uint_t		nsels;
828843e1988Sjohnlev 	size_t		oldtsz, nldtsz;
829843e1988Sjohnlev 
830843e1988Sjohnlev 	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
831843e1988Sjohnlev 	ASSERT(pp->p_ldt != NULL);
832843e1988Sjohnlev 	ASSERT(pp->p_ldtlimit != 0);
833843e1988Sjohnlev 
834843e1988Sjohnlev 	/*
835843e1988Sjohnlev 	 * Allocate larger LDT just large enough to contain seli.
836843e1988Sjohnlev 	 */
837843e1988Sjohnlev 	nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
838843e1988Sjohnlev 	nsels = nldtsz / sizeof (user_desc_t);
839843e1988Sjohnlev 	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
840843e1988Sjohnlev 	ASSERT(nsels > pp->p_ldtlimit);
841843e1988Sjohnlev 
842843e1988Sjohnlev 	oldt = pp->p_ldt;
843843e1988Sjohnlev 	oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
844843e1988Sjohnlev 
845843e1988Sjohnlev 	nldt = kmem_zalloc(nldtsz, KM_SLEEP);
846843e1988Sjohnlev 	ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));
847843e1988Sjohnlev 
848843e1988Sjohnlev 	bcopy(oldt, nldt, oldtsz);
849843e1988Sjohnlev 
850843e1988Sjohnlev 	/*
851843e1988Sjohnlev 	 * unload old ldt.
852843e1988Sjohnlev 	 */
853843e1988Sjohnlev 	kpreempt_disable();
854843e1988Sjohnlev 	ldt_unload();
855843e1988Sjohnlev 	kpreempt_enable();
856843e1988Sjohnlev 
857843e1988Sjohnlev #if defined(__xpv)
858843e1988Sjohnlev 
859843e1988Sjohnlev 	/*
860843e1988Sjohnlev 	 * Make old ldt writable and new ldt read only.
861843e1988Sjohnlev 	 */
862843e1988Sjohnlev 	if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
863843e1988Sjohnlev 		panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
864843e1988Sjohnlev 
865843e1988Sjohnlev 	if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
866843e1988Sjohnlev 		panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
867843e1988Sjohnlev #endif
868843e1988Sjohnlev 
869843e1988Sjohnlev 	pp->p_ldt = nldt;
870843e1988Sjohnlev 	pp->p_ldtlimit = nsels - 1;
871843e1988Sjohnlev 
872843e1988Sjohnlev 	/*
873843e1988Sjohnlev 	 * write new ldt segment descriptor.
874843e1988Sjohnlev 	 */
875843e1988Sjohnlev 	set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);
876843e1988Sjohnlev 
877843e1988Sjohnlev 	/*
878843e1988Sjohnlev 	 * load the new ldt.
879843e1988Sjohnlev 	 */
880843e1988Sjohnlev 	kpreempt_disable();
881843e1988Sjohnlev 	ldt_load();
882843e1988Sjohnlev 	kpreempt_enable();
883843e1988Sjohnlev 
884843e1988Sjohnlev 	kmem_free(oldt, oldtsz);
8857c478bd9Sstevel@tonic-gate }
886