/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	All Rights Reserved						*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/errno.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/cpuvar.h>
#include <sys/sysi86.h>
#include <sys/psw.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/thread.h>
#include <sys/debug.h>
#include <sys/ontrap.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/archsystm.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/faultcode.h>
#include <sys/fp.h>
#include <sys/cmn_err.h>
#include <sys/segments.h>
#include <sys/clock.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#include <sys/note.h>
#endif

static void ldt_alloc(proc_t *, uint_t);
static void ldt_free(proc_t *);
static void ldt_dup(proc_t *, proc_t *);
static void ldt_grow(proc_t *, uint_t);

/*
 * sysi86 System Call
 */

/* ARGSUSED */
int
sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	struct ssd ssd;
	int error = 0;
	int c;
	proc_t *pp = curproc;

	switch (cmd) {

	/*
	 * The SI86V86 subsystem call of the SYSI86 system call
	 * supports only one subcode -- V86SC_IOPL.
	 */
	case SI86V86:
		if (arg1 == V86SC_IOPL) {
			struct regs *rp = lwptoregs(ttolwp(curthread));
			greg_t oldpl = rp->r_ps & PS_IOPL;
			greg_t newpl = arg2 & PS_IOPL;

			/*
			 * Must be privileged to run this system call
			 * if giving more io privilege.
			 */
			if (newpl > oldpl && (error =
			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
				return (set_errno(error));
#if defined(__xpv)
			kpreempt_disable();
			installctx(curthread, NULL, xen_disable_user_iopl,
			    xen_enable_user_iopl, NULL, NULL,
			    xen_disable_user_iopl, NULL);
			xen_enable_user_iopl();
			kpreempt_enable();
#else
			rp->r_ps ^= oldpl ^ newpl;
#endif
		} else
			error = EINVAL;
		break;

	/*
	 * Set a segment descriptor
	 */
	case SI86DSCR:
		/*
		 * There are considerable problems here manipulating
		 * resources shared by many running lwps. Get everyone
		 * into a safe state before changing the LDT.
		 */
		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
			error = EINTR;
			break;
		}

		if (get_udatamodel() == DATAMODEL_LP64) {
			error = EINVAL;
			break;
		}

		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
			error = EFAULT;
			break;
		}

		error = setdscr(&ssd);

		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		break;

	case SI86FPHW:
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1)
			error = EFAULT;
		break;

	case SI86FPSTART:
		/*
		 * arg1 is the address of _fp_hw
		 * arg2 is the desired x87 FCW value
		 * arg3 is the desired SSE MXCSR value
		 * a return value of one means SSE hardware, else none.
		 */
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1) {
			error = EFAULT;
			break;
		}
		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
		return ((fp_kind & __FP_SSE) ? 1 : 0);

	/* real time clock management commands */

	case WTODC:
		if ((error = secpolicy_settime(CRED())) == 0) {
			timestruc_t ts;
			mutex_enter(&tod_lock);
			gethrestime(&ts);
			tod_set(ts);
			mutex_exit(&tod_lock);
		}
		break;

/* Give some timezone playing room */
#define	ONEWEEK	(7 * 24 * 60 * 60)

	case SGMTL:
		/*
		 * Called from 32 bit land, negative values
		 * are not sign extended, so we do that here
		 * by casting it to an int and back. We also
		 * clamp the value to within reason and detect
		 * when a 64 bit call overflows an int.
		 */
		if ((error = secpolicy_settime(CRED())) == 0) {
			int newlag = (int)arg1;

#ifdef _SYSCALL32_IMPL
			if (get_udatamodel() == DATAMODEL_NATIVE &&
			    (long)newlag != (long)arg1) {
				error = EOVERFLOW;
			} else
#endif
			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
				sgmtl(newlag);
			else
				error = EOVERFLOW;
		}
		break;

	case GGMTL:
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (sulword((void *)arg1, ggmtl()) == -1)
				error = EFAULT;
#ifdef _SYSCALL32_IMPL
		} else {
			time_t gmtl;

			if ((gmtl = ggmtl()) > INT32_MAX) {
				/*
				 * Since gmt_lag can at most be
				 * +/- 12 hours, something is
				 * *seriously* messed up here.
				 */
				error = EOVERFLOW;
			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
				error = EFAULT;
#endif
		}
		break;

	case RTCSYNC:
		if ((error = secpolicy_settime(CRED())) == 0)
			rtcsync();
		break;

	/* END OF real time clock management commands */

	default:
		error = EINVAL;
		break;
	}
	return (error == 0 ? 0 : set_errno(error));
}
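
/*
 * Illustrative (hypothetical) userland sketch of the SI86DSCR path above,
 * for a 32-bit process (the handler rejects LP64 callers).  It packs a
 * struct ssd describing a present, DPL-3, read/write data segment in the
 * private LDT and hands it to sysi86(2).  The acc1/acc2 packing mirrors
 * usd_to_ssd()/ssd_to_usd() below: acc1 holds type, dpl and present;
 * acc2 sets the def32 and page-granularity bits.  The selector encodes
 * index LDT_UDBASE with the table-indicator bit set and RPL 3.  "base"
 * and the limit are placeholder example values, not requirements.
 *
 *	#include <stdio.h>
 *	#include <sys/sysi86.h>
 *	#include <sys/segments.h>
 *
 *	struct ssd ssd;
 *
 *	ssd.sel = (LDT_UDBASE << 3) | 0x7;
 *	ssd.bo = (unsigned long)base;
 *	ssd.ls = 0xfffff;
 *	ssd.acc1 = SDT_MEMRWA | (SEL_UPL << 5) | (1 << 7);
 *	ssd.acc2 = (1 << 2) | (1 << 3);
 *	if (sysi86(SI86DSCR, &ssd) == -1)
 *		perror("sysi86(SI86DSCR)");
 */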

void
usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
{
	ssd->bo = USEGD_GETBASE(usd);
	ssd->ls = USEGD_GETLIMIT(usd);
	ssd->sel = sel;

	/*
	 * set type, dpl and present bits.
	 */
	ssd->acc1 = usd->usd_type;
	ssd->acc1 |= usd->usd_dpl << 5;
	ssd->acc1 |= usd->usd_p << (5 + 2);

	/*
	 * set avl, DB and granularity bits.
	 */
	ssd->acc2 = usd->usd_avl;

#if defined(__amd64)
	ssd->acc2 |= usd->usd_long << 1;
#else
	ssd->acc2 |= usd->usd_reserved << 1;
#endif

	ssd->acc2 |= usd->usd_def32 << (1 + 1);
	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
}

static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

	ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);

	USEGD_SETBASE(usd, ssd->bo);
	USEGD_SETLIMIT(usd, ssd->ls);

	/*
	 * set type, dpl and present bits.
	 */
	usd->usd_type = ssd->acc1;
	usd->usd_dpl = ssd->acc1 >> 5;
	usd->usd_p = ssd->acc1 >> (5 + 2);

	ASSERT(usd->usd_type >= SDT_MEMRO);
	ASSERT(usd->usd_dpl == SEL_UPL);

	/*
	 * 64-bit code selectors are never allowed in the LDT.
	 * The reserved bit is always 0 on 32-bit systems.
	 */
#if defined(__amd64)
	usd->usd_long = 0;
#else
	usd->usd_reserved = 0;
#endif

	/*
	 * set avl, DB and granularity bits.
	 */
	usd->usd_avl = ssd->acc2;
	usd->usd_def32 = ssd->acc2 >> (1 + 1);
	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
}

#if defined(__i386)

static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{

	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);

	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	sgd->sgd_selector = ssd->ls;

	/*
	 * set type, dpl and present bits.
	 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;
	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);
	sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */

/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
	xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
	    curproc->p_ldtlimit + 1);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
	wr_ldtr(ULDT_SEL);
#endif
}

/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will result in a #gp.
 */
void
ldt_unload(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
	wr_ldtr(0);
#endif
}

/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 * Wine creates an ldt descriptor and points a segment register
	 * to it.
	 *
	 * We then context switch away from the wine lwp to a kernel
	 * thread and hit a breakpoint in the kernel with kmdb.
	 *
	 * When we continue and resume from kmdb we will #gp
	 * fault since kmdb will have saved the stale ldt selector
	 * from wine and will try to restore it, but we are no longer in
	 * the context of the wine process and do not have our
	 * ldtr register pointing to the private ldt.
	 */
	reset_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}

static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}

/*
 * When a process with a private LDT execs, fast syscalls must be enabled for
 * the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
	ASSERT(p->p_ldt);

	if (isexec) {
		kpreempt_disable();
		cpu_fast_syscall_enable(NULL);
		kpreempt_enable();
	}

	/*
	 * ldt_free() will free the memory used by the private LDT, reset the
	 * process's descriptor, and re-program the LDTR.
	 */
	ldt_free(p);
}

/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
	proc_t *targ = p;
	kthread_t *t;

	/*
	 * If this is a fork, operate on the child process.
	 */
	if (cp != NULL) {
		targ = cp;
		ldt_dup(p, cp);
	}

	/*
	 * The process context ops expect the target process as their argument.
	 */
	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);

	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx);

	/*
	 * We've just disabled fast system call and return instructions; take
	 * the slow path out to make sure we don't try to use one to return
	 * back to user. We must set t_post_sys for every thread in the
	 * process to make sure none of them escape out via fast return.
	 */

	mutex_enter(&targ->p_lock);
	t = targ->p_tlist;
	do {
		t->t_post_sys = 1;
	} while ((t = t->t_forw) != targ->p_tlist);
	mutex_exit(&targ->p_lock);
}

int
setdscr(struct ssd *ssd)
{
	ushort_t seli;		/* selector index */
	user_desc_t *ldp;	/* descriptor pointer */
	user_desc_t ndesc;	/* new descriptor */
	proc_t	*pp = ttoproc(curthread);
	int	rc = 0;

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	ndesc = null_udesc;
	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		ldt_alloc(pp, seli);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of the %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */
		kpreempt_disable();
		ldt_installctx(pp, NULL);
		cpu_fast_syscall_disable(NULL);
		ASSERT(curthread->t_post_sys != 0);
		kpreempt_enable();

	} else if (seli > pp->p_ldtlimit) {

		/*
		 * Increase size of ldt to include seli.
		 */
		ldt_grow(pp, seli);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	ldp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case. An lwp creates a harmless LDT entry, points
	 * one of its segment registers at it, then tells the kernel (here)
	 * to delete it. In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault(). In the 64-bit kernel, the same thing
	 * will happen in the normal case too. However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a much better failure mode than SIGKILL and a core file
	 * from kern_gpfault().)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			if (pcb->pcb_rupdate == 1) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		rc = ldt_update_segd(ldp, &null_udesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, &ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

#if defined(__i386)
	/*
	 * Allow a call gate only if the destination is in the LDT
	 * and the system is running in 32-bit legacy mode.
	 *
	 * In long mode 32-bit call gates are redefined as 64-bit call
	 * gates and the hardware enforces that the target code selector
	 * of the call gate must be a 64-bit selector; a #gp fault is
	 * generated otherwise. Since we do not allow 32-bit processes
	 * to switch themselves to 64-bit, we never allow call gates
	 * on 64-bit systems.
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {

		ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}
#endif	/* __i386 */

	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}

/*
 * Allocate new LDT for process just large enough to contain seli.
 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 * to simplify the implementation and because on the hypervisor it's
 * required, since the LDT must live on pages that have PROT_WRITE
 * removed and which are given to the hypervisor.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
	user_desc_t *ldt;
	size_t ldtsz;
	uint_t nsels;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt == NULL);
	ASSERT(pp->p_ldtlimit == 0);

	/*
	 * Allocate new LDT just large enough to contain seli.
	 */
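	/*
	 * Illustrative arithmetic (assuming 8-byte descriptors and a 4K
	 * PAGESIZE): seli == 100 gives P2ROUNDUP(101 * 8, 4096) == 4096,
	 * i.e. nsels == 512 and p_ldtlimit == 511.
	 */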
	ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = ldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

	ldt = kmem_zalloc(ldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
		panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = ldt;
	pp->p_ldtlimit = nsels - 1;
	set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_load();
		kpreempt_enable();
	}
}

static void
ldt_free(proc_t *pp)
{
	user_desc_t *ldt;
	size_t ldtsz;

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	ldt = pp->p_ldt;
	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

	pp->p_ldt = NULL;
	pp->p_ldtlimit = 0;
	pp->p_ldt_desc = null_sdesc;
	mutex_exit(&pp->p_ldtlock);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_unload();
		kpreempt_enable();
	}

#if defined(__xpv)
	/*
	 * We are not allowed to make the ldt writable until after
	 * we tell the hypervisor to unload it.
	 */
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	kmem_free(ldt, ldtsz);
}

/*
 * On fork copy new ldt for child.
 */
static void
ldt_dup(proc_t *pp, proc_t *cp)
{
	size_t ldtsz;

	ASSERT(pp->p_ldt != NULL);
	ASSERT(cp != curproc);

	/*
	 * I assume the parent's ldt can't increase since we're in a fork.
	 */
	mutex_enter(&pp->p_ldtlock);
	mutex_enter(&cp->p_ldtlock);

	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ldt_alloc(cp, pp->p_ldtlimit);

#if defined(__xpv)
	/*
	 * Make the child's ldt writable so it can be copied into from
	 * the parent's ldt. This works because ldt_alloc() above did not
	 * load the ldt, since it's for the child process. If we tried to
	 * make an LDT writable that is loaded in hw the setprot operation
	 * would fail.
	 */
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
	mutex_exit(&cp->p_ldtlock);
	mutex_exit(&pp->p_ldtlock);
}

static void
ldt_grow(proc_t *pp, uint_t seli)
{
	user_desc_t *oldt, *nldt;
	uint_t nsels;
	size_t oldtsz, nldtsz;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt != NULL);
	ASSERT(pp->p_ldtlimit != 0);

	/*
	 * Allocate larger LDT just large enough to contain seli.
	 */
	nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = nldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
	ASSERT(nsels > pp->p_ldtlimit);

	oldt = pp->p_ldt;
	oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	nldt = kmem_zalloc(nldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));

	bcopy(oldt, nldt, oldtsz);

	/*
	 * unload old ldt.
	 */
	kpreempt_disable();
	ldt_unload();
	kpreempt_enable();

#if defined(__xpv)

	/*
	 * Make old ldt writable and new ldt read only.
	 */
	if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

	if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = nldt;
	pp->p_ldtlimit = nsels - 1;

	/*
	 * write new ldt segment descriptor.
	 */
	set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);

	/*
	 * load the new ldt.
	 */
	kpreempt_disable();
	ldt_load();
	kpreempt_enable();

	kmem_free(oldt, oldtsz);
}