/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/*	  All Rights Reserved */

/*	Copyright (c) 1987, 1988 Microsoft Corporation */
/*	  All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/errno.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/cpuvar.h>
#include <sys/sysi86.h>
#include <sys/psw.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/thread.h>
#include <sys/debug.h>
#include <sys/ontrap.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/archsystm.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/faultcode.h>
#include <sys/fp.h>
#include <sys/cmn_err.h>
#include <sys/segments.h>
#include <sys/clock.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#include <sys/note.h>
#endif

static void ldt_alloc(proc_t *, uint_t);
static void ldt_free(proc_t *);
static void ldt_dup(proc_t *, proc_t *);
static void ldt_grow(proc_t *, uint_t);

/*
 * sysi86 System Call
 */

/* ARGSUSED */
int
sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	struct ssd ssd;
	int error = 0;
	int c;
	proc_t *pp = curproc;

	switch (cmd) {

	/*
	 * The SI86V86 subsystem call of the SYSI86 system call
	 * supports only one subcode -- V86SC_IOPL.
	 */
	case SI86V86:
		if (arg1 == V86SC_IOPL) {
			struct regs *rp = lwptoregs(ttolwp(curthread));
			greg_t oldpl = rp->r_ps & PS_IOPL;
			greg_t newpl = arg2 & PS_IOPL;
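
			/*
			 * PS_IOPL masks the two-bit I/O privilege level
			 * field in %eflags.  Illustrative only: a
			 * sufficiently privileged program would raise its
			 * IOPL with something like
			 *
			 *	sysi86(SI86V86, V86SC_IOPL, PS_IOPL);
			 *
			 * after which it can execute in/out instructions
			 * without faulting.
			 */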

			/*
			 * Must be privileged to run this system call
			 * if granting more I/O privilege.
			 */
			if (newpl > oldpl && (error =
			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
				return (set_errno(error));
#if defined(__xpv)
			kpreempt_disable();
			installctx(curthread, NULL, xen_disable_user_iopl,
			    xen_enable_user_iopl, NULL, NULL,
			    xen_disable_user_iopl, NULL);
			xen_enable_user_iopl();
			kpreempt_enable();
#else
			rp->r_ps ^= oldpl ^ newpl;
#endif
		} else
			error = EINVAL;
		break;

	/*
	 * Set a segment descriptor
	 */
	case SI86DSCR:
		/*
		 * There are considerable problems here manipulating
		 * resources shared by many running lwps. Get everyone
		 * into a safe state before changing the LDT.
		 */
		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
			error = EINTR;
			break;
		}

		if (get_udatamodel() == DATAMODEL_LP64) {
			error = EINVAL;
			break;
		}

		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
			error = EFAULT;
			break;
		}

		error = setdscr(&ssd);

		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		break;

	case SI86FPHW:
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1)
			error = EFAULT;
		break;

	case SI86FPSTART:
		/*
		 * arg1 is the address of _fp_hw
		 * arg2 is the desired x87 FCW value
		 * arg3 is the desired SSE MXCSR value
		 * a return value of one means SSE hardware, else none.
		 */
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1) {
			error = EFAULT;
			break;
		}
		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
		return (fp_kind == __FP_SSE ? 1 : 0);

	/* real time clock management commands */

	case WTODC:
		if ((error = secpolicy_settime(CRED())) == 0) {
			timestruc_t ts;

			mutex_enter(&tod_lock);
			gethrestime(&ts);
			tod_set(ts);
			mutex_exit(&tod_lock);
		}
		break;

/* Give some timezone playing room */
#define	ONEWEEK	(7 * 24 * 60 * 60)

	case SGMTL:
		/*
		 * When called from 32-bit land, negative values are not
		 * sign extended, so we do that here by casting to an int
		 * and back.  We also clamp the value to within reason
		 * and detect when a 64-bit call overflows an int.
		 */
		if ((error = secpolicy_settime(CRED())) == 0) {
			int newlag = (int)arg1;

#ifdef _SYSCALL32_IMPL
			if (get_udatamodel() == DATAMODEL_NATIVE &&
			    (long)newlag != (long)arg1) {
				error = EOVERFLOW;
			} else
#endif
			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
				sgmtl(newlag);
			else
				error = EOVERFLOW;
		}
		break;

	case GGMTL:
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (sulword((void *)arg1, ggmtl()) == -1)
				error = EFAULT;
#ifdef _SYSCALL32_IMPL
		} else {
			time_t gmtl;

			if ((gmtl = ggmtl()) > INT32_MAX) {
				/*
				 * Since gmt_lag can at most be
				 * +/- 12 hours, something is
				 * *seriously* messed up here.
				 */
				error = EOVERFLOW;
			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
				error = EFAULT;
#endif
		}
		break;

	case RTCSYNC:
		if ((error = secpolicy_settime(CRED())) == 0)
			rtcsync();
		break;

	/* END OF real time clock management commands */

	default:
		error = EINVAL;
		break;
	}
	return (error == 0 ? 0 : set_errno(error));
}
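
/*
 * Translate a hardware segment descriptor to and from the struct ssd form
 * used at the sysi86() interface.  The packing of the access fields below
 * mirrors the hardware descriptor layout:
 *
 *	acc1:	bits 0-4	segment type (including the S bit)
 *		bits 5-6	descriptor privilege level (dpl)
 *		bit 7		present (p)
 *
 *	acc2:	bit 0		available to software (avl)
 *		bit 1		64-bit code (long) / reserved
 *		bit 2		default operand size (def32)
 *		bit 3		granularity (gran)
 */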
void
usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
{
	ssd->bo = USEGD_GETBASE(usd);
	ssd->ls = USEGD_GETLIMIT(usd);
	ssd->sel = sel;

	/*
	 * set type, dpl and present bits.
	 */
	ssd->acc1 = usd->usd_type;
	ssd->acc1 |= usd->usd_dpl << 5;
	ssd->acc1 |= usd->usd_p << (5 + 2);

	/*
	 * set avl, DB and granularity bits.
	 */
	ssd->acc2 = usd->usd_avl;

#if defined(__amd64)
	ssd->acc2 |= usd->usd_long << 1;
#else
	ssd->acc2 |= usd->usd_reserved << 1;
#endif

	ssd->acc2 |= usd->usd_def32 << (1 + 1);
	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
}

static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

	ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);

	USEGD_SETBASE(usd, ssd->bo);
	USEGD_SETLIMIT(usd, ssd->ls);

	/*
	 * set type, dpl and present bits.
	 */
	usd->usd_type = ssd->acc1;
	usd->usd_dpl = ssd->acc1 >> 5;
	usd->usd_p = ssd->acc1 >> (5 + 2);

	ASSERT(usd->usd_type >= SDT_MEMRO);
	ASSERT(usd->usd_dpl == SEL_UPL);

	/*
	 * 64-bit code selectors are never allowed in the LDT.
	 * Reserved bit is always 0 on 32-bit systems.
	 */
#if defined(__amd64)
	usd->usd_long = 0;
#else
	usd->usd_reserved = 0;
#endif

	/*
	 * set avl, DB and granularity bits.
	 */
	usd->usd_avl = ssd->acc2;
	usd->usd_def32 = ssd->acc2 >> (1 + 1);
	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
}


#if defined(__i386)

static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{

	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);

	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	sgd->sgd_selector = ssd->ls;

	/*
	 * set type, dpl and present bits.
	 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;
	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);
	sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */

/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
	xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
	    curproc->p_ldtlimit + 1);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
	wr_ldtr(ULDT_SEL);
#endif
}

/*
 * Store a NULL selector in the LDTR.  All subsequent illegal references to
 * the LDT will result in a #gp.
 */
void
ldt_unload(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
	wr_ldtr(0);
#endif
}
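
/*
 * Note that ldt_load() and ldt_unload() act on the current CPU's GDT slot
 * (or, under the hypervisor, on the current vcpu), so callers must prevent
 * migration while they run; every call site below either brackets them with
 * kpreempt_disable()/kpreempt_enable() or runs in context-switch context.
 */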

/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 * Wine creates an ldt descriptor and points a segment register
	 * to it.
	 *
	 * We then context switch away from the wine lwp to a kernel
	 * thread and hit a breakpoint in the kernel with kmdb.
	 *
	 * When we continue and resume from kmdb we will #gp
	 * fault since kmdb will have saved the stale ldt selector
	 * from wine and will try to restore it, but we are no longer in
	 * the context of the wine process and do not have our
	 * ldtr register pointing to the private ldt.
	 */
	reset_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}

static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}

/*
 * When a process with a private LDT execs, fast syscalls must be enabled for
 * the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
	ASSERT(p->p_ldt);

	if (isexec) {
		kpreempt_disable();
		cpu_fast_syscall_enable(NULL);
		kpreempt_enable();
	}

	/*
	 * ldt_free() will free the memory used by the private LDT, reset the
	 * process's descriptor, and re-program the LDTR.
	 */
	ldt_free(p);
}

/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
	proc_t *targ = p;
	kthread_t *t;

	/*
	 * If this is a fork, operate on the child process.
	 */
	if (cp != NULL) {
		targ = cp;
		ldt_dup(p, cp);
	}

	/*
	 * The process context ops expect the target process as their argument.
	 */
	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);

	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx);

	/*
	 * We've just disabled fast system call and return instructions; take
	 * the slow path out to make sure we don't try to use one to return
	 * back to user. We must set t_post_sys for every thread in the
	 * process to make sure none of them escape out via fast return.
	 */

	mutex_enter(&targ->p_lock);
	t = targ->p_tlist;
	do {
		t->t_post_sys = 1;
	} while ((t = t->t_forw) != targ->p_tlist);
	mutex_exit(&targ->p_lock);
}
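
/*
 * setdscr() is reached from userland via sysi86(SI86DSCR, &ssd).  As an
 * illustrative (hypothetical) example, a 32-bit thread library wanting a
 * per-thread %gs segment might fill in something like:
 *
 *	struct ssd ssd;
 *
 *	ssd.sel = (index << 3) | 0x7;	hypothetical: LDT bit (TI=1), RPL=3
 *	ssd.bo = (uint32_t)tls_base;	segment base address
 *	ssd.ls = limit;			segment limit
 *	ssd.acc1 = ...;			type, dpl=3 and present bits
 *	ssd.acc2 = ...;			avl, def32 and granularity bits
 *	if (sysi86(SI86DSCR, &ssd) < 0)
 *		...
 *
 * The checks below enforce exactly that shape: an LDT selector at user
 * privilege whose index falls in [LDT_UDBASE, MAXNLDT).
 */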
int
setdscr(struct ssd *ssd)
{
	ushort_t seli;		/* selector index */
	user_desc_t *ldp;	/* descriptor pointer */
	user_desc_t ndesc;	/* new descriptor */
	proc_t *pp = ttoproc(curthread);
	int rc = 0;

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	ndesc = null_udesc;
	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		ldt_alloc(pp, seli);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of the %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */
		kpreempt_disable();
		ldt_installctx(pp, NULL);
		cpu_fast_syscall_disable(NULL);
		ASSERT(curthread->t_post_sys != 0);
		kpreempt_enable();

	} else if (seli > pp->p_ldtlimit) {

		/*
		 * Increase size of ldt to include seli.
		 */
		ldt_grow(pp, seli);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	ldp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of its segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a far better failure mode than SIGKILL and a core file from
	 * kern_gpfault().)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
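		/*
		 * The caller (the SI86DSCR case in sysi86(), or the agent
		 * lwp machinery) has already quiesced every other lwp in
		 * the process, which is what makes it safe to walk p_tlist
		 * here and why only this lwp should still be running, as
		 * the ASSERT below checks.
		 */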
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			if (pcb->pcb_rupdate == 1) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		rc = ldt_update_segd(ldp, &null_udesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif	/* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, &ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

#if defined(__i386)
	/*
	 * Allow a call gate only if the destination is in the LDT
	 * and the system is running in 32-bit legacy mode.
	 *
	 * In long mode 32-bit call gates are redefined as 64-bit call
	 * gates and the hardware enforces that the target code selector
	 * of the call gate must be a 64-bit selector; a #gp fault is
	 * generated otherwise. Since we do not allow 32-bit processes
	 * to switch themselves to 64-bits we never allow call gates
	 * on 64-bit systems.
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
		ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}
#endif	/* __i386 */

	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
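
/*
 * A worked example of the PAGESIZE-chunk sizing used by ldt_alloc() and
 * ldt_grow() below, assuming 8-byte user_desc_t entries and 4K pages:
 * for seli == 100, (100 + 1) * 8 = 808 bytes rounds up to 4096, so
 * nsels == 512 and p_ldtlimit becomes 511.  A later seli <= 511 therefore
 * fits without growing the LDT.
 */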
/*
 * Allocate new LDT for process just large enough to contain seli.
 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 * to simplify the implementation and because on the hypervisor it's
 * required, since the LDT must live on pages that have PROT_WRITE
 * removed and which are given to the hypervisor.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
	user_desc_t *ldt;
	size_t ldtsz;
	uint_t nsels;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt == NULL);
	ASSERT(pp->p_ldtlimit == 0);

	/*
	 * Allocate new LDT just large enough to contain seli.
	 */
	ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = ldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

	ldt = kmem_zalloc(ldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
		panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = ldt;
	pp->p_ldtlimit = nsels - 1;
	set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_load();
		kpreempt_enable();
	}
}

static void
ldt_free(proc_t *pp)
{
	user_desc_t *ldt;
	size_t ldtsz;

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	ldt = pp->p_ldt;
	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

	pp->p_ldt = NULL;
	pp->p_ldtlimit = 0;
	pp->p_ldt_desc = null_sdesc;
	mutex_exit(&pp->p_ldtlock);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_unload();
		kpreempt_enable();
	}

#if defined(__xpv)
	/*
	 * We are not allowed to make the ldt writable until after
	 * we tell the hypervisor to unload it.
	 */
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	kmem_free(ldt, ldtsz);
}

/*
 * On fork copy new ldt for child.
 */
static void
ldt_dup(proc_t *pp, proc_t *cp)
{
	size_t ldtsz;

	ASSERT(pp->p_ldt != NULL);
	ASSERT(cp != curproc);

	/*
	 * I assume the parent's ldt can't increase since we're in a fork.
	 */
	mutex_enter(&pp->p_ldtlock);
	mutex_enter(&cp->p_ldtlock);

	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ldt_alloc(cp, pp->p_ldtlimit);

#if defined(__xpv)
	/*
	 * Make the child's ldt writable so it can be copied into from the
	 * parent's ldt. This works since ldt_alloc above did not load
	 * the ldt, since it's for the child process. If we tried to make
	 * an LDT writable that is loaded in hw the setprot operation
	 * would fail.
	 */
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
	mutex_exit(&cp->p_ldtlock);
	mutex_exit(&pp->p_ldtlock);
}

static void
ldt_grow(proc_t *pp, uint_t seli)
{
	user_desc_t *oldt, *nldt;
	uint_t nsels;
	size_t oldtsz, nldtsz;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt != NULL);
	ASSERT(pp->p_ldtlimit != 0);

	/*
	 * Allocate larger LDT just large enough to contain seli.
	 */
	nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = nldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
	ASSERT(nsels > pp->p_ldtlimit);

	oldt = pp->p_ldt;
	oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	nldt = kmem_zalloc(nldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));

	bcopy(oldt, nldt, oldtsz);
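
	/*
	 * The swap below must be ordered carefully: unload the old LDT
	 * before publishing the new one, so this CPU is never running on
	 * a table that is about to be freed (and, on the hypervisor, so
	 * the old pages can be made writable again -- as noted in
	 * ldt_dup(), setprot fails on an LDT that is still loaded).
	 */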

	/*
	 * unload old ldt.
	 */
	kpreempt_disable();
	ldt_unload();
	kpreempt_enable();

#if defined(__xpv)

	/*
	 * Make old ldt writable and new ldt read only.
	 */
	if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

	if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = nldt;
	pp->p_ldtlimit = nsels - 1;

	/*
	 * write new ldt segment descriptor.
	 */
	set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);

	/*
	 * load the new ldt.
	 */
	kpreempt_disable();
	ldt_load();
	kpreempt_enable();

	kmem_free(oldt, oldtsz);
}