1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/signal.h> 36 #include <sys/systm.h> 37 #include <sys/user.h> 38 #include <sys/mman.h> 39 #include <sys/class.h> 40 #include <sys/proc.h> 41 #include <sys/procfs.h> 42 #include <sys/buf.h> 43 #include <sys/kmem.h> 44 #include <sys/cred.h> 45 #include <sys/archsystm.h> 46 #include <sys/vmparam.h> 47 #include <sys/prsystm.h> 48 #include <sys/reboot.h> 49 #include <sys/uadmin.h> 50 #include <sys/vfs.h> 51 #include <sys/vnode.h> 52 #include <sys/file.h> 53 #include <sys/session.h> 54 #include <sys/ucontext.h> 55 #include <sys/dnlc.h> 56 #include <sys/var.h> 57 #include <sys/cmn_err.h> 58 #include <sys/debugreg.h> 59 #include <sys/thread.h> 60 #include <sys/vtrace.h> 61 #include <sys/consdev.h> 62 #include <sys/psw.h> 63 #include <sys/regset.h> 64 #include <sys/privregs.h> 65 #include <sys/cpu.h> 66 #include <sys/stack.h> 67 #include <sys/swap.h> 68 #include <vm/hat.h> 69 #include <vm/anon.h> 70 #include <vm/as.h> 71 #include <vm/page.h> 72 #include <vm/seg.h> 73 #include <vm/seg_kmem.h> 74 #include <vm/seg_map.h> 75 #include <vm/seg_vn.h> 76 #include <sys/exec.h> 77 #include <sys/acct.h> 78 #include <sys/core.h> 79 #include <sys/corectl.h> 80 #include <sys/modctl.h> 81 #include <sys/tuneable.h> 82 #include <c2/audit.h> 83 #include <sys/bootconf.h> 84 #include <sys/brand.h> 85 #include <sys/dumphdr.h> 86 #include <sys/promif.h> 87 #include <sys/systeminfo.h> 88 #include <sys/kdi.h> 89 #include <sys/contract_impl.h> 90 #include <sys/x86_archext.h> 91 #include <sys/segments.h> 92 #include <sys/ontrap.h> 93 94 /* 95 * Compare the version of boot that boot says it is against 96 * the version of boot the kernel expects. 97 */ 98 int 99 check_boot_version(int boots_version) 100 { 101 if (boots_version == BO_VERSION) 102 return (0); 103 104 prom_printf("Wrong boot interface - kernel needs v%d found v%d\n", 105 BO_VERSION, boots_version); 106 prom_panic("halting"); 107 /*NOTREACHED*/ 108 } 109 110 /* 111 * Process the physical installed list for boot. 112 * Finds: 113 * 1) the pfn of the highest installed physical page, 114 * 2) the number of pages installed 115 * 3) the number of distinct contiguous regions these pages fall into. 116 */ 117 void 118 installed_top_size( 119 struct memlist *list, /* pointer to start of installed list */ 120 pfn_t *high_pfn, /* return ptr for top value */ 121 pgcnt_t *pgcnt, /* return ptr for sum of installed pages */ 122 int *ranges) /* return ptr for the count of contig. ranges */ 123 { 124 pfn_t top = 0; 125 pgcnt_t sumpages = 0; 126 pfn_t highp; /* high page in a chunk */ 127 int cnt = 0; 128 129 for (; list; list = list->next) { 130 ++cnt; 131 highp = (list->address + list->size - 1) >> PAGESHIFT; 132 if (top < highp) 133 top = highp; 134 sumpages += btop(list->size); 135 } 136 137 *high_pfn = top; 138 *pgcnt = sumpages; 139 *ranges = cnt; 140 } 141 142 /* 143 * Copy in a memory list from boot to kernel, with a filter function 144 * to remove pages. The filter function can increase the address and/or 145 * decrease the size to filter out pages. It will also align addresses and 146 * sizes to PAGESIZE. 147 */ 148 void 149 copy_memlist_filter( 150 struct memlist *src, 151 struct memlist **dstp, 152 void (*filter)(uint64_t *, uint64_t *)) 153 { 154 struct memlist *dst, *prev; 155 uint64_t addr; 156 uint64_t size; 157 uint64_t eaddr; 158 159 dst = *dstp; 160 prev = dst; 161 162 /* 163 * Move through the memlist applying a filter against 164 * each range of memory. Note that we may apply the 165 * filter multiple times against each memlist entry. 166 */ 167 for (; src; src = src->next) { 168 addr = P2ROUNDUP(src->address, PAGESIZE); 169 eaddr = P2ALIGN(src->address + src->size, PAGESIZE); 170 while (addr < eaddr) { 171 size = eaddr - addr; 172 if (filter != NULL) 173 filter(&addr, &size); 174 if (size == 0) 175 break; 176 dst->address = addr; 177 dst->size = size; 178 dst->next = 0; 179 if (prev == dst) { 180 dst->prev = 0; 181 dst++; 182 } else { 183 dst->prev = prev; 184 prev->next = dst; 185 dst++; 186 prev++; 187 } 188 addr += size; 189 } 190 } 191 192 *dstp = dst; 193 } 194 195 /* 196 * Kernel setup code, called from startup(). 197 */ 198 void 199 kern_setup1(void) 200 { 201 proc_t *pp; 202 203 pp = &p0; 204 205 proc_sched = pp; 206 207 /* 208 * Initialize process 0 data structures 209 */ 210 pp->p_stat = SRUN; 211 pp->p_flag = SSYS; 212 213 pp->p_pidp = &pid0; 214 pp->p_pgidp = &pid0; 215 pp->p_sessp = &session0; 216 pp->p_tlist = &t0; 217 pid0.pid_pglink = pp; 218 pid0.pid_pgtail = pp; 219 220 /* 221 * XXX - we asssume that the u-area is zeroed out except for 222 * ttolwp(curthread)->lwp_regs. 223 */ 224 PTOU(curproc)->u_cmask = (mode_t)CMASK; 225 226 thread_init(); /* init thread_free list */ 227 pid_init(); /* initialize pid (proc) table */ 228 contract_init(); /* initialize contracts */ 229 230 init_pages_pp_maximum(); 231 } 232 233 /* 234 * Load a procedure into a thread. 235 */ 236 void 237 thread_load(kthread_t *t, void (*start)(), caddr_t arg, size_t len) 238 { 239 caddr_t sp; 240 size_t framesz; 241 caddr_t argp; 242 long *p; 243 extern void thread_start(); 244 245 /* 246 * Push a "c" call frame onto the stack to represent 247 * the caller of "start". 248 */ 249 sp = t->t_stk; 250 ASSERT(((uintptr_t)t->t_stk & (STACK_ENTRY_ALIGN - 1)) == 0); 251 if (len != 0) { 252 /* 253 * the object that arg points at is copied into the 254 * caller's frame. 255 */ 256 framesz = SA(len); 257 sp -= framesz; 258 ASSERT(sp > t->t_stkbase); 259 argp = sp + SA(MINFRAME); 260 bcopy(arg, argp, len); 261 arg = argp; 262 } 263 /* 264 * Set up arguments (arg and len) on the caller's stack frame. 265 */ 266 p = (long *)sp; 267 268 *--p = 0; /* fake call */ 269 *--p = 0; /* null frame pointer terminates stack trace */ 270 *--p = (long)len; 271 *--p = (intptr_t)arg; 272 *--p = (intptr_t)start; 273 274 /* 275 * initialize thread to resume at thread_start() which will 276 * turn around and invoke (*start)(arg, len). 277 */ 278 t->t_pc = (uintptr_t)thread_start; 279 t->t_sp = (uintptr_t)p; 280 281 ASSERT((t->t_sp & (STACK_ENTRY_ALIGN - 1)) == 0); 282 } 283 284 /* 285 * load user registers into lwp. 286 */ 287 /*ARGSUSED2*/ 288 void 289 lwp_load(klwp_t *lwp, gregset_t grp, uintptr_t thrptr) 290 { 291 struct regs *rp = lwptoregs(lwp); 292 293 setgregs(lwp, grp); 294 rp->r_ps = PSL_USER; 295 296 /* 297 * For 64-bit lwps, we allow null %fs selector value, and null 298 * %gs selector to point anywhere in the address space using 299 * %fsbase and %gsbase behind the scenes. libc uses %fs to point 300 * at the ulwp_t structure. 301 * 302 * For 32-bit lwps, libc wedges its lwp thread pointer into the 303 * ucontext ESP slot (which is otherwise irrelevant to setting a 304 * ucontext) and LWPGS_SEL value into gregs[REG_GS]. This is so 305 * syslwp_create() can atomically setup %gs. 306 * 307 * See setup_context() in libc. 308 */ 309 #ifdef _SYSCALL32_IMPL 310 if (lwp_getdatamodel(lwp) == DATAMODEL_ILP32) { 311 if (grp[REG_GS] == LWPGS_SEL) 312 (void) lwp_setprivate(lwp, _LWP_GSBASE, thrptr); 313 } else { 314 /* 315 * See lwp_setprivate in kernel and setup_context in libc. 316 * 317 * Currently libc constructs a ucontext from whole cloth for 318 * every new (not main) lwp created. For 64 bit processes 319 * %fsbase is directly set to point to current thread pointer. 320 * In the past (solaris 10) %fs was also set LWPFS_SEL to 321 * indicate %fsbase. Now we use the null GDT selector for 322 * this purpose. LWP[FS|GS]_SEL are only intended for 32 bit 323 * processes. To ease transition we support older libcs in 324 * the newer kernel by forcing %fs or %gs selector to null 325 * by calling lwp_setprivate if LWP[FS|GS]_SEL is passed in 326 * the ucontext. This is should be ripped out at some future 327 * date. Another fix would be for libc to do a getcontext 328 * and inherit the null %fs/%gs from the current context but 329 * that means an extra system call and could hurt performance. 330 */ 331 if (grp[REG_FS] == 0x1bb) /* hard code legacy LWPFS_SEL */ 332 (void) lwp_setprivate(lwp, _LWP_FSBASE, 333 (uintptr_t)grp[REG_FSBASE]); 334 335 if (grp[REG_GS] == 0x1c3) /* hard code legacy LWPGS_SEL */ 336 (void) lwp_setprivate(lwp, _LWP_GSBASE, 337 (uintptr_t)grp[REG_GSBASE]); 338 } 339 #else 340 if (grp[GS] == LWPGS_SEL) 341 (void) lwp_setprivate(lwp, _LWP_GSBASE, thrptr); 342 #endif 343 344 lwp->lwp_eosys = JUSTRETURN; 345 lwptot(lwp)->t_post_sys = 1; 346 } 347 348 /* 349 * set syscall()'s return values for a lwp. 350 */ 351 void 352 lwp_setrval(klwp_t *lwp, int v1, int v2) 353 { 354 lwptoregs(lwp)->r_ps &= ~PS_C; 355 lwptoregs(lwp)->r_r0 = v1; 356 lwptoregs(lwp)->r_r1 = v2; 357 } 358 359 /* 360 * set syscall()'s return values for a lwp. 361 */ 362 void 363 lwp_setsp(klwp_t *lwp, caddr_t sp) 364 { 365 lwptoregs(lwp)->r_sp = (intptr_t)sp; 366 } 367 368 /* 369 * Copy regs from parent to child. 370 */ 371 void 372 lwp_forkregs(klwp_t *lwp, klwp_t *clwp) 373 { 374 #if defined(__amd64) 375 struct pcb *pcb = &clwp->lwp_pcb; 376 struct regs *rp = lwptoregs(lwp); 377 378 if (pcb->pcb_rupdate == 0) { 379 pcb->pcb_ds = rp->r_ds; 380 pcb->pcb_es = rp->r_es; 381 pcb->pcb_fs = rp->r_fs; 382 pcb->pcb_gs = rp->r_gs; 383 pcb->pcb_rupdate = 1; 384 lwptot(clwp)->t_post_sys = 1; 385 } 386 ASSERT(lwptot(clwp)->t_post_sys); 387 #endif 388 389 bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct regs)); 390 } 391 392 /* 393 * This function is currently unused on x86. 394 */ 395 /*ARGSUSED*/ 396 void 397 lwp_freeregs(klwp_t *lwp, int isexec) 398 {} 399 400 /* 401 * This function is currently unused on x86. 402 */ 403 void 404 lwp_pcb_exit(void) 405 {} 406 407 /* 408 * Lwp context ops for segment registers. 409 */ 410 411 /* 412 * Every time we come into the kernel (syscall, interrupt or trap 413 * but not fast-traps) we capture the current values of the user's 414 * segment registers into the lwp's reg structure. This includes 415 * lcall for i386 generic system call support since it is handled 416 * as a segment-not-present trap. 417 * 418 * Here we save the current values from the lwp regs into the pcb 419 * and set pcb->pcb_rupdate to 1 to tell the rest of the kernel 420 * that the pcb copy of the segment registers is the current one. 421 * This ensures the lwp's next trip to user land via update_sregs. 422 * Finally we set t_post_sys to ensure that no system call fast-path's 423 * its way out of the kernel via sysret. 424 * 425 * (This means that we need to have interrupts disabled when we test 426 * t->t_post_sys in the syscall handlers; if the test fails, we need 427 * to keep interrupts disabled until we return to userland so we can't 428 * be switched away.) 429 * 430 * As a result of all this, we don't really have to do a whole lot if 431 * the thread is just mucking about in the kernel, switching on and 432 * off the cpu for whatever reason it feels like. And yet we still 433 * preserve fast syscalls, cause if we -don't- get descheduled, 434 * we never come here either. 435 */ 436 437 #define VALID_LWP_DESC(udp) ((udp)->usd_type == SDT_MEMRWA && \ 438 (udp)->usd_p == 1 && (udp)->usd_dpl == SEL_UPL) 439 440 void 441 lwp_segregs_save(klwp_t *lwp) 442 { 443 #if defined(__amd64) 444 pcb_t *pcb = &lwp->lwp_pcb; 445 struct regs *rp; 446 447 ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc)); 448 ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc)); 449 450 if (pcb->pcb_rupdate == 0) { 451 rp = lwptoregs(lwp); 452 453 /* 454 * If there's no update already pending, capture the current 455 * %ds/%es/%fs/%gs values from lwp's regs in case the user 456 * changed them; %fsbase and %gsbase are privileged so the 457 * kernel versions of these registers in pcb_fsbase and 458 * pcb_gsbase are always up-to-date. 459 */ 460 pcb->pcb_ds = rp->r_ds; 461 pcb->pcb_es = rp->r_es; 462 pcb->pcb_fs = rp->r_fs; 463 pcb->pcb_gs = rp->r_gs; 464 pcb->pcb_rupdate = 1; 465 lwp->lwp_thread->t_post_sys = 1; 466 } 467 #endif /* __amd64 */ 468 469 ASSERT(bcmp(&CPU->cpu_gdt[GDT_LWPFS], &lwp->lwp_pcb.pcb_fsdesc, 470 sizeof (lwp->lwp_pcb.pcb_fsdesc)) == 0); 471 ASSERT(bcmp(&CPU->cpu_gdt[GDT_LWPGS], &lwp->lwp_pcb.pcb_gsdesc, 472 sizeof (lwp->lwp_pcb.pcb_gsdesc)) == 0); 473 } 474 475 #if defined(__amd64) 476 477 /* 478 * Update the segment registers with new values from the pcb 479 * 480 * We have to do this carefully, and in the following order, 481 * in case any of the selectors points at a bogus descriptor. 482 * If they do, we'll catch trap with on_trap and return 1. 483 * returns 0 on success. 484 * 485 * This is particularly tricky for %gs. 486 * This routine must be executed under a cli. 487 */ 488 int 489 update_sregs(struct regs *rp, klwp_t *lwp) 490 { 491 pcb_t *pcb = &lwp->lwp_pcb; 492 ulong_t kgsbase; 493 on_trap_data_t otd; 494 int rc = 0; 495 496 if (!on_trap(&otd, OT_SEGMENT_ACCESS)) { 497 498 kgsbase = (ulong_t)CPU; 499 __set_gs(pcb->pcb_gs); 500 501 /* 502 * If __set_gs fails it's because the new %gs is a bad %gs, 503 * we'll be taking a trap but with the original %gs and %gsbase 504 * undamaged (i.e. pointing at curcpu). 505 * 506 * We've just mucked up the kernel's gsbase. Oops. In 507 * particular we can't take any traps at all. Make the newly 508 * computed gsbase be the hidden gs via __swapgs , and fix 509 * the kernel's gsbase back again. Later, when we return to 510 * userland we'll swapgs again restoring gsbase just loaded 511 * above. 512 */ 513 __swapgs(); 514 rp->r_gs = pcb->pcb_gs; 515 516 /* 517 * restore kernel's gsbase 518 */ 519 wrmsr(MSR_AMD_GSBASE, kgsbase); 520 521 /* 522 * Only override the descriptor base address if 523 * r_gs == LWPGS_SEL or if r_gs == NULL. A note on 524 * NULL descriptors -- 32-bit programs take faults 525 * if they deference NULL descriptors; however, 526 * when 64-bit programs load them into %fs or %gs, 527 * they DONT fault -- only the base address remains 528 * whatever it was from the last load. Urk. 529 * 530 * XXX - note that lwp_setprivate now sets %fs/%gs to the 531 * null selector for 64 bit processes. Whereas before 532 * %fs/%gs were set to LWP(FS|GS)_SEL regardless of 533 * the process's data model. For now we check for both 534 * values so that the kernel can also support the older 535 * libc. This should be ripped out at some point in the 536 * future. 537 */ 538 if (pcb->pcb_gs == LWPGS_SEL || pcb->pcb_gs == 0) 539 wrmsr(MSR_AMD_KGSBASE, pcb->pcb_gsbase); 540 541 __set_ds(pcb->pcb_ds); 542 rp->r_ds = pcb->pcb_ds; 543 544 __set_es(pcb->pcb_es); 545 rp->r_es = pcb->pcb_es; 546 547 __set_fs(pcb->pcb_fs); 548 rp->r_fs = pcb->pcb_fs; 549 550 /* 551 * Same as for %gs 552 */ 553 if (pcb->pcb_fs == LWPFS_SEL || pcb->pcb_fs == 0) 554 wrmsr(MSR_AMD_FSBASE, pcb->pcb_fsbase); 555 556 } else { 557 cli(); 558 rc = 1; 559 } 560 no_trap(); 561 return (rc); 562 } 563 #endif /* __amd64 */ 564 565 #ifdef _SYSCALL32_IMPL 566 567 /* 568 * Make it impossible for a process to change its data model. 569 * We do this by toggling the present bits for the 32 and 570 * 64-bit user code descriptors. That way if a user lwp attempts 571 * to change its data model (by using the wrong code descriptor in 572 * %cs) it will fault immediately. This also allows us to simplify 573 * assertions and checks in the kernel. 574 */ 575 static void 576 gdt_ucode_model(model_t model) 577 { 578 cpu_t *cpu; 579 580 kpreempt_disable(); 581 cpu = CPU; 582 if (model == DATAMODEL_NATIVE) { 583 cpu->cpu_gdt[GDT_UCODE].usd_p = 1; 584 cpu->cpu_gdt[GDT_U32CODE].usd_p = 0; 585 } else { 586 cpu->cpu_gdt[GDT_U32CODE].usd_p = 1; 587 cpu->cpu_gdt[GDT_UCODE].usd_p = 0; 588 } 589 kpreempt_enable(); 590 } 591 592 #endif /* _SYSCALL32_IMPL */ 593 594 /* 595 * Restore lwp private fs and gs segment descriptors 596 * on current cpu's GDT. 597 */ 598 static void 599 lwp_segregs_restore(klwp_t *lwp) 600 { 601 pcb_t *pcb = &lwp->lwp_pcb; 602 cpu_t *cpu = CPU; 603 604 ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc)); 605 ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc)); 606 607 #ifdef _SYSCALL32_IMPL 608 gdt_ucode_model(DATAMODEL_NATIVE); 609 #endif 610 611 cpu->cpu_gdt[GDT_LWPFS] = pcb->pcb_fsdesc; 612 cpu->cpu_gdt[GDT_LWPGS] = pcb->pcb_gsdesc; 613 614 } 615 616 #ifdef _SYSCALL32_IMPL 617 618 static void 619 lwp_segregs_restore32(klwp_t *lwp) 620 { 621 /*LINTED*/ 622 cpu_t *cpu = CPU; 623 pcb_t *pcb = &lwp->lwp_pcb; 624 625 ASSERT(VALID_LWP_DESC(&lwp->lwp_pcb.pcb_fsdesc)); 626 ASSERT(VALID_LWP_DESC(&lwp->lwp_pcb.pcb_gsdesc)); 627 628 gdt_ucode_model(DATAMODEL_ILP32); 629 cpu->cpu_gdt[GDT_LWPFS] = pcb->pcb_fsdesc; 630 cpu->cpu_gdt[GDT_LWPGS] = pcb->pcb_gsdesc; 631 } 632 633 #endif /* _SYSCALL32_IMPL */ 634 635 /* 636 * If this is a process in a branded zone, then we want it to use the brand 637 * syscall entry points instead of the standard Solaris entry points. This 638 * routine must be called when a new lwp is created within a branded zone 639 * or when an existing lwp moves into a branded zone via a zone_enter() 640 * operation. 641 */ 642 void 643 lwp_attach_brand_hdlrs(klwp_t *lwp) 644 { 645 kthread_t *t = lwptot(lwp); 646 647 ASSERT(PROC_IS_BRANDED(lwptoproc(lwp))); 648 ASSERT(removectx(t, NULL, brand_interpositioning_disable, 649 brand_interpositioning_enable, NULL, NULL, 650 brand_interpositioning_disable, NULL) == 0); 651 652 installctx(t, NULL, brand_interpositioning_disable, 653 brand_interpositioning_enable, NULL, NULL, 654 brand_interpositioning_disable, NULL); 655 656 if (t == curthread) { 657 kpreempt_disable(); 658 brand_interpositioning_enable(); 659 kpreempt_enable(); 660 } 661 } 662 663 /* 664 * Add any lwp-associated context handlers to the lwp at the beginning 665 * of the lwp's useful life. 666 * 667 * All paths which create lwp's invoke lwp_create(); lwp_create() 668 * invokes lwp_stk_init() which initializes the stack, sets up 669 * lwp_regs, and invokes this routine. 670 * 671 * All paths which destroy lwp's invoke lwp_exit() to rip the lwp 672 * apart and put it on 'lwp_deathrow'; if the lwp is destroyed it 673 * ends up in thread_free() which invokes freectx(t, 0) before 674 * invoking lwp_stk_fini(). When the lwp is recycled from death 675 * row, lwp_stk_fini() is invoked, then thread_free(), and thus 676 * freectx(t, 0) as before. 677 * 678 * In the case of exec, the surviving lwp is thoroughly scrubbed 679 * clean; exec invokes freectx(t, 1) to destroy associated contexts. 680 * On the way back to the new image, it invokes setregs() which 681 * in turn invokes this routine. 682 */ 683 void 684 lwp_installctx(klwp_t *lwp) 685 { 686 kthread_t *t = lwptot(lwp); 687 int thisthread = t == curthread; 688 #ifdef _SYSCALL32_IMPL 689 void (*restop)(klwp_t *) = lwp_getdatamodel(lwp) == DATAMODEL_NATIVE ? 690 lwp_segregs_restore : lwp_segregs_restore32; 691 #else 692 void (*restop)(klwp_t *) = lwp_segregs_restore; 693 #endif 694 695 /* 696 * Install the basic lwp context handlers on each lwp. 697 * 698 * On the amd64 kernel, the context handlers are responsible for 699 * virtualizing %ds, %es, %fs, and %gs to the lwp. The register 700 * values are only ever changed via sys_rtt when the 701 * pcb->pcb_rupdate == 1. Only sys_rtt gets to clear the bit. 702 * 703 * On the i386 kernel, the context handlers are responsible for 704 * virtualizing %gs/%fs to the lwp by updating the per-cpu GDTs 705 */ 706 ASSERT(removectx(t, lwp, lwp_segregs_save, restop, 707 NULL, NULL, NULL, NULL) == 0); 708 if (thisthread) 709 kpreempt_disable(); 710 installctx(t, lwp, lwp_segregs_save, restop, 711 NULL, NULL, NULL, NULL); 712 if (thisthread) { 713 /* 714 * Since we're the right thread, set the values in the GDT 715 */ 716 restop(lwp); 717 kpreempt_enable(); 718 } 719 720 /* 721 * If we have sysenter/sysexit instructions enabled, we need 722 * to ensure that the hardware mechanism is kept up-to-date with the 723 * lwp's kernel stack pointer across context switches. 724 * 725 * sep_save zeros the sysenter stack pointer msr; sep_restore sets 726 * it to the lwp's kernel stack pointer (kstktop). 727 */ 728 if (x86_feature & X86_SEP) { 729 #if defined(__amd64) 730 caddr_t kstktop = (caddr_t)lwp->lwp_regs; 731 #elif defined(__i386) 732 caddr_t kstktop = ((caddr_t)lwp->lwp_regs - MINFRAME) + 733 SA(sizeof (struct regs) + MINFRAME); 734 #endif 735 ASSERT(removectx(t, kstktop, 736 sep_save, sep_restore, NULL, NULL, NULL, NULL) == 0); 737 738 if (thisthread) 739 kpreempt_disable(); 740 installctx(t, kstktop, 741 sep_save, sep_restore, NULL, NULL, NULL, NULL); 742 if (thisthread) { 743 /* 744 * We're the right thread, so set the stack pointer 745 * for the first sysenter instruction to use 746 */ 747 sep_restore(kstktop); 748 kpreempt_enable(); 749 } 750 } 751 752 if (PROC_IS_BRANDED(ttoproc(t))) 753 lwp_attach_brand_hdlrs(lwp); 754 } 755 756 /* 757 * Clear registers on exec(2). 758 */ 759 void 760 setregs(uarg_t *args) 761 { 762 struct regs *rp; 763 kthread_t *t = curthread; 764 klwp_t *lwp = ttolwp(t); 765 pcb_t *pcb = &lwp->lwp_pcb; 766 greg_t sp; 767 768 /* 769 * Initialize user registers 770 */ 771 (void) save_syscall_args(); /* copy args from registers first */ 772 rp = lwptoregs(lwp); 773 sp = rp->r_sp; 774 bzero(rp, sizeof (*rp)); 775 776 rp->r_ss = UDS_SEL; 777 rp->r_sp = sp; 778 rp->r_pc = args->entry; 779 rp->r_ps = PSL_USER; 780 781 #if defined(__amd64) 782 783 pcb->pcb_fs = pcb->pcb_gs = 0; 784 pcb->pcb_fsbase = pcb->pcb_gsbase = 0; 785 786 if (ttoproc(t)->p_model == DATAMODEL_NATIVE) { 787 788 rp->r_cs = UCS_SEL; 789 790 /* 791 * Only allow 64-bit user code descriptor to be present. 792 */ 793 gdt_ucode_model(DATAMODEL_NATIVE); 794 795 /* 796 * Arrange that the virtualized %fs and %gs GDT descriptors 797 * have a well-defined initial state (present, ring 3 798 * and of type data). 799 */ 800 pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc; 801 802 /* 803 * thrptr is either NULL or a value used by DTrace. 804 * 64-bit processes use %fs as their "thread" register. 805 */ 806 if (args->thrptr) 807 (void) lwp_setprivate(lwp, _LWP_FSBASE, args->thrptr); 808 809 } else { 810 811 rp->r_cs = U32CS_SEL; 812 rp->r_ds = rp->r_es = UDS_SEL; 813 814 /* 815 * only allow 32-bit user code selector to be present. 816 */ 817 gdt_ucode_model(DATAMODEL_ILP32); 818 819 pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_u32desc; 820 821 /* 822 * thrptr is either NULL or a value used by DTrace. 823 * 32-bit processes use %gs as their "thread" register. 824 */ 825 if (args->thrptr) 826 (void) lwp_setprivate(lwp, _LWP_GSBASE, args->thrptr); 827 828 } 829 830 pcb->pcb_ds = rp->r_ds; 831 pcb->pcb_es = rp->r_es; 832 pcb->pcb_rupdate = 1; 833 834 #elif defined(__i386) 835 836 rp->r_cs = UCS_SEL; 837 rp->r_ds = rp->r_es = UDS_SEL; 838 839 /* 840 * Arrange that the virtualized %fs and %gs GDT descriptors 841 * have a well-defined initial state (present, ring 3 842 * and of type data). 843 */ 844 pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc; 845 846 /* 847 * For %gs we need to reset LWP_GSBASE in pcb and the 848 * per-cpu GDT descriptor. thrptr is either NULL 849 * or a value used by DTrace. 850 */ 851 if (args->thrptr) 852 (void) lwp_setprivate(lwp, _LWP_GSBASE, args->thrptr); 853 #endif 854 855 lwp->lwp_eosys = JUSTRETURN; 856 t->t_post_sys = 1; 857 858 /* 859 * Here we initialize minimal fpu state. 860 * The rest is done at the first floating 861 * point instruction that a process executes. 862 */ 863 pcb->pcb_fpu.fpu_flags = 0; 864 865 /* 866 * Add the lwp context handlers that virtualize segment registers, 867 * and/or system call stacks etc. 868 */ 869 lwp_installctx(lwp); 870 } 871 872 user_desc_t * 873 cpu_get_gdt(void) 874 { 875 return (CPU->cpu_gdt); 876 } 877 878 879 #if !defined(lwp_getdatamodel) 880 881 /* 882 * Return the datamodel of the given lwp. 883 */ 884 /*ARGSUSED*/ 885 model_t 886 lwp_getdatamodel(klwp_t *lwp) 887 { 888 return (lwp->lwp_procp->p_model); 889 } 890 891 #endif /* !lwp_getdatamodel */ 892 893 #if !defined(get_udatamodel) 894 895 model_t 896 get_udatamodel(void) 897 { 898 return (curproc->p_model); 899 } 900 901 #endif /* !get_udatamodel */ 902