1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/schedctl.h> 31 #include <sys/proc.h> 32 #include <sys/thread.h> 33 #include <sys/class.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/cmn_err.h> 37 #include <sys/stack.h> 38 #include <sys/debug.h> 39 #include <sys/cpuvar.h> 40 #include <sys/sobject.h> 41 #include <sys/door.h> 42 #include <sys/modctl.h> 43 #include <sys/syscall.h> 44 #include <sys/sysmacros.h> 45 #include <sys/vmsystm.h> 46 #include <sys/mman.h> 47 #include <sys/vnode.h> 48 #include <sys/swap.h> 49 #include <sys/lwp.h> 50 #include <sys/bitmap.h> 51 #include <sys/atomic.h> 52 #include <sys/fcntl.h> 53 #include <vm/seg_kp.h> 54 #include <vm/seg_vn.h> 55 #include <vm/as.h> 56 #include <fs/fs_subr.h> 57 58 59 /* 60 * Page handling structures. This is set up as a list of per-page 61 * control structures (sc_page_ctl), with p->p_pagep pointing to 62 * the first. The per-page structures point to the actual pages 63 * and contain pointers to the user address for each mapped page. 64 * 65 * All data is protected by p->p_sc_lock. Since this lock is 66 * held while waiting for memory, schedctl_shared_alloc() should 67 * not be called while holding p_lock. 68 */ 69 70 typedef struct sc_page_ctl { 71 struct sc_page_ctl *spc_next; 72 sc_shared_t *spc_base; /* base of kernel page */ 73 sc_shared_t *spc_end; /* end of usable space */ 74 ulong_t *spc_map; /* bitmap of allocated space on page */ 75 size_t spc_space; /* amount of space on page */ 76 caddr_t spc_uaddr; /* user-level address of the page */ 77 struct anon_map *spc_amp; /* anonymous memory structure */ 78 } sc_page_ctl_t; 79 80 static size_t sc_pagesize; /* size of usable space on page */ 81 static size_t sc_bitmap_len; /* # of bits in allocation bitmap */ 82 static size_t sc_bitmap_words; /* # of words in allocation bitmap */ 83 84 /* Context ops */ 85 static void schedctl_save(sc_shared_t *); 86 static void schedctl_restore(sc_shared_t *); 87 static void schedctl_fork(kthread_t *, kthread_t *); 88 89 /* Functions for handling shared pages */ 90 static int schedctl_shared_alloc(sc_shared_t **, uintptr_t *); 91 static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *); 92 static int schedctl_map(struct anon_map *, caddr_t *, caddr_t); 93 static int schedctl_getpage(struct anon_map **, caddr_t *); 94 static void schedctl_freepage(struct anon_map *, caddr_t); 95 96 /* 97 * System call interface to scheduler activations. 98 * This always operates on the current lwp. 99 */ 100 caddr_t 101 schedctl(void) 102 { 103 kthread_t *t = curthread; 104 sc_shared_t *ssp; 105 uintptr_t uaddr; 106 int error; 107 108 if (t->t_schedctl == NULL) { 109 /* 110 * Allocate and initialize the shared structure. 111 */ 112 if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0) 113 return ((caddr_t)(uintptr_t)set_errno(error)); 114 bzero(ssp, sizeof (*ssp)); 115 116 installctx(t, ssp, schedctl_save, schedctl_restore, 117 schedctl_fork, NULL, NULL, NULL); 118 119 thread_lock(t); /* protect against ts_tick and ts_update */ 120 t->t_schedctl = ssp; 121 t->t_sc_uaddr = uaddr; 122 thread_unlock(t); 123 } 124 125 return ((caddr_t)t->t_sc_uaddr); 126 } 127 128 129 /* 130 * Clean up scheduler activations state associated with an exiting 131 * (or execing) lwp. t is always the current thread. 132 */ 133 void 134 schedctl_lwp_cleanup(kthread_t *t) 135 { 136 sc_shared_t *ssp = t->t_schedctl; 137 proc_t *p = ttoproc(t); 138 sc_page_ctl_t *pagep; 139 index_t index; 140 141 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 142 143 thread_lock(t); /* protect against ts_tick and ts_update */ 144 t->t_schedctl = NULL; 145 t->t_sc_uaddr = 0; 146 thread_unlock(t); 147 148 /* 149 * Remove the context op to avoid the final call to 150 * schedctl_save when switching away from this lwp. 151 */ 152 (void) removectx(t, ssp, schedctl_save, schedctl_restore, 153 schedctl_fork, NULL, NULL, NULL); 154 155 /* 156 * Do not unmap the shared page until the process exits. 157 * User-level library code relies on this for adaptive mutex locking. 158 */ 159 mutex_enter(&p->p_sc_lock); 160 ssp->sc_state = SC_FREE; 161 pagep = schedctl_page_lookup(ssp); 162 index = (index_t)(ssp - pagep->spc_base); 163 BT_CLEAR(pagep->spc_map, index); 164 pagep->spc_space += sizeof (sc_shared_t); 165 mutex_exit(&p->p_sc_lock); 166 } 167 168 /* 169 * Cleanup the list of schedctl shared pages for the process. 170 * Called from exec() and exit() system calls. 171 */ 172 void 173 schedctl_proc_cleanup() 174 { 175 proc_t *p = curproc; 176 sc_page_ctl_t *pagep; 177 sc_page_ctl_t *next; 178 179 ASSERT(p->p_lwpcnt == 1); /* we are single-threaded now */ 180 ASSERT(curthread->t_schedctl == NULL); 181 182 /* 183 * Since we are single-threaded, we don't have to hold p->p_sc_lock. 184 */ 185 pagep = p->p_pagep; 186 p->p_pagep = NULL; 187 while (pagep != NULL) { 188 ASSERT(pagep->spc_space == sc_pagesize); 189 next = pagep->spc_next; 190 /* 191 * Unmap the user space and free the mapping structure. 192 */ 193 (void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE); 194 schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base)); 195 kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words); 196 kmem_free(pagep, sizeof (sc_page_ctl_t)); 197 pagep = next; 198 } 199 } 200 201 /* 202 * Called by resume just before switching away from the current thread. 203 * Save new thread state. 204 */ 205 void 206 schedctl_save(sc_shared_t *ssp) 207 { 208 ssp->sc_state = curthread->t_state; 209 } 210 211 212 /* 213 * Called by resume after switching to the current thread. 214 * Save new thread state and CPU. 215 */ 216 void 217 schedctl_restore(sc_shared_t *ssp) 218 { 219 ssp->sc_state = SC_ONPROC; 220 ssp->sc_cpu = CPU->cpu_id; 221 } 222 223 224 /* 225 * On fork, remove inherited mappings from the child's address space. 226 * The child's threads must call schedctl() to get new shared mappings. 227 */ 228 void 229 schedctl_fork(kthread_t *pt, kthread_t *ct) 230 { 231 proc_t *pp = ttoproc(pt); 232 proc_t *cp = ttoproc(ct); 233 sc_page_ctl_t *pagep; 234 235 ASSERT(ct->t_schedctl == NULL); 236 237 /* 238 * Do this only once, whether we are doing fork1() or forkall(). 239 * Don't do it at all if the child process is a child of vfork() 240 * because a child of vfork() borrows the parent's address space. 241 */ 242 if (pt != curthread || (cp->p_flag & SVFORK)) 243 return; 244 245 mutex_enter(&pp->p_sc_lock); 246 for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next) 247 (void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE); 248 mutex_exit(&pp->p_sc_lock); 249 } 250 251 /* 252 * Returns non-zero if the specified thread shouldn't be preempted at this time. 253 * Called by ts_preempt, ts_tick, and ts_update. 254 */ 255 int 256 schedctl_get_nopreempt(kthread_t *t) 257 { 258 ASSERT(THREAD_LOCK_HELD(t)); 259 return (t->t_schedctl->sc_preemptctl.sc_nopreempt); 260 } 261 262 263 /* 264 * Sets the value of the nopreempt field for the specified thread. 265 * Called by ts_preempt to clear the field on preemption. 266 */ 267 void 268 schedctl_set_nopreempt(kthread_t *t, short val) 269 { 270 ASSERT(THREAD_LOCK_HELD(t)); 271 t->t_schedctl->sc_preemptctl.sc_nopreempt = val; 272 } 273 274 275 /* 276 * Sets the value of the yield field for the specified thread. Called by 277 * ts_preempt and ts_tick to set the field, and ts_yield to clear it. 278 * The kernel never looks at this field so we don't need a schedctl_get_yield 279 * function. 280 */ 281 void 282 schedctl_set_yield(kthread_t *t, short val) 283 { 284 ASSERT(THREAD_LOCK_HELD(t)); 285 t->t_schedctl->sc_preemptctl.sc_yield = val; 286 } 287 288 289 /* 290 * Returns non-zero if the specified thread has requested that all 291 * signals be blocked. Called by signal-related code that tests 292 * the signal mask of a thread that may not be the current thread 293 * and where the process's p_lock cannot be acquired. 294 */ 295 int 296 schedctl_sigblock(kthread_t *t) 297 { 298 sc_shared_t *tdp = t->t_schedctl; 299 300 if (tdp != NULL) 301 return (tdp->sc_sigblock); 302 return (0); 303 } 304 305 306 /* 307 * If the sc_sigblock field is set for the specified thread, set 308 * its signal mask to block all maskable signals, then clear the 309 * sc_sigblock field. This finishes what user-level code requested 310 * to be done when it set tdp->sc_shared->sc_sigblock non-zero. 311 * Called by signal-related code that holds the process's p_lock. 312 */ 313 void 314 schedctl_finish_sigblock(kthread_t *t) 315 { 316 sc_shared_t *tdp = t->t_schedctl; 317 318 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 319 320 if (tdp != NULL && tdp->sc_sigblock) { 321 t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0; 322 t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1; 323 tdp->sc_sigblock = 0; 324 } 325 } 326 327 328 /* 329 * Return non-zero if the current thread has declared that 330 * it is calling into the kernel to park, else return zero. 331 */ 332 int 333 schedctl_is_park() 334 { 335 sc_shared_t *tdp = curthread->t_schedctl; 336 337 if (tdp != NULL) 338 return (tdp->sc_park); 339 /* 340 * If we're here and there is no shared memory (how could 341 * that happen?) then just assume we really are here to park. 342 */ 343 return (1); 344 } 345 346 /* 347 * Declare thread is parking. 348 * 349 * libc will set "sc_park = 1" before calling lwpsys_park(0, tid) in order 350 * to declare that the thread is calling into the kernel to park. 351 * 352 * This interface exists ONLY to support older versions of libthread which 353 * are not aware of the sc_park flag. 354 * 355 * Older versions of libthread which are not aware of the sc_park flag need to 356 * be modified or emulated to call lwpsys_park(4, ...) instead of 357 * lwpsys_park(0, ...). This will invoke schedctl_set_park() before 358 * lwp_park() to declare that the thread is parking. 359 */ 360 void 361 schedctl_set_park() 362 { 363 sc_shared_t *tdp = curthread->t_schedctl; 364 365 if (tdp != NULL) 366 tdp->sc_park = 1; 367 } 368 369 /* 370 * Clear the shared sc_park flag on return from parking in the kernel. 371 */ 372 void 373 schedctl_unpark() 374 { 375 sc_shared_t *tdp = curthread->t_schedctl; 376 377 if (tdp != NULL) 378 tdp->sc_park = 0; 379 } 380 381 382 /* 383 * Page handling code. 384 */ 385 386 void 387 schedctl_init() 388 { 389 /* 390 * Amount of page that can hold sc_shared_t structures. If 391 * sizeof (sc_shared_t) is a power of 2, this should just be 392 * PAGESIZE. 393 */ 394 sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t)); 395 396 /* 397 * Allocation bitmap is one bit per struct on a page. 398 */ 399 sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t); 400 sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL); 401 } 402 403 int 404 schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp) 405 { 406 proc_t *p = curproc; 407 sc_page_ctl_t *pagep; 408 sc_shared_t *ssp; 409 caddr_t base; 410 index_t index; 411 int error; 412 413 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 414 mutex_enter(&p->p_sc_lock); 415 416 /* 417 * Try to find space for the new data in existing pages 418 * within the process's list of shared pages. 419 */ 420 for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) 421 if (pagep->spc_space != 0) 422 break; 423 424 if (pagep != NULL) 425 base = pagep->spc_uaddr; 426 else { 427 struct anon_map *amp; 428 caddr_t kaddr; 429 430 /* 431 * No room, need to allocate a new page. Also set up 432 * a mapping to the kernel address space for the new 433 * page and lock it in memory. 434 */ 435 if ((error = schedctl_getpage(&, &kaddr)) != 0) { 436 mutex_exit(&p->p_sc_lock); 437 return (error); 438 } 439 if ((error = schedctl_map(amp, &base, kaddr)) != 0) { 440 schedctl_freepage(amp, kaddr); 441 mutex_exit(&p->p_sc_lock); 442 return (error); 443 } 444 445 /* 446 * Allocate and initialize the page control structure. 447 */ 448 pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP); 449 pagep->spc_amp = amp; 450 pagep->spc_base = (sc_shared_t *)kaddr; 451 pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize); 452 pagep->spc_uaddr = base; 453 454 pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words, 455 KM_SLEEP); 456 pagep->spc_space = sc_pagesize; 457 458 pagep->spc_next = p->p_pagep; 459 p->p_pagep = pagep; 460 } 461 462 /* 463 * Got a page, now allocate space for the data. There should 464 * be space unless something's wrong. 465 */ 466 ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t)); 467 index = bt_availbit(pagep->spc_map, sc_bitmap_len); 468 ASSERT(index != -1); 469 470 /* 471 * Get location with pointer arithmetic. spc_base is of type 472 * sc_shared_t *. Mark as allocated. 473 */ 474 ssp = pagep->spc_base + index; 475 BT_SET(pagep->spc_map, index); 476 pagep->spc_space -= sizeof (sc_shared_t); 477 478 mutex_exit(&p->p_sc_lock); 479 480 /* 481 * Return kernel and user addresses. 482 */ 483 *kaddrp = ssp; 484 *uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET); 485 return (0); 486 } 487 488 489 /* 490 * Find the page control structure corresponding to a kernel address. 491 */ 492 static sc_page_ctl_t * 493 schedctl_page_lookup(sc_shared_t *ssp) 494 { 495 proc_t *p = curproc; 496 sc_page_ctl_t *pagep; 497 498 ASSERT(MUTEX_HELD(&p->p_sc_lock)); 499 for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) { 500 if (ssp >= pagep->spc_base && ssp < pagep->spc_end) 501 return (pagep); 502 } 503 return (NULL); /* This "can't happen". Should we panic? */ 504 } 505 506 507 /* 508 * This function is called when a page needs to be mapped into a 509 * process's address space. Allocate the user address space and 510 * set up the mapping to the page. Assumes the page has already 511 * been allocated and locked in memory via schedctl_getpage. 512 */ 513 static int 514 schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr) 515 { 516 caddr_t addr; 517 struct as *as = curproc->p_as; 518 struct segvn_crargs vn_a; 519 int error; 520 521 as_rangelock(as); 522 /* pass address of kernel mapping as offset to avoid VAC conflicts */ 523 map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0); 524 if (addr == NULL) { 525 as_rangeunlock(as); 526 return (ENOMEM); 527 } 528 529 /* 530 * Use segvn to set up the mapping to the page. 531 */ 532 vn_a.vp = NULL; 533 vn_a.offset = 0; 534 vn_a.cred = NULL; 535 vn_a.type = MAP_SHARED; 536 vn_a.prot = vn_a.maxprot = PROT_ALL; 537 vn_a.flags = 0; 538 vn_a.amp = amp; 539 vn_a.szc = 0; 540 vn_a.lgrp_mem_policy_flags = 0; 541 error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a); 542 as_rangeunlock(as); 543 544 if (error) 545 return (error); 546 547 *uaddrp = addr; 548 return (0); 549 } 550 551 552 /* 553 * Allocate a new page from anonymous memory. Also, create a kernel 554 * mapping to the page and lock the page in memory. 555 */ 556 static int 557 schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr) 558 { 559 struct anon_map *amp; 560 caddr_t kaddr; 561 562 /* 563 * Set up anonymous memory struct. No swap reservation is 564 * needed since the page will be locked into memory. 565 */ 566 amp = anonmap_alloc(PAGESIZE, 0); 567 568 /* 569 * Allocate the page. 570 */ 571 kaddr = segkp_get_withanonmap(segkp, PAGESIZE, 572 KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp); 573 if (kaddr == NULL) { 574 amp->refcnt--; 575 anonmap_free(amp); 576 return (ENOMEM); 577 } 578 579 /* 580 * The page is left SE_SHARED locked so that it won't be 581 * paged out or relocated (KPD_LOCKED above). 582 */ 583 584 *newamp = amp; 585 *newaddr = kaddr; 586 return (0); 587 } 588 589 590 /* 591 * Take the necessary steps to allow a page to be released. 592 * This is called when the process is doing exit() or exec(). 593 * There should be no accesses to the page after this. 594 * The kernel mapping of the page is released and the page is unlocked. 595 */ 596 static void 597 schedctl_freepage(struct anon_map *amp, caddr_t kaddr) 598 { 599 /* 600 * Release the lock on the page and remove the kernel mapping. 601 */ 602 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 603 segkp_release(segkp, kaddr); 604 605 /* 606 * Decrement the refcnt so the anon_map structure will be freed. 607 */ 608 if (--amp->refcnt == 0) { 609 /* 610 * The current process no longer has the page mapped, so 611 * we have to free everything rather than letting as_free 612 * do the work. 613 */ 614 anon_free(amp->ahp, 0, PAGESIZE); 615 ANON_LOCK_EXIT(&->a_rwlock); 616 anonmap_free(amp); 617 } else { 618 ANON_LOCK_EXIT(&->a_rwlock); 619 } 620 } 621