/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/var.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/vmsystm.h>
#include <sys/vmparam.h>
#include <sys/class.h>
#include <sys/vtrace.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/procfs.h>

#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg_kmem.h>
#include <sys/callb.h>

/*
 * The swapper sleeps on runout when there is no one to swap in.
 * It sleeps on runin when it could not find space to swap someone
 * in or after swapping someone in.
 */
char	runout;
char	runin;
char	wake_sched;	/* flag tells clock to wake swapper on next tick */
char	wake_sched_sec;	/* flag tells clock to wake swapper after a second */

/*
 * The swapper swaps processes to reduce memory demand and runs
 * when avefree < desfree.  The swapper resorts to SOFTSWAP when
 * avefree < desfree, which results in swapping out all processes
 * sleeping for more than maxslp seconds.  HARDSWAP occurs when the
 * system is on the verge of thrashing and results in swapping out
 * runnable threads or threads sleeping for less than maxslp secs.
 *
 * The swapper runs through all the active processes in the system
 * and invokes the scheduling class specific swapin/swapout routine
 * for every thread in the process to obtain an effective priority
 * for the process.  A priority of -1 implies that the thread isn't
 * swappable.  This effective priority is used to find the most
 * eligible process to swap out or swap in.
 *
 * NOTE:  Threads which have been swapped are not linked on any
 *	  queue and their dispatcher lock points at the "swapped_lock".
 *
 * Processes containing threads with the TS_DONT_SWAP flag set cannot be
 * swapped out immediately by the swapper.  This is due to the fact that
 * such threads may be holding locks which may be needed by the swapper
 * to push its pages out.  The TS_SWAPENQ flag is set on such threads
 * to prevent them running in user mode.  When such threads reach a
 * safe point (i.e., are not holding any locks - CL_TRAPRET), they
 * queue themselves onto the swap queue which is processed by the
 * swapper.  This reduces memory demand when the system is desperate
 * for memory, as the thread can't run in user mode.
 *
 * The swap queue consists of threads, linked via t_link, which haven't
 * yet been swapped out but are runnable and not on a run queue.  The
 * swap queue is protected by the "swapped_lock".  The dispatcher
 * lock (t_lockp) of all threads on the swap queue points at the
 * "swapped_lock".  Thus, the entire queue and/or threads on the
 * queue can be locked by acquiring "swapped_lock".
 */
static kthread_t *tswap_queue;
extern disp_lock_t swapped_lock; /* protects swap queue and threads on it */

int	maxslp = 0;
pgcnt_t	avefree;	/* 5 sec moving average of free memory */
pgcnt_t	avefree30;	/* 30 sec moving average of free memory */

/*
 * Minimum size used to decide if sufficient memory is available
 * before a process is swapped in.  This is necessary since in most
 * cases the actual size of a process (p_swrss) being swapped in
 * is usually 2 pages (kernel stack pages).  This is due to the fact
 * that almost all user pages of a process are stolen by pageout
 * before the swapper decides to swap it out.
 */
int	min_procsize = 12;

static int	swapin(proc_t *);
static int	swapout(proc_t *, uint_t *, int);
static void	process_swap_queue(void);

#ifdef __sparc
extern void lwp_swapin(kthread_t *);
#endif /* __sparc */

/*
 * Counters to keep track of the number of swapins or swapouts.
 */
uint_t tot_swapped_in, tot_swapped_out;
uint_t softswap, hardswap, swapqswap;

/*
 * Macro to determine if a process is eligible to be swapped.
 */
#define	not_swappable(p)					\
	(((p)->p_flag & SSYS) || (p)->p_stat == SIDL ||		\
	    (p)->p_stat == SZOMB || (p)->p_as == NULL ||	\
	    (p)->p_as == &kas)
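/*
 * A map of sched()'s control flow, reconstructed from the labels below:
 *
 *	loop:	drain the swap queue, then test for desperation; if the
 *		system is desperate, jump straight to "unload".
 *	top:	scan practive for the best swapin candidate, soft
 *		swapping deadwood along the way.  With no candidate,
 *		sleep on runout; with one, try to bring it in.
 *	unload:	reap unloadable modules and the segkp cache, then pick
 *		the lowest priority eligible process to hard swap.
 *	block:	nothing more can be done for now; sleep on runin.
 */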
/*
 * Memory scheduler.
 */
void
sched()
{
	kthread_id_t	t;
	pri_t		proc_pri;
	pri_t		thread_pri;
	pri_t		swapin_pri;
	int		desperate;
	pgcnt_t		needs;
	int		divisor;
	proc_t		*prp;
	proc_t		*swapout_prp;
	proc_t		*swapin_prp;
	spgcnt_t	avail;
	int		chosen_pri;
	time_t		swapout_time;
	time_t		swapin_proc_time;
	callb_cpr_t	cprinfo;
	kmutex_t	swap_cpr_lock;

	mutex_init(&swap_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cprinfo, &swap_cpr_lock, callb_generic_cpr, "sched");
	if (maxslp == 0)
		maxslp = MAXSLP;
loop:
	needs = 0;
	desperate = 0;

	swapin_pri = v.v_nglobpris;
	swapin_prp = NULL;
	chosen_pri = -1;

	process_swap_queue();

	/*
	 * Set desperate if
	 *	1.  At least 2 runnable processes (on average).
	 *	2.  Short (5 sec) and longer (30 sec) average is less
	 *	    than minfree and desfree respectively.
	 *	3.  Pagein + pageout rate is excessive.
	 */
	if (avenrun[0] >= 2 * FSCALE &&
	    (MAX(avefree, avefree30) < desfree) &&
	    (pginrate + pgoutrate > maxpgio || avefree < minfree)) {
		TRACE_4(TR_FAC_SCHED, TR_DESPERATE,
		    "desp:avefree: %d, avefree30: %d, freemem: %d"
		    " pginrate: %d\n", avefree, avefree30, freemem, pginrate);
		desperate = 1;
		goto unload;
	}
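	/*
	 * To illustrate with assumed numbers: a one-minute load average
	 * of 2.5 (avenrun[0] == 2.5 * FSCALE), avefree = 200 and
	 * avefree30 = 300 pages against desfree = 400, together with a
	 * paging rate above maxpgio, satisfy all three tests above and
	 * send the swapper straight to hard swapping at "unload".
	 */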
	/*
	 * Search list of processes to swapin and swapout deadwood.
	 */
	swapin_proc_time = 0;
top:
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * Look at processes with at least one swapped lwp.
		 */
		if (prp->p_swapcnt) {
			time_t proc_time;

			/*
			 * Higher priority processes are good candidates
			 * to swapin.
			 */
			mutex_enter(&prp->p_lock);
			proc_pri = -1;
			t = prp->p_tlist;
			proc_time = 0;
			do {
				if (t->t_schedflag & TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPIN(t, 0);
				thread_unlock(t);

				if (t->t_stime - proc_time > 0)
					proc_time = t->t_stime;
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);
			mutex_exit(&prp->p_lock);

			if (proc_pri == -1)
				continue;

			TRACE_3(TR_FAC_SCHED, TR_CHOOSE_SWAPIN,
			    "prp %p epri %d proc_time %d",
			    prp, proc_pri, proc_time);

			/*
			 * Swapin processes with a high effective priority.
			 */
			if (swapin_prp == NULL || proc_pri > chosen_pri) {
				swapin_prp = prp;
				chosen_pri = proc_pri;
				swapin_pri = proc_pri;
				swapin_proc_time = proc_time;
			}
		} else {
			/*
			 * No need to soft swap if we have sufficient
			 * memory.
			 */
			if (avefree > desfree ||
			    (avefree < desfree && freemem > desfree))
				continue;

			/*
			 * Skip processes that are exiting
			 * or whose address spaces are locked.
			 */
			mutex_enter(&prp->p_lock);
			if ((prp->p_flag & SEXITING) ||
			    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
				mutex_exit(&prp->p_lock);
				continue;
			}

			/*
			 * Softswapping to kick out deadwood.
			 */
			proc_pri = -1;
			t = prp->p_tlist;
			do {
				if ((t->t_schedflag & (TS_SWAPENQ |
				    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPOUT(t, SOFTSWAP);
				thread_unlock(t);
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);

			if (proc_pri != -1) {
				uint_t swrss;

				mutex_exit(&pidlock);

				TRACE_1(TR_FAC_SCHED, TR_SOFTSWAP,
				    "softswap:prp %p", prp);

				(void) swapout(prp, &swrss, SOFTSWAP);
				softswap++;
				prp->p_swrss += swrss;
				mutex_exit(&prp->p_lock);
				goto top;
			}
			mutex_exit(&prp->p_lock);
		}
	}
	if (swapin_prp != NULL)
		mutex_enter(&swapin_prp->p_lock);
	mutex_exit(&pidlock);

	if (swapin_prp == NULL) {
		TRACE_3(TR_FAC_SCHED, TR_RUNOUT,
		    "schedrunout:runout nswapped: %d, avefree: %ld freemem: %ld",
		    nswapped, avefree, freemem);

		t = curthread;
		thread_lock(t);
		runout++;
		t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
		t->t_whystop = PR_SUSPENDED;
		t->t_whatstop = SUSPEND_NORMAL;
		(void) new_mstate(t, LMS_SLEEP);
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		mutex_exit(&swap_cpr_lock);
		thread_stop(t);		/* change state and drop lock */
		swtch();
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
		mutex_exit(&swap_cpr_lock);
		goto loop;
	}

	/*
	 * Decide how deserving this process is to be brought in.
	 * Needs is an estimate of how much core the process will
	 * need.  If the process has been out for a while, then we
	 * will bring it in with 1/2 the core needed, otherwise
	 * we are conservative.
	 */
	divisor = 1;
	swapout_time = (ddi_get_lbolt() - swapin_proc_time) / hz;
	if (swapout_time > maxslp / 2)
		divisor = 2;

	needs = MIN(swapin_prp->p_swrss, lotsfree);
	needs = MAX(needs, min_procsize);
	needs = needs / divisor;
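	/*
	 * With the default min_procsize of 12 and illustrative values:
	 * a process with p_swrss = 8 pages (lotsfree being larger) that
	 * has been out longer than maxslp / 2 seconds gets
	 * needs = MAX(MIN(8, lotsfree), 12) / 2 = 6 pages.
	 */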
	/*
	 * Use freemem, since we want processes to be swapped
	 * in quickly.
	 */
	avail = freemem - deficit;
	if (avail > (spgcnt_t)needs) {
		deficit += needs;

		TRACE_2(TR_FAC_SCHED, TR_SWAPIN_VALUES,
		    "swapin_values: prp %p needs %lu", swapin_prp, needs);

		if (swapin(swapin_prp)) {
			mutex_exit(&swapin_prp->p_lock);
			goto loop;
		}
		deficit -= MIN(needs, deficit);
		mutex_exit(&swapin_prp->p_lock);
	} else {
		mutex_exit(&swapin_prp->p_lock);
		/*
		 * If deficit is high, too many processes have been
		 * swapped in so wait a sec before attempting to
		 * swapin more.
		 */
		if (freemem > needs) {
			TRACE_2(TR_FAC_SCHED, TR_HIGH_DEFICIT,
			    "deficit: prp %p needs %lu", swapin_prp, needs);
			goto block;
		}
	}

	TRACE_2(TR_FAC_SCHED, TR_UNLOAD,
	    "unload: prp %p needs %lu", swapin_prp, needs);

unload:
	/*
	 * Unload all unloadable modules, free all other memory
	 * resources we can find, then look for a thread to hardswap.
	 */
	modreap();
	segkp_cache_free();

	swapout_prp = NULL;
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {

		if (not_swappable(prp))
			continue;

		/*
		 * No need to hard swap if sufficient
		 * memory is available.
		 */
		if (avefree > minfree ||
		    (avefree < minfree && freemem > desfree)) {
			swapout_prp = NULL;
			break;
		}

		/*
		 * Skip processes that are exiting
		 * or whose address spaces are locked.
		 */
		mutex_enter(&prp->p_lock);
		if ((prp->p_flag & SEXITING) ||
		    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
			mutex_exit(&prp->p_lock);
			continue;
		}

		proc_pri = -1;
		t = prp->p_tlist;
		do {
			if ((t->t_schedflag & (TS_SWAPENQ |
			    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
				continue;

			thread_lock(t);
			thread_pri = CL_SWAPOUT(t, HARDSWAP);
			thread_unlock(t);
			if (thread_pri > proc_pri)
				proc_pri = thread_pri;
		} while ((t = t->t_forw) != prp->p_tlist);

		mutex_exit(&prp->p_lock);
		if (proc_pri == -1)
			continue;

		/*
		 * Swapout processes sleeping with a lower priority
		 * than the one currently being swapped in, if any.
		 */
		if (swapin_prp == NULL || swapin_pri > proc_pri) {
			TRACE_2(TR_FAC_SCHED, TR_CHOOSE_SWAPOUT,
			    "hardswap: prp %p needs %lu", prp, needs);

			if (swapout_prp == NULL || proc_pri < chosen_pri) {
				swapout_prp = prp;
				chosen_pri = proc_pri;
			}
		}
	}

	/*
	 * Acquire the "p_lock" before dropping "pidlock"
	 * to prevent the proc structure from being freed
	 * if the process exits before swapout completes.
	 */
	if (swapout_prp != NULL)
		mutex_enter(&swapout_prp->p_lock);
	mutex_exit(&pidlock);

	if ((prp = swapout_prp) != NULL) {
		uint_t swrss = 0;
		int swapped;

		swapped = swapout(prp, &swrss, HARDSWAP);
		if (swapped) {
			/*
			 * If desperate, we want to give the space obtained
			 * by swapping this process out to processes in core,
			 * so we give them a chance by increasing deficit.
			 */
			prp->p_swrss += swrss;
			if (desperate)
				deficit += MIN(prp->p_swrss, lotsfree);
			hardswap++;
		}
		mutex_exit(&swapout_prp->p_lock);

		if (swapped)
			goto loop;
	}
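	/*
	 * The runin flag set below is presumably what the clock
	 * interrupt examines (together with wake_sched and
	 * wake_sched_sec, declared at the top of this file) to decide
	 * when to make the stopped swapper thread runnable again.
	 */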
	/*
	 * Delay for 1 second and look again later.
	 */
	TRACE_3(TR_FAC_SCHED, TR_RUNIN,
	    "schedrunin:runin nswapped: %d, avefree: %ld freemem: %ld",
	    nswapped, avefree, freemem);

block:
	t = curthread;
	thread_lock(t);
	runin++;
	t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
	t->t_whystop = PR_SUSPENDED;
	t->t_whatstop = SUSPEND_NORMAL;
	(void) new_mstate(t, LMS_SLEEP);
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	mutex_exit(&swap_cpr_lock);
	thread_stop(t);		/* change to stop state and drop lock */
	swtch();
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
	mutex_exit(&swap_cpr_lock);
	goto loop;
}

/*
 * Remove the specified thread from the swap queue.
 */
static void
swapdeq(kthread_id_t tp)
{
	kthread_id_t *tpp;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT(tp->t_schedflag & TS_ON_SWAPQ);

	tpp = &tswap_queue;
	for (;;) {
		ASSERT(*tpp != NULL);
		if (*tpp == tp)
			break;
		tpp = &(*tpp)->t_link;
	}
	*tpp = tp->t_link;
	tp->t_schedflag &= ~TS_ON_SWAPQ;
}

/*
 * Swap in lwps.  Returns nonzero on success (i.e., if at least one lwp is
 * swapped in) and 0 on failure.
 */
static int
swapin(proc_t *pp)
{
	kthread_id_t tp;
	int err;
	int num_swapped_in = 0;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));
	ASSERT(pp->p_swapcnt);

top:
	tp = pp->p_tlist;
	do {
		/*
		 * Only swapin eligible lwps (specified by the scheduling
		 * class) which are unloaded and ready to run.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPIN(tp, 0);
		if (thread_pri != -1 && tp->t_state == TS_RUN &&
		    (tp->t_schedflag & TS_LOAD) == 0) {
			size_t stack_size;
			pgcnt_t stack_pages;

			ASSERT((tp->t_schedflag & TS_ON_SWAPQ) == 0);

			thread_unlock(tp);
			/*
			 * Now drop the p_lock since the stack needs
			 * to be brought in.
			 */
			mutex_exit(&pp->p_lock);

			stack_size = swapsize(tp->t_swap);
			stack_pages = btopr(stack_size);

			/* Kernel probe */
			DTRACE_SCHED1(swapin__lwp, kthread_t *, tp);

			rw_enter(&kas.a_lock, RW_READER);
			err = segkp_fault(segkp->s_as->a_hat, segkp,
			    tp->t_swap, stack_size, F_SOFTLOCK, S_OTHER);
			rw_exit(&kas.a_lock);

			/*
			 * Re-acquire the p_lock.
			 */
			mutex_enter(&pp->p_lock);
			if (err) {
				num_swapped_in = 0;
				break;
			} else {
#ifdef __sparc
				lwp_swapin(tp);
#endif /* __sparc */
				CPU_STATS_ADDQ(cpup, vm, swapin, 1);
				CPU_STATS_ADDQ(cpup, vm, pgswapin,
				    stack_pages);

				pp->p_swapcnt--;
				pp->p_swrss -= stack_pages;

				thread_lock(tp);
				tp->t_schedflag |= TS_LOAD;
				dq_sruninc(tp);

				/* set swapin time */
				tp->t_stime = ddi_get_lbolt();
				thread_unlock(tp);

				nswapped--;
				tot_swapped_in++;
				num_swapped_in++;

				TRACE_2(TR_FAC_SCHED, TR_SWAPIN,
				    "swapin: pp %p stack_pages %lu",
				    pp, stack_pages);
				goto top;
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);
	return (num_swapped_in);
}
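/*
 * Note the segkp pairing: swapin() above brings a stack back with
 * segkp_fault(..., F_SOFTLOCK, ...), while swapout() below and
 * process_swap_queue() push stacks out with F_SOFTUNLOCK.  A failed
 * F_SOFTLOCK is survivable (swapin() simply reports failure), but a
 * failed F_SOFTUNLOCK panics, presumably because the lwp's stack
 * would otherwise be left in an inconsistent state.
 */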
/*
 * Swap out lwps.  Returns nonzero on success (i.e., if at least one lwp is
 * swapped out) and 0 on failure.
 */
static int
swapout(proc_t *pp, uint_t *swrss, int swapflags)
{
	kthread_id_t tp;
	pgcnt_t ws_pages = 0;
	int err;
	int swapped_lwps = 0;
	struct as *as = pp->p_as;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));

	if (pp->p_flag & SEXITING)
		return (0);

top:
	tp = pp->p_tlist;
	do {
		klwp_t *lwp = ttolwp(tp);

		/*
		 * Swapout eligible lwps (specified by the scheduling
		 * class) which don't have TS_DONT_SWAP set.  Set the
		 * "intent to swap" flag (TS_SWAPENQ) on threads
		 * which have TS_DONT_SWAP set so that they can be
		 * swapped if and when they reach a safe point.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPOUT(tp, swapflags);
		if (thread_pri != -1) {
			if (tp->t_schedflag & TS_DONT_SWAP) {
				tp->t_schedflag |= TS_SWAPENQ;
				tp->t_trapret = 1;
				aston(tp);
			} else {
				pgcnt_t stack_pages;
				size_t stack_size;

				ASSERT((tp->t_schedflag &
				    (TS_DONT_SWAP | TS_LOAD)) == TS_LOAD);

				if (lock_try(&tp->t_lock)) {
					/*
					 * Remove thread from the swap_queue.
					 */
					if (tp->t_schedflag & TS_ON_SWAPQ) {
						ASSERT(!(tp->t_schedflag &
						    TS_SWAPENQ));
						swapdeq(tp);
					} else if (tp->t_state == TS_RUN)
						dq_srundec(tp);

					tp->t_schedflag &=
					    ~(TS_LOAD | TS_SWAPENQ);
					lock_clear(&tp->t_lock);

					/*
					 * Set swapout time if the thread isn't
					 * sleeping.
					 */
					if (tp->t_state != TS_SLEEP)
						tp->t_stime = ddi_get_lbolt();
					thread_unlock(tp);

					nswapped++;
					tot_swapped_out++;

					lwp->lwp_ru.nswap++;

					/*
					 * Now drop the p_lock since the
					 * stack needs to be pushed out.
					 */
					mutex_exit(&pp->p_lock);

					stack_size = swapsize(tp->t_swap);
					stack_pages = btopr(stack_size);
					ws_pages += stack_pages;

					/* Kernel probe */
					DTRACE_SCHED1(swapout__lwp,
					    kthread_t *, tp);

					rw_enter(&kas.a_lock, RW_READER);
					err = segkp_fault(segkp->s_as->a_hat,
					    segkp, tp->t_swap, stack_size,
					    F_SOFTUNLOCK, S_WRITE);
					rw_exit(&kas.a_lock);

					if (err) {
						cmn_err(CE_PANIC,
						    "swapout: segkp_fault "
						    "failed err: %d", err);
					}
					CPU_STATS_ADDQ(cpup,
					    vm, pgswapout, stack_pages);

					mutex_enter(&pp->p_lock);
					pp->p_swapcnt++;
					swapped_lwps++;
					goto top;
				}
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);

	/*
	 * Unload address space when all lwps are swapped out.
	 */
	if (pp->p_swapcnt == pp->p_lwpcnt) {
		size_t as_size = 0;

		/*
		 * Avoid invoking as_swapout() if the process has
		 * no MMU resources since pageout will eventually
		 * steal pages belonging to this address space.  This
		 * saves CPU cycles as the number of pages that are
		 * potentially freed or pushed out by the segment
		 * swapout operation is very small.
		 */
		if (rm_asrss(pp->p_as) != 0)
			as_size = as_swapout(as);

		CPU_STATS_ADDQ(cpup, vm, pgswapout, btop(as_size));
		CPU_STATS_ADDQ(cpup, vm, swapout, 1);
		ws_pages += btop(as_size);

		TRACE_2(TR_FAC_SCHED, TR_SWAPOUT,
		    "swapout: pp %p pages_pushed %lu", pp, ws_pages);

		/* Kernel probe */
		DTRACE_SCHED1(swapout__process, proc_t *, pp);
	}
	*swrss = ws_pages;
	return (swapped_lwps);
}
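/*
 * Queue the current lwp for swapout: this is the second half of the
 * TS_SWAPENQ protocol described at the top of this file.  A thread
 * that swapout() could not swap immediately (TS_DONT_SWAP was set)
 * arrives here once it reaches a safe point, presumably via its
 * scheduling class trap-return handler (CL_TRAPRET), and either
 * clears TS_SWAPENQ if memory pressure has eased or puts itself on
 * the swap queue and gives up the CPU.
 */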
void
swapout_lwp(klwp_t *lwp)
{
	kthread_id_t tp = curthread;

	ASSERT(curthread == lwptot(lwp));

	/*
	 * Don't insert the thread onto the swap queue if
	 * sufficient memory is available.
	 */
	if (avefree > desfree || (avefree < desfree && freemem > desfree)) {
		thread_lock(tp);
		tp->t_schedflag &= ~TS_SWAPENQ;
		thread_unlock(tp);
		return;
	}

	/*
	 * Lock the thread, then move it to the swapped queue from the
	 * onproc queue and set its state to be TS_RUN.
	 */
	thread_lock(tp);
	ASSERT(tp->t_state == TS_ONPROC);
	if (tp->t_schedflag & TS_SWAPENQ) {
		tp->t_schedflag &= ~TS_SWAPENQ;

		/*
		 * Set the state of this thread to be runnable
		 * and move it from the onproc queue to the swap queue.
		 */
		disp_swapped_enq(tp);

		/*
		 * Insert the thread onto the swap queue.
		 */
		tp->t_link = tswap_queue;
		tswap_queue = tp;
		tp->t_schedflag |= TS_ON_SWAPQ;

		thread_unlock_nopreempt(tp);

		TRACE_1(TR_FAC_SCHED, TR_SWAPOUT_LWP, "swapout_lwp:%x", lwp);

		swtch();
	} else {
		thread_unlock(tp);
	}
}
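/*
 * The swap queue built up by swapout_lwp() above is drained by
 * sched() at the top of its main loop.  Taking "swapped_lock" locks
 * every queued thread at once, since disp_swapped_enq() presumably
 * points each queued thread's t_lockp at "swapped_lock" (see the
 * comment above tswap_queue).
 */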
/*
 * Swap all threads on the swap queue.
 */
static void
process_swap_queue(void)
{
	kthread_id_t tp;
	uint_t ws_pages;
	proc_t *pp;
	struct cpu *cpup = CPU;
	klwp_t *lwp;
	int err;

	if (tswap_queue == NULL)
		return;

	/*
	 * Acquire the "swapped_lock" which locks the swap queue,
	 * and unload the stacks of all threads on it.
	 */
	disp_lock_enter(&swapped_lock);
	while ((tp = tswap_queue) != NULL) {
		pgcnt_t stack_pages;
		size_t stack_size;

		tswap_queue = tp->t_link;
		tp->t_link = NULL;

		/*
		 * Drop the "dispatcher lock" before acquiring "t_lock"
		 * to avoid spinning on it since the thread at the front
		 * of the swap queue could be pinned before giving up
		 * its "t_lock" in resume.
		 */
		disp_lock_exit(&swapped_lock);
		lock_set(&tp->t_lock);

		/*
		 * Now, re-acquire the "swapped_lock".  Acquiring this lock
		 * results in locking the thread since its dispatcher lock
		 * (t_lockp) is the "swapped_lock".
		 */
		disp_lock_enter(&swapped_lock);
		ASSERT(tp->t_state == TS_RUN);
		ASSERT(tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ));

		tp->t_schedflag &= ~(TS_LOAD | TS_ON_SWAPQ);
		tp->t_stime = ddi_get_lbolt();	/* swapout time */
		disp_lock_exit(&swapped_lock);
		lock_clear(&tp->t_lock);

		lwp = ttolwp(tp);
		lwp->lwp_ru.nswap++;

		pp = ttoproc(tp);
		stack_size = swapsize(tp->t_swap);
		stack_pages = btopr(stack_size);

		/* Kernel probe */
		DTRACE_SCHED1(swapout__lwp, kthread_t *, tp);

		rw_enter(&kas.a_lock, RW_READER);
		err = segkp_fault(segkp->s_as->a_hat, segkp, tp->t_swap,
		    stack_size, F_SOFTUNLOCK, S_WRITE);
		rw_exit(&kas.a_lock);

		if (err) {
			cmn_err(CE_PANIC,
			    "process_swap_queue: segkp_fault failed err: %d",
			    err);
		}
		CPU_STATS_ADDQ(cpup, vm, pgswapout, stack_pages);

		nswapped++;
		tot_swapped_out++;
		swapqswap++;

		/*
		 * Don't need p_lock since the swapper is the only
		 * thread which increments/decrements p_swapcnt and p_swrss.
		 */
		ws_pages = stack_pages;
		pp->p_swapcnt++;

		TRACE_1(TR_FAC_SCHED, TR_SWAPQ_LWP, "swaplist: pp %p", pp);

		/*
		 * Unload address space when all lwps are swapped out.
		 */
		if (pp->p_swapcnt == pp->p_lwpcnt) {
			size_t as_size = 0;

			if (rm_asrss(pp->p_as) != 0)
				as_size = as_swapout(pp->p_as);

			CPU_STATS_ADDQ(cpup, vm, pgswapout,
			    btop(as_size));
			CPU_STATS_ADDQ(cpup, vm, swapout, 1);

			ws_pages += btop(as_size);

			TRACE_2(TR_FAC_SCHED, TR_SWAPQ_PROC,
			    "swaplist_proc: pp %p pages_pushed: %lu",
			    pp, ws_pages);

			/* Kernel probe */
			DTRACE_SCHED1(swapout__process, proc_t *, pp);
		}
		pp->p_swrss += ws_pages;
		disp_lock_enter(&swapped_lock);
	}
	disp_lock_exit(&swapped_lock);
}