/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/var.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/vmsystm.h>
#include <sys/vmparam.h>
#include <sys/class.h>
#include <sys/vtrace.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/procfs.h>

#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg_kmem.h>
#include <sys/callb.h>

/*
 * The swapper sleeps on runout when there is no one to swap in.
 * It sleeps on runin when it could not find space to swap someone
 * in or after swapping someone in.
 */
char	runout;
char	runin;
char	wake_sched;	/* flag tells clock to wake swapper on next tick */
char	wake_sched_sec;	/* flag tells clock to wake swapper after a second */

/*
 * The swapper swaps processes to reduce memory demand and runs
 * when avefree < desfree. The swapper resorts to SOFTSWAP when
 * avefree < desfree, which results in swapping out all processes
 * sleeping for more than maxslp seconds. HARDSWAP occurs when the
 * system is on the verge of thrashing and results in swapping
 * out runnable threads or threads sleeping for less than maxslp secs.
 *
 * The swapper runs through all the active processes in the system
 * and invokes the scheduling class specific swapin/swapout routine
 * for every thread in the process to obtain an effective priority
 * for the process. A priority of -1 implies that the thread isn't
 * swappable. This effective priority is used to find the most
 * eligible process to swap out or swap in.
 *
 * NOTE:	Threads which have been swapped are not linked on any
 *		queue and their dispatcher lock points at the "swapped_lock".
 *
 * Processes containing threads with the TS_DONT_SWAP flag set cannot be
 * swapped out immediately by the swapper. This is due to the fact that
 * such threads may be holding locks which may be needed by the swapper
 * to push their pages out. The TS_SWAPENQ flag is set on such threads
 * to prevent them from running in user mode. When such threads reach a
 * safe point (i.e., are not holding any locks - CL_TRAPRET), they
 * queue themselves onto the swap queue which is processed by the
 * swapper. This reduces memory demand when the system is
 * desperate for memory, as the thread can't run in user mode.
 *
 * The swap queue consists of threads, linked via t_link, which
 * haven't been swapped, and are runnable but not on the run queue. The
 * swap queue is protected by the "swapped_lock". The dispatcher
 * lock (t_lockp) of all threads on the swap queue points at the
 * "swapped_lock". Thus, the entire queue and/or threads on the
 * queue can be locked by acquiring "swapped_lock".
 */
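
/*
 * Illustratively (hypothetical threads t1..t3), the swap queue is a
 * singly-linked stack threaded through t_link:
 *
 *	tswap_queue -> t3 -> t2 -> t1 -> NULL
 *
 * with t1->t_lockp == t2->t_lockp == t3->t_lockp == &swapped_lock,
 * so a single disp_lock_enter(&swapped_lock) locks both the queue
 * and every thread on it in one operation.
 */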

static kthread_t *tswap_queue;
extern disp_lock_t swapped_lock; /* protects swap queue and threads on it */

int	maxslp = 0;
pgcnt_t	avefree;	/* 5 sec moving average of free memory */
pgcnt_t	avefree30;	/* 30 sec moving average of free memory */

/*
 * Minimum size used to decide if sufficient memory is available
 * before a process is swapped in. This is necessary since in most
 * cases the actual size of a process (p_swrss) being swapped in
 * is usually 2 pages (kernel stack pages). This is due to the fact
 * that almost all user pages of a process are stolen by pageout
 * before the swapper decides to swap it out.
 */
int	min_procsize = 12;

static int	swapin(proc_t *);
static int	swapout(proc_t *, uint_t *, int);
static void	process_swap_queue(void);

#ifdef __sparc
extern void lwp_swapin(kthread_t *);
#endif /* __sparc */

/*
 * Counters to keep track of the number of swapins and swapouts.
 */
uint_t tot_swapped_in, tot_swapped_out;
uint_t softswap, hardswap, swapqswap;

/*
 * Macro to determine if a process is eligible to be swapped.
 */
#define	not_swappable(p)					\
	(((p)->p_flag & SSYS) || (p)->p_stat == SIDL ||		\
	    (p)->p_stat == SZOMB || (p)->p_as == NULL ||	\
	    (p)->p_as == &kas)

/*
 * Memory scheduler.
 */
void
sched()
{
	kthread_id_t	t;
	pri_t		proc_pri;
	pri_t		thread_pri;
	pri_t		swapin_pri;
	int		desperate;
	pgcnt_t		needs;
	int		divisor;
	proc_t		*prp;
	proc_t		*swapout_prp;
	proc_t		*swapin_prp;
	spgcnt_t	avail;
	int		chosen_pri;
	time_t		swapout_time;
	time_t		swapin_proc_time;
	callb_cpr_t	cprinfo;
	kmutex_t	swap_cpr_lock;

	mutex_init(&swap_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cprinfo, &swap_cpr_lock, callb_generic_cpr, "sched");
	if (maxslp == 0)
		maxslp = MAXSLP;
loop:
	needs = 0;
	desperate = 0;

	swapin_pri = v.v_nglobpris;
	swapin_prp = NULL;
	chosen_pri = -1;

	process_swap_queue();

	/*
	 * Set desperate if
	 *	1.  At least 2 runnable processes (on average).
	 *	2.  Both the short (5 sec) and long (30 sec) averages of
	 *	    free memory are less than desfree.
	 *	3.  The pagein + pageout rate is excessive, or the short
	 *	    average of free memory is less than minfree.
	 */
	if (avenrun[0] >= 2 * FSCALE &&
	    (MAX(avefree, avefree30) < desfree) &&
	    (pginrate + pgoutrate > maxpgio || avefree < minfree)) {
		TRACE_4(TR_FAC_SCHED, TR_DESPERATE,
		    "desp:avefree: %d, avefree30: %d, freemem: %d"
		    " pginrate: %d\n", avefree, avefree30, freemem, pginrate);
		desperate = 1;
		goto unload;
	}
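
	/*
	 * A hypothetical worked example of the desperate test, assuming
	 * the tunables desfree = 100, minfree = 50 and maxpgio = 40
	 * (pages): with a load average of 2.5 (avenrun[0] == 2.5 *
	 * FSCALE), avefree = 60, avefree30 = 80 and pginrate + pgoutrate
	 * = 55, all three criteria hold (2.5 >= 2, MAX(60, 80) = 80 <
	 * 100, 55 > 40), so the swapper skips the soft-swap scan and
	 * goes straight to hard swapping at "unload".
	 */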

	/*
	 * Search list of processes to swap in and swap out deadwood.
	 */
	swapin_proc_time = 0;
top:
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * Look at processes with at least one swapped lwp.
		 */
		if (prp->p_swapcnt) {
			time_t proc_time;

			/*
			 * Higher priority processes are good candidates
			 * to swap in.
			 */
			mutex_enter(&prp->p_lock);
			proc_pri = -1;
			t = prp->p_tlist;
			proc_time = 0;
			do {
				if (t->t_schedflag & TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPIN(t, 0);
				thread_unlock(t);

				if (t->t_stime - proc_time > 0)
					proc_time = t->t_stime;
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);
			mutex_exit(&prp->p_lock);

			if (proc_pri == -1)
				continue;

			TRACE_3(TR_FAC_SCHED, TR_CHOOSE_SWAPIN,
			    "prp %p epri %d proc_time %d",
			    prp, proc_pri, proc_time);

			/*
			 * Swap in processes with a high effective priority.
			 */
			if (swapin_prp == NULL || proc_pri > chosen_pri) {
				swapin_prp = prp;
				chosen_pri = proc_pri;
				swapin_pri = proc_pri;
				swapin_proc_time = proc_time;
			}
		} else {
			/*
			 * No need to soft swap if we have sufficient
			 * memory.
			 */
			if (avefree > desfree ||
			    (avefree < desfree && freemem > desfree))
				continue;

			/*
			 * Skip processes that are exiting
			 * or whose address spaces are locked.
			 */
			mutex_enter(&prp->p_lock);
			if ((prp->p_flag & SEXITING) ||
			    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
				mutex_exit(&prp->p_lock);
				continue;
			}

			/*
			 * Soft swapping to kick out deadwood.
			 */
			proc_pri = -1;
			t = prp->p_tlist;
			do {
				if ((t->t_schedflag & (TS_SWAPENQ |
				    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPOUT(t, SOFTSWAP);
				thread_unlock(t);
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);

			if (proc_pri != -1) {
				uint_t swrss;

				mutex_exit(&pidlock);

				TRACE_1(TR_FAC_SCHED, TR_SOFTSWAP,
				    "softswap:prp %p", prp);

				(void) swapout(prp, &swrss, SOFTSWAP);
				softswap++;
				prp->p_swrss += swrss;
				mutex_exit(&prp->p_lock);
				goto top;
			}
			mutex_exit(&prp->p_lock);
		}
	}
	if (swapin_prp != NULL)
		mutex_enter(&swapin_prp->p_lock);
	mutex_exit(&pidlock);

	if (swapin_prp == NULL) {
		TRACE_3(TR_FAC_SCHED, TR_RUNOUT,
		    "schedrunout:runout nswapped: %d, avefree: %ld freemem: %ld",
		    nswapped, avefree, freemem);

		t = curthread;
		thread_lock(t);
		runout++;
		t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
		t->t_whystop = PR_SUSPENDED;
		t->t_whatstop = SUSPEND_NORMAL;
		(void) new_mstate(t, LMS_SLEEP);
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		mutex_exit(&swap_cpr_lock);
		thread_stop(t);		/* change state and drop lock */
		swtch();
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
		mutex_exit(&swap_cpr_lock);
		goto loop;
	}

	/*
	 * Decide how deserving this process is to be brought in.
	 * Needs is an estimate of how much core the process will
	 * need. If the process has been out for a while, then we
	 * will bring it in with 1/2 the core needed, otherwise
	 * we are conservative.
	 */
	divisor = 1;
	swapout_time = (ddi_get_lbolt() - swapin_proc_time) / hz;
	if (swapout_time > maxslp / 2)
		divisor = 2;

	needs = MIN(swapin_prp->p_swrss, lotsfree);
	needs = MAX(needs, min_procsize);
	needs = needs / divisor;
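
	/*
	 * A hypothetical worked example: if the chosen process had
	 * p_swrss = 20 pages when it was swapped out, with lotsfree =
	 * 256 and min_procsize = 12, then needs = MAX(MIN(20, 256), 12)
	 * = 20 pages, halved to 10 if the process has been out for more
	 * than maxslp / 2 seconds.
	 */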

	/*
	 * Use freemem, since we want processes to be swapped
	 * in quickly.
	 */
	avail = freemem - deficit;
	if (avail > (spgcnt_t)needs) {
		deficit += needs;

		TRACE_2(TR_FAC_SCHED, TR_SWAPIN_VALUES,
		    "swapin_values: prp %p needs %lu", swapin_prp, needs);

		if (swapin(swapin_prp)) {
			mutex_exit(&swapin_prp->p_lock);
			goto loop;
		}
		deficit -= MIN(needs, deficit);
		mutex_exit(&swapin_prp->p_lock);
	} else {
		mutex_exit(&swapin_prp->p_lock);
		/*
		 * If deficit is high, too many processes have been
		 * swapped in, so wait a sec before attempting to
		 * swap in more.
		 */
		if (freemem > needs) {
			TRACE_2(TR_FAC_SCHED, TR_HIGH_DEFICIT,
			    "deficit: prp %p needs %lu", swapin_prp, needs);
			goto block;
		}
	}

	TRACE_2(TR_FAC_SCHED, TR_UNLOAD,
	    "unload: prp %p needs %lu", swapin_prp, needs);

unload:
	/*
	 * Unload all unloadable modules, free all other memory
	 * resources we can find, then look for a thread to hardswap.
	 */
	modreap();
	segkp_cache_free();

	swapout_prp = NULL;
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * No need to hard swap if we have sufficient
		 * memory.
		 */
		if (avefree > minfree ||
		    (avefree < minfree && freemem > desfree)) {
			swapout_prp = NULL;
			break;
		}

		/*
		 * Skip processes that are exiting
		 * or whose address spaces are locked.
		 */
		mutex_enter(&prp->p_lock);
		if ((prp->p_flag & SEXITING) ||
		    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
			mutex_exit(&prp->p_lock);
			continue;
		}

		proc_pri = -1;
		t = prp->p_tlist;
		do {
			if ((t->t_schedflag & (TS_SWAPENQ |
			    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
				continue;

			thread_lock(t);
			thread_pri = CL_SWAPOUT(t, HARDSWAP);
			thread_unlock(t);
			if (thread_pri > proc_pri)
				proc_pri = thread_pri;
		} while ((t = t->t_forw) != prp->p_tlist);

		mutex_exit(&prp->p_lock);
		if (proc_pri == -1)
			continue;

		/*
		 * Swap out processes sleeping with a lower priority
		 * than the one currently being swapped in, if any.
		 */
		if (swapin_prp == NULL || swapin_pri > proc_pri) {
			TRACE_2(TR_FAC_SCHED, TR_CHOOSE_SWAPOUT,
			    "hardswap: prp %p needs %lu", prp, needs);

			if (swapout_prp == NULL || proc_pri < chosen_pri) {
				swapout_prp = prp;
				chosen_pri = proc_pri;
			}
		}
	}
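
	/*
	 * Note the asymmetry between the two scans: the swap-in scan
	 * keeps the process with the highest effective priority
	 * (proc_pri > chosen_pri), while the hard swap-out scan keeps
	 * the one with the lowest (proc_pri < chosen_pri), so that,
	 * hypothetically, a candidate with effective priority 10 is
	 * evicted in preference to one at priority 40.
	 */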

	/*
	 * Acquire the "p_lock" before dropping "pidlock"
	 * to prevent the proc structure from being freed
	 * if the process exits before swapout completes.
	 */
	if (swapout_prp != NULL)
		mutex_enter(&swapout_prp->p_lock);
	mutex_exit(&pidlock);

	if ((prp = swapout_prp) != NULL) {
		uint_t swrss = 0;
		int swapped;

		swapped = swapout(prp, &swrss, HARDSWAP);
		if (swapped) {
			/*
			 * If desperate, we want to give the space obtained
			 * by swapping this process out to processes in core,
			 * so we give them a chance by increasing deficit.
			 */
			prp->p_swrss += swrss;
			if (desperate)
				deficit += MIN(prp->p_swrss, lotsfree);
			hardswap++;
		}
		mutex_exit(&swapout_prp->p_lock);

		if (swapped)
			goto loop;
	}

	/*
	 * Delay for 1 second and look again later.
	 */
	TRACE_3(TR_FAC_SCHED, TR_RUNIN,
	    "schedrunin:runin nswapped: %d, avefree: %ld freemem: %ld",
	    nswapped, avefree, freemem);

block:
	t = curthread;
	thread_lock(t);
	runin++;
	t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
	t->t_whystop = PR_SUSPENDED;
	t->t_whatstop = SUSPEND_NORMAL;
	(void) new_mstate(t, LMS_SLEEP);
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	mutex_exit(&swap_cpr_lock);
	thread_stop(t);		/* change to stop state and drop lock */
	swtch();
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
	mutex_exit(&swap_cpr_lock);
	goto loop;
}

/*
 * Remove the specified thread from the swap queue.
 */
static void
swapdeq(kthread_id_t tp)
{
	kthread_id_t *tpp;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT(tp->t_schedflag & TS_ON_SWAPQ);

	tpp = &tswap_queue;
	for (;;) {
		ASSERT(*tpp != NULL);
		if (*tpp == tp)
			break;
		tpp = &(*tpp)->t_link;
	}
	*tpp = tp->t_link;
	tp->t_schedflag &= ~TS_ON_SWAPQ;
}
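
/*
 * swapdeq() above uses the classic pointer-to-pointer unlink idiom:
 * tpp walks the chain of t_link fields (starting at &tswap_queue), so
 * when *tpp == tp the single store "*tpp = tp->t_link" unlinks tp
 * whether it is at the head of the queue or in the middle, with no
 * special case for either position.
 */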

/*
 * Swap in lwps. Returns nonzero on success (i.e., if at least one lwp is
 * swapped in) and 0 on failure.
 */
static int
swapin(proc_t *pp)
{
	kthread_id_t tp;
	int err;
	int num_swapped_in = 0;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));
	ASSERT(pp->p_swapcnt);

top:
	tp = pp->p_tlist;
	do {
		/*
		 * Only swap in eligible lwps (specified by the scheduling
		 * class) which are unloaded and ready to run.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPIN(tp, 0);
		if (thread_pri != -1 && tp->t_state == TS_RUN &&
		    (tp->t_schedflag & TS_LOAD) == 0) {
			size_t stack_size;
			pgcnt_t stack_pages;

			ASSERT((tp->t_schedflag & TS_ON_SWAPQ) == 0);

			thread_unlock(tp);
			/*
			 * Now drop the p_lock since the stack needs
			 * to be brought in.
			 */
			mutex_exit(&pp->p_lock);

			stack_size = swapsize(tp->t_swap);
			stack_pages = btopr(stack_size);
			/* Kernel probe */
			TNF_PROBE_4(swapin_lwp, "vm swap swapin", /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_lwpid, lwpid, tp->t_tid,
			    tnf_kthread_id, tid, tp,
			    tnf_ulong, page_count, stack_pages);

			rw_enter(&kas.a_lock, RW_READER);
			err = segkp_fault(segkp->s_as->a_hat, segkp,
			    tp->t_swap, stack_size, F_SOFTLOCK, S_OTHER);
			rw_exit(&kas.a_lock);

			/*
			 * Re-acquire the p_lock.
			 */
			mutex_enter(&pp->p_lock);
			if (err) {
				num_swapped_in = 0;
				break;
			} else {
#ifdef __sparc
				lwp_swapin(tp);
#endif /* __sparc */
				CPU_STATS_ADDQ(cpup, vm, swapin, 1);
				CPU_STATS_ADDQ(cpup, vm, pgswapin,
				    stack_pages);

				pp->p_swapcnt--;
				pp->p_swrss -= stack_pages;

				thread_lock(tp);
				tp->t_schedflag |= TS_LOAD;
				dq_sruninc(tp);

				/* set swapin time */
				tp->t_stime = ddi_get_lbolt();
				thread_unlock(tp);

				nswapped--;
				tot_swapped_in++;
				num_swapped_in++;

				TRACE_2(TR_FAC_SCHED, TR_SWAPIN,
				    "swapin: pp %p stack_pages %lu",
				    pp, stack_pages);
				goto top;
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);
	return (num_swapped_in);
}
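
/*
 * Note that the segkp_fault() calls come in matched pairs: swapin()
 * above faults the lwp's kernel stack back in and pins it with
 * F_SOFTLOCK, while swapout() and process_swap_queue() below release
 * and push it out with F_SOFTUNLOCK. The t_swap/stack_size arguments
 * describe the same segkp region in both directions.
 */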

/*
 * Swap out lwps. Returns nonzero on success (i.e., if at least one lwp is
 * swapped out) and 0 on failure.
 */
static int
swapout(proc_t *pp, uint_t *swrss, int swapflags)
{
	kthread_id_t tp;
	pgcnt_t ws_pages = 0;
	int err;
	int swapped_lwps = 0;
	struct as *as = pp->p_as;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));

	if (pp->p_flag & SEXITING)
		return (0);

top:
	tp = pp->p_tlist;
	do {
		klwp_t *lwp = ttolwp(tp);

		/*
		 * Swap out eligible lwps (specified by the scheduling
		 * class) which don't have TS_DONT_SWAP set. Set the
		 * "intent to swap" flag (TS_SWAPENQ) on threads
		 * which have TS_DONT_SWAP set so that they can be
		 * swapped if and when they reach a safe point.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPOUT(tp, swapflags);
		if (thread_pri != -1) {
			if (tp->t_schedflag & TS_DONT_SWAP) {
				tp->t_schedflag |= TS_SWAPENQ;
				tp->t_trapret = 1;
				aston(tp);
			} else {
				pgcnt_t stack_pages;
				size_t stack_size;

				ASSERT((tp->t_schedflag &
				    (TS_DONT_SWAP | TS_LOAD)) == TS_LOAD);

				if (lock_try(&tp->t_lock)) {
					/*
					 * Remove thread from the swap_queue.
					 */
					if (tp->t_schedflag & TS_ON_SWAPQ) {
						ASSERT(!(tp->t_schedflag &
						    TS_SWAPENQ));
						swapdeq(tp);
					} else if (tp->t_state == TS_RUN)
						dq_srundec(tp);

					tp->t_schedflag &=
					    ~(TS_LOAD | TS_SWAPENQ);
					lock_clear(&tp->t_lock);

					/*
					 * Set swapout time if the thread isn't
					 * sleeping.
					 */
					if (tp->t_state != TS_SLEEP)
						tp->t_stime = ddi_get_lbolt();
					thread_unlock(tp);

					nswapped++;
					tot_swapped_out++;

					lwp->lwp_ru.nswap++;

					/*
					 * Now drop the p_lock since the
					 * stack needs to be pushed out.
					 */
					mutex_exit(&pp->p_lock);

					stack_size = swapsize(tp->t_swap);
					stack_pages = btopr(stack_size);
					ws_pages += stack_pages;
					/* Kernel probe */
					TNF_PROBE_4(swapout_lwp,
					    "vm swap swapout",
					    /* CSTYLED */,
					    tnf_pid, pid, pp->p_pid,
					    tnf_lwpid, lwpid, tp->t_tid,
					    tnf_kthread_id, tid, tp,
					    tnf_ulong, page_count,
					    stack_pages);

					rw_enter(&kas.a_lock, RW_READER);
					err = segkp_fault(segkp->s_as->a_hat,
					    segkp, tp->t_swap, stack_size,
					    F_SOFTUNLOCK, S_WRITE);
					rw_exit(&kas.a_lock);

					if (err) {
						cmn_err(CE_PANIC,
						    "swapout: segkp_fault "
						    "failed err: %d", err);
					}
					CPU_STATS_ADDQ(cpup,
					    vm, pgswapout, stack_pages);

					mutex_enter(&pp->p_lock);
					pp->p_swapcnt++;
					swapped_lwps++;
					goto top;
				}
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);

	/*
	 * Unload address space when all lwps are swapped out.
	 */
	if (pp->p_swapcnt == pp->p_lwpcnt) {
		size_t as_size = 0;

		/*
		 * Avoid invoking as_swapout() if the process has
		 * no MMU resources since pageout will eventually
		 * steal pages belonging to this address space. This
		 * saves CPU cycles as the number of pages that are
		 * potentially freed or pushed out by the segment
		 * swapout operation is very small.
		 */
		if (rm_asrss(pp->p_as) != 0)
			as_size = as_swapout(as);

		CPU_STATS_ADDQ(cpup, vm, pgswapout, btop(as_size));
		CPU_STATS_ADDQ(cpup, vm, swapout, 1);
		ws_pages += btop(as_size);

		TRACE_2(TR_FAC_SCHED, TR_SWAPOUT,
		    "swapout: pp %p pages_pushed %lu", pp, ws_pages);
		/* Kernel probe */
		TNF_PROBE_2(swapout_process, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_ulong, page_count, ws_pages);
	}
	*swrss = ws_pages;
	return (swapped_lwps);
}

void
swapout_lwp(klwp_t *lwp)
{
	kthread_id_t tp = curthread;

	ASSERT(curthread == lwptot(lwp));

	/*
	 * Don't insert the thread onto the swap queue if
	 * sufficient memory is available.
	 */
	if (avefree > desfree || (avefree < desfree && freemem > desfree)) {
		thread_lock(tp);
		tp->t_schedflag &= ~TS_SWAPENQ;
		thread_unlock(tp);
		return;
	}

	/*
	 * Lock the thread, then move it to the swapped queue from the
	 * onproc queue and set its state to be TS_RUN.
	 */
	thread_lock(tp);
	ASSERT(tp->t_state == TS_ONPROC);
	if (tp->t_schedflag & TS_SWAPENQ) {
		tp->t_schedflag &= ~TS_SWAPENQ;

		/*
		 * Set the state of this thread to be runnable
		 * and move it from the onproc queue to the swap queue.
		 */
		disp_swapped_enq(tp);

		/*
		 * Insert the thread onto the swap queue.
		 */
		tp->t_link = tswap_queue;
		tswap_queue = tp;
		tp->t_schedflag |= TS_ON_SWAPQ;

		thread_unlock_nopreempt(tp);

		TRACE_1(TR_FAC_SCHED, TR_SWAPOUT_LWP, "swapout_lwp:%x", lwp);

		swtch();
	} else {
		thread_unlock(tp);
	}
}
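
/*
 * To summarize the TS_DONT_SWAP handshake described in the block
 * comment at the top of this file: swapout() marks a TS_DONT_SWAP
 * thread with TS_SWAPENQ and posts an AST; once the thread reaches a
 * safe point on its way back to user mode (presumably via its class's
 * CL_TRAPRET handler, per that comment), it calls swapout_lwp() above,
 * which pushes it onto tswap_queue; and process_swap_queue() below
 * finally unloads its stack on the swapper's next pass.
 */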

/*
 * Swap all threads on the swap queue.
 */
static void
process_swap_queue(void)
{
	kthread_id_t tp;
	uint_t ws_pages;
	proc_t *pp;
	struct cpu *cpup = CPU;
	klwp_t *lwp;
	int err;

	if (tswap_queue == NULL)
		return;

	/*
	 * Acquire the "swapped_lock" which locks the swap queue,
	 * and unload the stacks of all threads on it.
	 */
	disp_lock_enter(&swapped_lock);
	while ((tp = tswap_queue) != NULL) {
		pgcnt_t stack_pages;
		size_t stack_size;

		tswap_queue = tp->t_link;
		tp->t_link = NULL;

		/*
		 * Drop the "dispatcher lock" before acquiring "t_lock"
		 * to avoid spinning on it since the thread at the front
		 * of the swap queue could be pinned before giving up
		 * its "t_lock" in resume.
		 */
		disp_lock_exit(&swapped_lock);
		lock_set(&tp->t_lock);

		/*
		 * Now, re-acquire the "swapped_lock". Acquiring this lock
		 * results in locking the thread since its dispatcher lock
		 * (t_lockp) is the "swapped_lock".
		 */
		disp_lock_enter(&swapped_lock);
		ASSERT(tp->t_state == TS_RUN);
		ASSERT(tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ));

		tp->t_schedflag &= ~(TS_LOAD | TS_ON_SWAPQ);
		tp->t_stime = ddi_get_lbolt();		/* swapout time */
		disp_lock_exit(&swapped_lock);
		lock_clear(&tp->t_lock);

		lwp = ttolwp(tp);
		lwp->lwp_ru.nswap++;

		pp = ttoproc(tp);
		stack_size = swapsize(tp->t_swap);
		stack_pages = btopr(stack_size);

		/* Kernel probe */
		TNF_PROBE_4(swapout_lwp, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_lwpid, lwpid, tp->t_tid,
		    tnf_kthread_id, tid, tp,
		    tnf_ulong, page_count, stack_pages);

		rw_enter(&kas.a_lock, RW_READER);
		err = segkp_fault(segkp->s_as->a_hat, segkp, tp->t_swap,
		    stack_size, F_SOFTUNLOCK, S_WRITE);
		rw_exit(&kas.a_lock);

		if (err) {
			cmn_err(CE_PANIC,
			    "process_swap_list: segkp_fault failed err: %d",
			    err);
		}
		CPU_STATS_ADDQ(cpup, vm, pgswapout, stack_pages);

		nswapped++;
		tot_swapped_out++;
		swapqswap++;

		/*
		 * Don't need p_lock since the swapper is the only
		 * thread which increments/decrements p_swapcnt and p_swrss.
		 */
		ws_pages = stack_pages;
		pp->p_swapcnt++;

		TRACE_1(TR_FAC_SCHED, TR_SWAPQ_LWP, "swaplist: pp %p", pp);

		/*
		 * Unload address space when all lwps are swapped out.
		 */
		if (pp->p_swapcnt == pp->p_lwpcnt) {
			size_t as_size = 0;

			if (rm_asrss(pp->p_as) != 0)
				as_size = as_swapout(pp->p_as);

			CPU_STATS_ADDQ(cpup, vm, pgswapout,
			    btop(as_size));
			CPU_STATS_ADDQ(cpup, vm, swapout, 1);

			ws_pages += btop(as_size);

			TRACE_2(TR_FAC_SCHED, TR_SWAPQ_PROC,
			    "swaplist_proc: pp %p pages_pushed: %lu",
			    pp, ws_pages);
			/* Kernel probe */
			TNF_PROBE_2(swapout_process, "vm swap swapout",
			    /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_ulong, page_count, ws_pages);
		}
		pp->p_swrss += ws_pages;
		disp_lock_enter(&swapped_lock);
	}
	disp_lock_exit(&swapped_lock);
}