/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/var.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/vmsystm.h>
#include <sys/vmparam.h>
#include <sys/class.h>
#include <sys/vtrace.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/procfs.h>

#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg_kmem.h>
#include <sys/callb.h>

/*
 * The swapper sleeps on runout when there is no one to swap in.
 * It sleeps on runin when it could not find space to swap someone
 * in or after swapping someone in.
 */
char	runout;
char	runin;
char	wake_sched;	/* flag tells clock to wake swapper on next tick */
char	wake_sched_sec;	/* flag tells clock to wake swapper after a second */

/*
 * The swapper swaps processes to reduce memory demand and runs
 * when avefree < desfree.  The swapper resorts to SOFTSWAP when
 * avefree < desfree, which results in swapping out all processes
 * sleeping for more than maxslp seconds.  HARDSWAP occurs when the
 * system is on the verge of thrashing and results in swapping
 * out runnable threads or threads sleeping for less than maxslp secs.
 *
 * The swapper runs through all the active processes in the system
 * and invokes the scheduling class specific swapin/swapout routine
 * for every thread in the process to obtain an effective priority
 * for the process.  A priority of -1 implies that the thread isn't
 * swappable.  This effective priority is used to find the most
 * eligible process to swap out or swap in.
 *
 * NOTE:  Threads which have been swapped are not linked on any
 *	  queue and their dispatcher lock points at the "swapped_lock".
 *
 * Processes containing threads with the TS_DONT_SWAP flag set cannot be
 * swapped out immediately by the swapper.  This is due to the fact that
 * such threads may be holding locks which may be needed by the swapper
 * to push their pages out.  The TS_SWAPENQ flag is set on such threads
 * to prevent them from running in user mode.  When such threads reach a
 * safe point (i.e., are not holding any locks - CL_TRAPRET), they
 * queue themselves onto the swap queue which is processed by the
 * swapper.  This results in reducing memory demand when the system
 * is desperate for memory as the thread can't run in user mode.
 *
 * The swap queue consists of threads, linked via t_link, which haven't
 * been swapped, are runnable, but are not on the run queue.  The
 * swap queue is protected by the "swapped_lock".  The dispatcher
 * lock (t_lockp) of all threads on the swap queue points at the
 * "swapped_lock".  Thus, the entire queue and/or threads on the
 * queue can be locked by acquiring "swapped_lock".
 */
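
/*
 * The TS_DONT_SWAP flow described above maps onto the code below roughly
 * as follows: swapout() marks such a thread with TS_SWAPENQ and posts an
 * AST; once the thread reaches a safe point (CL_TRAPRET) it ends up in
 * swapout_lwp(), which moves it onto tswap_queue via disp_swapped_enq();
 * the swapper later drains the queue in process_swap_queue() and unloads
 * the kernel stacks of everything on it.
 */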
static kthread_t *tswap_queue;
extern disp_lock_t swapped_lock; /* protects swap queue and threads on it */

int maxslp = 0;
pgcnt_t	avefree;	/* 5 sec moving average of free memory */
pgcnt_t	avefree30;	/* 30 sec moving average of free memory */

/*
 * Minimum size used to decide if sufficient memory is available
 * before a process is swapped in.  This is necessary since in most
 * cases the actual size of a process (p_swrss) being swapped in
 * is usually 2 pages (kernel stack pages).  This is due to the fact
 * that almost all user pages of a process are stolen by pageout before
 * the swapper decides to swap it out.
 */
int min_procsize = 12;

static int	swapin(proc_t *);
static int	swapout(proc_t *, uint_t *, int);
static void	process_swap_queue(void);

#ifdef __sparc
extern void lwp_swapin(kthread_t *);
#endif /* __sparc */

/*
 * Counters to keep track of the number of swapins or swapouts.
 */
uint_t tot_swapped_in, tot_swapped_out;
uint_t softswap, hardswap, swapqswap;

/*
 * Macro to determine if a process is eligible to be swapped.
 */
#define	not_swappable(p)					\
	(((p)->p_flag & SSYS) || (p)->p_stat == SIDL ||		\
	    (p)->p_stat == SZOMB || (p)->p_as == NULL ||	\
	    (p)->p_as == &kas)

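/*
 * A minimal sketch of how the macro is used (this mirrors the two
 * practive walks in sched() below); system processes, zombies and
 * processes without a user address space are simply skipped:
 *
 *	for (prp = practive; prp != NULL; prp = prp->p_next) {
 *		if (not_swappable(prp))
 *			continue;
 *		... examine prp's threads ...
 *	}
 */
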
/*
 * Memory scheduler.
 */
void
sched()
{
	kthread_id_t	t;
	pri_t		proc_pri;
	pri_t		thread_pri;
	pri_t		swapin_pri;
	int		desperate;
	pgcnt_t		needs;
	int		divisor;
	proc_t		*prp;
	proc_t		*swapout_prp;
	proc_t		*swapin_prp;
	spgcnt_t	avail;
	int		chosen_pri;
	time_t		swapout_time;
	time_t		swapin_proc_time;
	callb_cpr_t	cprinfo;
	kmutex_t	swap_cpr_lock;

	mutex_init(&swap_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cprinfo, &swap_cpr_lock, callb_generic_cpr, "sched");
	if (maxslp == 0)
		maxslp = MAXSLP;
loop:
	needs = 0;
	desperate = 0;

	swapin_pri = v.v_nglobpris;
	swapin_prp = NULL;
	chosen_pri = -1;

	process_swap_queue();

	/*
	 * Set desperate if
	 *	1.  At least 2 runnable processes (on average).
	 *	2.  Both the short (5 sec) and longer (30 sec) averages
	 *	    of free memory are less than desfree.
	 *	3.  The pagein + pageout rate is excessive, or the short
	 *	    average of free memory is less than minfree.
	 *
	 * Note that avenrun[] is scaled by FSCALE, so the first test
	 * requires a load average of at least 2.
	 */
	if (avenrun[0] >= 2 * FSCALE &&
	    (MAX(avefree, avefree30) < desfree) &&
	    (pginrate + pgoutrate > maxpgio || avefree < minfree)) {
		TRACE_4(TR_FAC_SCHED, TR_DESPERATE,
		    "desp:avefree: %d, avefree30: %d, freemem: %d"
		    " pginrate: %d\n", avefree, avefree30, freemem, pginrate);
		desperate = 1;
		goto unload;
	}

	/*
	 * Search list of processes to swap in and swap out deadwood.
	 */
	swapin_proc_time = 0;
top:
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * Look at processes with at least one swapped lwp.
		 */
		if (prp->p_swapcnt) {
			time_t proc_time;

			/*
			 * Higher priority processes are good candidates
			 * to swap in.
			 */
			mutex_enter(&prp->p_lock);
			proc_pri = -1;
			t = prp->p_tlist;
			proc_time = 0;
			do {
				if (t->t_schedflag & TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPIN(t, 0);
				thread_unlock(t);

				if (t->t_stime - proc_time > 0)
					proc_time = t->t_stime;
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);
			mutex_exit(&prp->p_lock);

			if (proc_pri == -1)
				continue;

			TRACE_3(TR_FAC_SCHED, TR_CHOOSE_SWAPIN,
			    "prp %p epri %d proc_time %d",
			    prp, proc_pri, proc_time);

			/*
			 * Swap in processes with a high effective priority.
			 */
			if (swapin_prp == NULL || proc_pri > chosen_pri) {
				swapin_prp = prp;
				chosen_pri = proc_pri;
				swapin_pri = proc_pri;
				swapin_proc_time = proc_time;
			}
		} else {
			/*
			 * No need to soft swap if we have sufficient
			 * memory.
			 */
			if (avefree > desfree ||
			    (avefree < desfree && freemem > desfree))
				continue;

			/*
			 * Skip processes that are exiting
			 * or whose address spaces are locked.
			 */
			mutex_enter(&prp->p_lock);
			if ((prp->p_flag & SEXITING) ||
			    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
				mutex_exit(&prp->p_lock);
				continue;
			}

			/*
			 * Softswapping to kick out deadwood.
			 */
			proc_pri = -1;
			t = prp->p_tlist;
			do {
				if ((t->t_schedflag & (TS_SWAPENQ |
				    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPOUT(t, SOFTSWAP);
				thread_unlock(t);
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);

			if (proc_pri != -1) {
				uint_t swrss;

				mutex_exit(&pidlock);

				TRACE_1(TR_FAC_SCHED, TR_SOFTSWAP,
				    "softswap:prp %p", prp);

				(void) swapout(prp, &swrss, SOFTSWAP);
				softswap++;
				prp->p_swrss += swrss;
				mutex_exit(&prp->p_lock);
				goto top;
			}
			mutex_exit(&prp->p_lock);
		}
	}
	if (swapin_prp != NULL)
		mutex_enter(&swapin_prp->p_lock);
	mutex_exit(&pidlock);

	if (swapin_prp == NULL) {
		TRACE_3(TR_FAC_SCHED, TR_RUNOUT,
		    "schedrunout:runout nswapped: %d, avefree: %ld freemem: %ld",
		    nswapped, avefree, freemem);

		t = curthread;
		thread_lock(t);
		runout++;
		t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
		t->t_whystop = PR_SUSPENDED;
		t->t_whatstop = SUSPEND_NORMAL;
		(void) new_mstate(t, LMS_SLEEP);
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		mutex_exit(&swap_cpr_lock);
		thread_stop(t);		/* change state and drop lock */
		swtch();
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
		mutex_exit(&swap_cpr_lock);
		goto loop;
	}

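	/*
	 * The next step sizes the swap-in.  A worked example with made-up
	 * numbers, assuming the chosen process was swapped out more than
	 * maxslp / 2 seconds ago, p_swrss == 100 pages, lotsfree == 256
	 * and min_procsize == 12:
	 *
	 *	divisor = 2
	 *	needs = MIN(100, 256) = 100
	 *	needs = MAX(100, 12)  = 100
	 *	needs = 100 / 2       = 50 pages
	 */
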
	/*
	 * Decide how deserving this process is to be brought in.
	 * Needs is an estimate of how much core the process will
	 * need.  If the process has been out for a while, then we
	 * will bring it in with 1/2 the core needed, otherwise
	 * we are conservative.
	 */
	divisor = 1;
	swapout_time = (ddi_get_lbolt() - swapin_proc_time) / hz;
	if (swapout_time > maxslp / 2)
		divisor = 2;

	needs = MIN(swapin_prp->p_swrss, lotsfree);
	needs = MAX(needs, min_procsize);
	needs = needs / divisor;

	/*
	 * Use freemem, since we want processes to be swapped
	 * in quickly.
	 */
	avail = freemem - deficit;
	if (avail > (spgcnt_t)needs) {
		deficit += needs;

		TRACE_2(TR_FAC_SCHED, TR_SWAPIN_VALUES,
		    "swapin_values: prp %p needs %lu", swapin_prp, needs);

		if (swapin(swapin_prp)) {
			mutex_exit(&swapin_prp->p_lock);
			goto loop;
		}
		deficit -= MIN(needs, deficit);
		mutex_exit(&swapin_prp->p_lock);
	} else {
		mutex_exit(&swapin_prp->p_lock);
		/*
		 * If deficit is high, too many processes have been
		 * swapped in so wait a sec before attempting to
		 * swap in more.
		 */
		if (freemem > needs) {
			TRACE_2(TR_FAC_SCHED, TR_HIGH_DEFICIT,
			    "deficit: prp %p needs %lu", swapin_prp, needs);
			goto block;
		}
	}

	TRACE_2(TR_FAC_SCHED, TR_UNLOAD,
	    "unload: prp %p needs %lu", swapin_prp, needs);

unload:
	/*
	 * Unload all unloadable modules, free all other memory
	 * resources we can find, then look for a thread to hardswap.
	 */
	modreap();
	segkp_cache_free();

	swapout_prp = NULL;
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {

		if (not_swappable(prp))
			continue;

		/*
		 * No need to hard swap if we have sufficient
		 * memory.
		 */
		if (avefree > minfree ||
		    (avefree < minfree && freemem > desfree)) {
			swapout_prp = NULL;
			break;
		}

		/*
		 * Skip processes that are exiting
		 * or whose address spaces are locked.
		 */
		mutex_enter(&prp->p_lock);
		if ((prp->p_flag & SEXITING) ||
		    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
			mutex_exit(&prp->p_lock);
			continue;
		}

		proc_pri = -1;
		t = prp->p_tlist;
		do {
			if ((t->t_schedflag & (TS_SWAPENQ |
			    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
				continue;

			thread_lock(t);
			thread_pri = CL_SWAPOUT(t, HARDSWAP);
			thread_unlock(t);
			if (thread_pri > proc_pri)
				proc_pri = thread_pri;
		} while ((t = t->t_forw) != prp->p_tlist);

		mutex_exit(&prp->p_lock);
		if (proc_pri == -1)
			continue;

		/*
		 * Swap out processes sleeping with a lower priority
		 * than the one currently being swapped in, if any.
		 */
		if (swapin_prp == NULL || swapin_pri > proc_pri) {
			TRACE_2(TR_FAC_SCHED, TR_CHOOSE_SWAPOUT,
			    "hardswap: prp %p needs %lu", prp, needs);

			if (swapout_prp == NULL || proc_pri < chosen_pri) {
				swapout_prp = prp;
				chosen_pri = proc_pri;
			}
		}
	}

	/*
	 * Acquire the "p_lock" before dropping "pidlock"
	 * to prevent the proc structure from being freed
	 * if the process exits before swapout completes.
	 */
	if (swapout_prp != NULL)
		mutex_enter(&swapout_prp->p_lock);
	mutex_exit(&pidlock);

	if ((prp = swapout_prp) != NULL) {
		uint_t swrss = 0;
		int swapped;

		swapped = swapout(prp, &swrss, HARDSWAP);
		if (swapped) {
			/*
			 * If desperate, we want to give the space obtained
			 * by swapping this process out to processes in core,
			 * so we give them a chance by increasing deficit.
			 */
			prp->p_swrss += swrss;
			if (desperate)
				deficit += MIN(prp->p_swrss, lotsfree);
			hardswap++;
		}
		mutex_exit(&swapout_prp->p_lock);

		if (swapped)
			goto loop;
	}

	/*
	 * Delay for 1 second and look again later.
	 */
	TRACE_3(TR_FAC_SCHED, TR_RUNIN,
	    "schedrunin:runin nswapped: %d, avefree: %ld freemem: %ld",
	    nswapped, avefree, freemem);

block:
	t = curthread;
	thread_lock(t);
	runin++;
	t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
	t->t_whystop = PR_SUSPENDED;
	t->t_whatstop = SUSPEND_NORMAL;
	(void) new_mstate(t, LMS_SLEEP);
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	mutex_exit(&swap_cpr_lock);
	thread_stop(t);		/* change to stop state and drop lock */
	swtch();
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
	mutex_exit(&swap_cpr_lock);
	goto loop;
}

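/*
 * swapdeq() below unlinks a thread from the singly linked swap queue
 * using the pointer-to-pointer walk, so no "previous" pointer is needed;
 * a minimal sketch of the idiom:
 *
 *	kthread_id_t *tpp = &tswap_queue;
 *	while (*tpp != tp)
 *		tpp = &(*tpp)->t_link;
 *	*tpp = tp->t_link;
 */
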
/*
 * Remove the specified thread from the swap queue.
 */
static void
swapdeq(kthread_id_t tp)
{
	kthread_id_t *tpp;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT(tp->t_schedflag & TS_ON_SWAPQ);

	tpp = &tswap_queue;
	for (;;) {
		ASSERT(*tpp != NULL);
		if (*tpp == tp)
			break;
		tpp = &(*tpp)->t_link;
	}
	*tpp = tp->t_link;
	tp->t_schedflag &= ~TS_ON_SWAPQ;
}

/*
 * Swap in lwps.  Returns nonzero on success (i.e., if at least one lwp is
 * swapped in) and 0 on failure.
 */
static int
swapin(proc_t *pp)
{
	kthread_id_t tp;
	int err;
	int num_swapped_in = 0;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));
	ASSERT(pp->p_swapcnt);

top:
	tp = pp->p_tlist;
	do {
		/*
		 * Only swap in eligible lwps (specified by the scheduling
		 * class) which are unloaded and ready to run.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPIN(tp, 0);
		if (thread_pri != -1 && tp->t_state == TS_RUN &&
		    (tp->t_schedflag & TS_LOAD) == 0) {
			size_t stack_size;
			pgcnt_t stack_pages;

			ASSERT((tp->t_schedflag & TS_ON_SWAPQ) == 0);

			thread_unlock(tp);
			/*
			 * Now drop the p_lock since the stack needs
			 * to be brought in.
			 */
			mutex_exit(&pp->p_lock);

			stack_size = swapsize(tp->t_swap);
			stack_pages = btopr(stack_size);
			/* Kernel probe */
			TNF_PROBE_4(swapin_lwp, "vm swap swapin", /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_lwpid, lwpid, tp->t_tid,
			    tnf_kthread_id, tid, tp,
			    tnf_ulong, page_count, stack_pages);

			rw_enter(&kas.a_lock, RW_READER);
			err = segkp_fault(segkp->s_as->a_hat, segkp,
			    tp->t_swap, stack_size, F_SOFTLOCK, S_OTHER);
			rw_exit(&kas.a_lock);

			/*
			 * Re-acquire the p_lock.
			 */
			mutex_enter(&pp->p_lock);
			if (err) {
				num_swapped_in = 0;
				break;
			} else {
#ifdef __sparc
				lwp_swapin(tp);
#endif /* __sparc */
				CPU_STATS_ADDQ(cpup, vm, swapin, 1);
				CPU_STATS_ADDQ(cpup, vm, pgswapin,
				    stack_pages);

				pp->p_swapcnt--;
				pp->p_swrss -= stack_pages;

				thread_lock(tp);
				tp->t_schedflag |= TS_LOAD;
				dq_sruninc(tp);

				/* set swapin time */
				tp->t_stime = ddi_get_lbolt();
				thread_unlock(tp);

				nswapped--;
				tot_swapped_in++;
				num_swapped_in++;

				TRACE_2(TR_FAC_SCHED, TR_SWAPIN,
				    "swapin: pp %p stack_pages %lu",
				    pp, stack_pages);
				goto top;
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);
	return (num_swapped_in);
}

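/*
 * Note that swapin() brings a kernel stack back with segkp_fault(...,
 * F_SOFTLOCK, ...) while swapout() and process_swap_queue() release it
 * with F_SOFTUNLOCK, so the two directions are expected to stay paired
 * for every lwp whose stack is unloaded.
 */
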
/*
 * Swap out lwps.  Returns nonzero on success (i.e., if at least one lwp is
 * swapped out) and 0 on failure.
 */
static int
swapout(proc_t *pp, uint_t *swrss, int swapflags)
{
	kthread_id_t tp;
	pgcnt_t ws_pages = 0;
	int err;
	int swapped_lwps = 0;
	struct as *as = pp->p_as;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));

	if (pp->p_flag & SEXITING)
		return (0);

top:
	tp = pp->p_tlist;
	do {
		klwp_t *lwp = ttolwp(tp);

		/*
		 * Swap out eligible lwps (specified by the scheduling
		 * class) which don't have TS_DONT_SWAP set.  Set the
		 * "intent to swap" flag (TS_SWAPENQ) on threads
		 * which have TS_DONT_SWAP set so that they can be
		 * swapped if and when they reach a safe point.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPOUT(tp, swapflags);
		if (thread_pri != -1) {
			if (tp->t_schedflag & TS_DONT_SWAP) {
				tp->t_schedflag |= TS_SWAPENQ;
				tp->t_trapret = 1;
				aston(tp);
			} else {
				pgcnt_t stack_pages;
				size_t stack_size;

				ASSERT((tp->t_schedflag &
				    (TS_DONT_SWAP | TS_LOAD)) == TS_LOAD);

				if (lock_try(&tp->t_lock)) {
					/*
					 * Remove thread from the swap_queue.
					 */
					if (tp->t_schedflag & TS_ON_SWAPQ) {
						ASSERT(!(tp->t_schedflag &
						    TS_SWAPENQ));
						swapdeq(tp);
					} else if (tp->t_state == TS_RUN)
						dq_srundec(tp);

					tp->t_schedflag &=
					    ~(TS_LOAD | TS_SWAPENQ);
					lock_clear(&tp->t_lock);

					/*
					 * Set swapout time if the thread isn't
					 * sleeping.
					 */
					if (tp->t_state != TS_SLEEP)
						tp->t_stime = ddi_get_lbolt();
					thread_unlock(tp);

					nswapped++;
					tot_swapped_out++;

					lwp->lwp_ru.nswap++;

					/*
					 * Now drop the p_lock since the
					 * stack needs to be pushed out.
					 */
					mutex_exit(&pp->p_lock);

					stack_size = swapsize(tp->t_swap);
					stack_pages = btopr(stack_size);
					ws_pages += stack_pages;
					/* Kernel probe */
					TNF_PROBE_4(swapout_lwp,
					    "vm swap swapout",
					    /* CSTYLED */,
					    tnf_pid, pid, pp->p_pid,
					    tnf_lwpid, lwpid, tp->t_tid,
					    tnf_kthread_id, tid, tp,
					    tnf_ulong, page_count,
					    stack_pages);

					rw_enter(&kas.a_lock, RW_READER);
					err = segkp_fault(segkp->s_as->a_hat,
					    segkp, tp->t_swap, stack_size,
					    F_SOFTUNLOCK, S_WRITE);
					rw_exit(&kas.a_lock);

					if (err) {
						cmn_err(CE_PANIC,
						    "swapout: segkp_fault "
						    "failed err: %d", err);
					}
					CPU_STATS_ADDQ(cpup,
					    vm, pgswapout, stack_pages);

					mutex_enter(&pp->p_lock);
					pp->p_swapcnt++;
					swapped_lwps++;
					goto top;
				}
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);

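	/*
	 * At this point ws_pages holds the kernel-stack pages unloaded
	 * above; the address-space pages pushed by as_swapout() below are
	 * added to it before the total is handed back through *swrss.
	 */
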
	/*
	 * Unload address space when all lwps are swapped out.
	 */
	if (pp->p_swapcnt == pp->p_lwpcnt) {
		size_t as_size = 0;

		/*
		 * Avoid invoking as_swapout() if the process has
		 * no MMU resources since pageout will eventually
		 * steal pages belonging to this address space.  This
		 * saves CPU cycles as the number of pages that are
		 * potentially freed or pushed out by the segment
		 * swapout operation is very small.
		 */
		if (rm_asrss(pp->p_as) != 0)
			as_size = as_swapout(as);

		CPU_STATS_ADDQ(cpup, vm, pgswapout, btop(as_size));
		CPU_STATS_ADDQ(cpup, vm, swapout, 1);
		ws_pages += btop(as_size);

		TRACE_2(TR_FAC_SCHED, TR_SWAPOUT,
		    "swapout: pp %p pages_pushed %lu", pp, ws_pages);
		/* Kernel probe */
		TNF_PROBE_2(swapout_process, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_ulong, page_count, ws_pages);
	}
	*swrss = ws_pages;
	return (swapped_lwps);
}

void
swapout_lwp(klwp_t *lwp)
{
	kthread_id_t tp = curthread;

	ASSERT(curthread == lwptot(lwp));

	/*
	 * Don't insert the thread onto the swap queue if
	 * sufficient memory is available.
	 */
	if (avefree > desfree || (avefree < desfree && freemem > desfree)) {
		thread_lock(tp);
		tp->t_schedflag &= ~TS_SWAPENQ;
		thread_unlock(tp);
		return;
	}

	/*
	 * Lock the thread, then move it from the onproc queue to the
	 * swap queue and set its state to TS_RUN.
	 */
	thread_lock(tp);
	ASSERT(tp->t_state == TS_ONPROC);
	if (tp->t_schedflag & TS_SWAPENQ) {
		tp->t_schedflag &= ~TS_SWAPENQ;

		/*
		 * Set the state of this thread to be runnable
		 * and move it from the onproc queue to the swap queue.
		 */
		disp_swapped_enq(tp);

		/*
		 * Insert the thread onto the swap queue.
		 */
		tp->t_link = tswap_queue;
		tswap_queue = tp;
		tp->t_schedflag |= TS_ON_SWAPQ;

		thread_unlock_nopreempt(tp);

		TRACE_1(TR_FAC_SCHED, TR_SWAPOUT_LWP, "swapout_lwp:%x", lwp);

		swtch();
	} else {
		thread_unlock(tp);
	}
}

/*
 * Swap out all threads on the swap queue.
 */
static void
process_swap_queue(void)
{
	kthread_id_t tp;
	uint_t ws_pages;
	proc_t *pp;
	struct cpu *cpup = CPU;
	klwp_t *lwp;
	int err;

	if (tswap_queue == NULL)
		return;

	/*
	 * Acquire the "swapped_lock" which locks the swap queue,
	 * and unload the stacks of all threads on it.
	 */
	disp_lock_enter(&swapped_lock);
	while ((tp = tswap_queue) != NULL) {
		pgcnt_t stack_pages;
		size_t stack_size;

		tswap_queue = tp->t_link;
		tp->t_link = NULL;

		/*
		 * Drop the "dispatcher lock" before acquiring "t_lock"
		 * to avoid spinning on it since the thread at the front
		 * of the swap queue could be pinned before giving up
		 * its "t_lock" in resume.
		 */
		disp_lock_exit(&swapped_lock);
		lock_set(&tp->t_lock);

		/*
		 * Now, re-acquire the "swapped_lock".  Acquiring this lock
		 * results in locking the thread since its dispatcher lock
		 * (t_lockp) is the "swapped_lock".
		 */
		disp_lock_enter(&swapped_lock);
		ASSERT(tp->t_state == TS_RUN);
		ASSERT(tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ));

		tp->t_schedflag &= ~(TS_LOAD | TS_ON_SWAPQ);
		tp->t_stime = ddi_get_lbolt();	/* swapout time */
		disp_lock_exit(&swapped_lock);
		lock_clear(&tp->t_lock);

		lwp = ttolwp(tp);
		lwp->lwp_ru.nswap++;

		pp = ttoproc(tp);
		stack_size = swapsize(tp->t_swap);
		stack_pages = btopr(stack_size);

		/* Kernel probe */
		TNF_PROBE_4(swapout_lwp, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_lwpid, lwpid, tp->t_tid,
		    tnf_kthread_id, tid, tp,
		    tnf_ulong, page_count, stack_pages);

		rw_enter(&kas.a_lock, RW_READER);
		err = segkp_fault(segkp->s_as->a_hat, segkp, tp->t_swap,
		    stack_size, F_SOFTUNLOCK, S_WRITE);
		rw_exit(&kas.a_lock);

		if (err) {
			cmn_err(CE_PANIC,
			    "process_swap_queue: segkp_fault failed err: %d",
			    err);
		}
		CPU_STATS_ADDQ(cpup, vm, pgswapout, stack_pages);

		nswapped++;
		tot_swapped_out++;
		swapqswap++;

		/*
		 * Don't need p_lock since the swapper is the only
		 * thread which increments/decrements p_swapcnt and p_swrss.
		 */
		ws_pages = stack_pages;
		pp->p_swapcnt++;

		TRACE_1(TR_FAC_SCHED, TR_SWAPQ_LWP, "swaplist: pp %p", pp);

		/*
		 * Unload address space when all lwps are swapped out.
		 */
		if (pp->p_swapcnt == pp->p_lwpcnt) {
			size_t as_size = 0;

			if (rm_asrss(pp->p_as) != 0)
				as_size = as_swapout(pp->p_as);

			CPU_STATS_ADDQ(cpup, vm, pgswapout,
			    btop(as_size));
			CPU_STATS_ADDQ(cpup, vm, swapout, 1);

			ws_pages += btop(as_size);

			TRACE_2(TR_FAC_SCHED, TR_SWAPQ_PROC,
			    "swaplist_proc: pp %p pages_pushed: %lu",
			    pp, ws_pages);
			/* Kernel probe */
			TNF_PROBE_2(swapout_process, "vm swap swapout",
			    /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_ulong, page_count, ws_pages);
		}
		pp->p_swrss += ws_pages;
		disp_lock_enter(&swapped_lock);
	}
	disp_lock_exit(&swapped_lock);
}