17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 22*97eda132Sraf 237c478bd9Sstevel@tonic-gate /* 247c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 257c478bd9Sstevel@tonic-gate * Use is subject to license terms. 267c478bd9Sstevel@tonic-gate */ 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 297c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 307c478bd9Sstevel@tonic-gate 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 337c478bd9Sstevel@tonic-gate 347c478bd9Sstevel@tonic-gate #include <sys/param.h> 357c478bd9Sstevel@tonic-gate #include <sys/types.h> 367c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 377c478bd9Sstevel@tonic-gate #include <sys/systm.h> 387c478bd9Sstevel@tonic-gate #include <sys/proc.h> 397c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 407c478bd9Sstevel@tonic-gate #include <sys/var.h> 417c478bd9Sstevel@tonic-gate #include <sys/tuneable.h> 427c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 437c478bd9Sstevel@tonic-gate #include <sys/buf.h> 447c478bd9Sstevel@tonic-gate #include <sys/disp.h> 457c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 467c478bd9Sstevel@tonic-gate #include <sys/vmparam.h> 477c478bd9Sstevel@tonic-gate #include <sys/class.h> 487c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 497c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 507c478bd9Sstevel@tonic-gate #include <sys/debug.h> 517c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 527c478bd9Sstevel@tonic-gate #include <sys/procfs.h> 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate #include <vm/seg.h> 557c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h> 567c478bd9Sstevel@tonic-gate #include <vm/as.h> 577c478bd9Sstevel@tonic-gate #include <vm/rm.h> 587c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 597c478bd9Sstevel@tonic-gate #include <sys/callb.h> 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate /* 627c478bd9Sstevel@tonic-gate * The swapper sleeps on runout when there is no one to swap in. 637c478bd9Sstevel@tonic-gate * It sleeps on runin when it could not find space to swap someone 647c478bd9Sstevel@tonic-gate * in or after swapping someone in. 657c478bd9Sstevel@tonic-gate */ 667c478bd9Sstevel@tonic-gate char runout; 677c478bd9Sstevel@tonic-gate char runin; 687c478bd9Sstevel@tonic-gate char wake_sched; /* flag tells clock to wake swapper on next tick */ 697c478bd9Sstevel@tonic-gate char wake_sched_sec; /* flag tells clock to wake swapper after a second */ 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate /* 727c478bd9Sstevel@tonic-gate * The swapper swaps processes to reduce memory demand and runs 737c478bd9Sstevel@tonic-gate * when avefree < desfree. The swapper resorts to SOFTSWAP when 747c478bd9Sstevel@tonic-gate * avefree < desfree which results in swapping out all processes 757c478bd9Sstevel@tonic-gate * sleeping for more than maxslp seconds. HARDSWAP occurs when the 767c478bd9Sstevel@tonic-gate * system is on the verge of thrashing and this results in swapping 777c478bd9Sstevel@tonic-gate * out runnable threads or threads sleeping for less than maxslp secs. 787c478bd9Sstevel@tonic-gate * 797c478bd9Sstevel@tonic-gate * The swapper runs through all the active processes in the system 807c478bd9Sstevel@tonic-gate * and invokes the scheduling class specific swapin/swapout routine 817c478bd9Sstevel@tonic-gate * for every thread in the process to obtain an effective priority 827c478bd9Sstevel@tonic-gate * for the process. A priority of -1 implies that the thread isn't 837c478bd9Sstevel@tonic-gate * swappable. This effective priority is used to find the most 847c478bd9Sstevel@tonic-gate * eligible process to swapout or swapin. 857c478bd9Sstevel@tonic-gate * 867c478bd9Sstevel@tonic-gate * NOTE: Threads which have been swapped are not linked on any 877c478bd9Sstevel@tonic-gate * queue and their dispatcher lock points at the "swapped_lock". 887c478bd9Sstevel@tonic-gate * 897c478bd9Sstevel@tonic-gate * Processes containing threads with the TS_DONT_SWAP flag set cannot be 907c478bd9Sstevel@tonic-gate * swapped out immediately by the swapper. This is due to the fact that 917c478bd9Sstevel@tonic-gate * such threads may be holding locks which may be needed by the swapper 927c478bd9Sstevel@tonic-gate * to push its pages out. The TS_SWAPENQ flag is set on such threads 937c478bd9Sstevel@tonic-gate * to prevent them running in user mode. When such threads reach a 947c478bd9Sstevel@tonic-gate * safe point (i.e., are not holding any locks - CL_TRAPRET), they 957c478bd9Sstevel@tonic-gate * queue themseleves onto the swap queue which is processed by the 967c478bd9Sstevel@tonic-gate * swapper. This results in reducing memory demand when the system 977c478bd9Sstevel@tonic-gate * is desparate for memory as the thread can't run in user mode. 987c478bd9Sstevel@tonic-gate * 997c478bd9Sstevel@tonic-gate * The swap queue consists of threads, linked via t_link, which are 1007c478bd9Sstevel@tonic-gate * haven't been swapped, are runnable but not on the run queue. The 1017c478bd9Sstevel@tonic-gate * swap queue is protected by the "swapped_lock". The dispatcher 1027c478bd9Sstevel@tonic-gate * lock (t_lockp) of all threads on the swap queue points at the 1037c478bd9Sstevel@tonic-gate * "swapped_lock". Thus, the entire queue and/or threads on the 1047c478bd9Sstevel@tonic-gate * queue can be locked by acquiring "swapped_lock". 1057c478bd9Sstevel@tonic-gate */ 1067c478bd9Sstevel@tonic-gate static kthread_t *tswap_queue; 1077c478bd9Sstevel@tonic-gate extern disp_lock_t swapped_lock; /* protects swap queue and threads on it */ 1087c478bd9Sstevel@tonic-gate 1097c478bd9Sstevel@tonic-gate int maxslp = 0; 1107c478bd9Sstevel@tonic-gate pgcnt_t avefree; /* 5 sec moving average of free memory */ 1117c478bd9Sstevel@tonic-gate pgcnt_t avefree30; /* 30 sec moving average of free memory */ 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate /* 1147c478bd9Sstevel@tonic-gate * Minimum size used to decide if sufficient memory is available 1157c478bd9Sstevel@tonic-gate * before a process is swapped in. This is necessary since in most 1167c478bd9Sstevel@tonic-gate * cases the actual size of a process (p_swrss) being swapped in 1177c478bd9Sstevel@tonic-gate * is usually 2 pages (kernel stack pages). This is due to the fact 1187c478bd9Sstevel@tonic-gate * almost all user pages of a process are stolen by pageout before 1197c478bd9Sstevel@tonic-gate * the swapper decides to swapout it out. 1207c478bd9Sstevel@tonic-gate */ 1217c478bd9Sstevel@tonic-gate int min_procsize = 12; 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate static int swapin(proc_t *); 1247c478bd9Sstevel@tonic-gate static int swapout(proc_t *, uint_t *, int); 1257c478bd9Sstevel@tonic-gate static void process_swap_queue(); 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate #ifdef __sparc 1287c478bd9Sstevel@tonic-gate extern void lwp_swapin(kthread_t *); 1297c478bd9Sstevel@tonic-gate #endif /* __sparc */ 1307c478bd9Sstevel@tonic-gate 1317c478bd9Sstevel@tonic-gate /* 1327c478bd9Sstevel@tonic-gate * Counters to keep track of the number of swapins or swapouts. 1337c478bd9Sstevel@tonic-gate */ 1347c478bd9Sstevel@tonic-gate uint_t tot_swapped_in, tot_swapped_out; 1357c478bd9Sstevel@tonic-gate uint_t softswap, hardswap, swapqswap; 1367c478bd9Sstevel@tonic-gate 1377c478bd9Sstevel@tonic-gate /* 1387c478bd9Sstevel@tonic-gate * Macro to determine if a process is eligble to be swapped. 1397c478bd9Sstevel@tonic-gate */ 1407c478bd9Sstevel@tonic-gate #define not_swappable(p) \ 1417c478bd9Sstevel@tonic-gate (((p)->p_flag & SSYS) || (p)->p_stat == SIDL || \ 1427c478bd9Sstevel@tonic-gate (p)->p_stat == SZOMB || (p)->p_as == NULL || \ 1437c478bd9Sstevel@tonic-gate (p)->p_as == &kas) 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate /* 1467c478bd9Sstevel@tonic-gate * Memory scheduler. 1477c478bd9Sstevel@tonic-gate */ 1487c478bd9Sstevel@tonic-gate void 1497c478bd9Sstevel@tonic-gate sched() 1507c478bd9Sstevel@tonic-gate { 1517c478bd9Sstevel@tonic-gate kthread_id_t t; 1527c478bd9Sstevel@tonic-gate pri_t proc_pri; 1537c478bd9Sstevel@tonic-gate pri_t thread_pri; 1547c478bd9Sstevel@tonic-gate pri_t swapin_pri; 1557c478bd9Sstevel@tonic-gate int desperate; 1567c478bd9Sstevel@tonic-gate pgcnt_t needs; 1577c478bd9Sstevel@tonic-gate int divisor; 1587c478bd9Sstevel@tonic-gate proc_t *prp; 1597c478bd9Sstevel@tonic-gate proc_t *swapout_prp; 1607c478bd9Sstevel@tonic-gate proc_t *swapin_prp; 1617c478bd9Sstevel@tonic-gate spgcnt_t avail; 1627c478bd9Sstevel@tonic-gate int chosen_pri; 1637c478bd9Sstevel@tonic-gate time_t swapout_time; 1647c478bd9Sstevel@tonic-gate time_t swapin_proc_time; 1657c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 1667c478bd9Sstevel@tonic-gate kmutex_t swap_cpr_lock; 1677c478bd9Sstevel@tonic-gate 1687c478bd9Sstevel@tonic-gate mutex_init(&swap_cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1697c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &swap_cpr_lock, callb_generic_cpr, "sched"); 1707c478bd9Sstevel@tonic-gate if (maxslp == 0) 1717c478bd9Sstevel@tonic-gate maxslp = MAXSLP; 1727c478bd9Sstevel@tonic-gate loop: 1737c478bd9Sstevel@tonic-gate needs = 0; 1747c478bd9Sstevel@tonic-gate desperate = 0; 1757c478bd9Sstevel@tonic-gate 1767c478bd9Sstevel@tonic-gate swapin_pri = v.v_nglobpris; 1777c478bd9Sstevel@tonic-gate swapin_prp = NULL; 1787c478bd9Sstevel@tonic-gate chosen_pri = -1; 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate process_swap_queue(); 1817c478bd9Sstevel@tonic-gate 1827c478bd9Sstevel@tonic-gate /* 1837c478bd9Sstevel@tonic-gate * Set desperate if 1847c478bd9Sstevel@tonic-gate * 1. At least 2 runnable processes (on average). 1857c478bd9Sstevel@tonic-gate * 2. Short (5 sec) and longer (30 sec) average is less 1867c478bd9Sstevel@tonic-gate * than minfree and desfree respectively. 1877c478bd9Sstevel@tonic-gate * 3. Pagein + pageout rate is excessive. 1887c478bd9Sstevel@tonic-gate */ 1897c478bd9Sstevel@tonic-gate if (avenrun[0] >= 2 * FSCALE && 1907c478bd9Sstevel@tonic-gate (MAX(avefree, avefree30) < desfree) && 1917c478bd9Sstevel@tonic-gate (pginrate + pgoutrate > maxpgio || avefree < minfree)) { 1927c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_SCHED, TR_DESPERATE, 1937c478bd9Sstevel@tonic-gate "desp:avefree: %d, avefree30: %d, freemem: %d" 1947c478bd9Sstevel@tonic-gate " pginrate: %d\n", avefree, avefree30, freemem, pginrate); 1957c478bd9Sstevel@tonic-gate desperate = 1; 1967c478bd9Sstevel@tonic-gate goto unload; 1977c478bd9Sstevel@tonic-gate } 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate /* 2007c478bd9Sstevel@tonic-gate * Search list of processes to swapin and swapout deadwood. 2017c478bd9Sstevel@tonic-gate */ 2027c478bd9Sstevel@tonic-gate swapin_proc_time = 0; 2037c478bd9Sstevel@tonic-gate top: 2047c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 2057c478bd9Sstevel@tonic-gate for (prp = practive; prp != NULL; prp = prp->p_next) { 2067c478bd9Sstevel@tonic-gate if (not_swappable(prp)) 2077c478bd9Sstevel@tonic-gate continue; 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate /* 2107c478bd9Sstevel@tonic-gate * Look at processes with at least one swapped lwp. 2117c478bd9Sstevel@tonic-gate */ 2127c478bd9Sstevel@tonic-gate if (prp->p_swapcnt) { 2137c478bd9Sstevel@tonic-gate time_t proc_time; 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* 2167c478bd9Sstevel@tonic-gate * Higher priority processes are good candidates 2177c478bd9Sstevel@tonic-gate * to swapin. 2187c478bd9Sstevel@tonic-gate */ 2197c478bd9Sstevel@tonic-gate mutex_enter(&prp->p_lock); 2207c478bd9Sstevel@tonic-gate proc_pri = -1; 2217c478bd9Sstevel@tonic-gate t = prp->p_tlist; 2227c478bd9Sstevel@tonic-gate proc_time = 0; 2237c478bd9Sstevel@tonic-gate do { 2247c478bd9Sstevel@tonic-gate if (t->t_schedflag & TS_LOAD) 2257c478bd9Sstevel@tonic-gate continue; 2267c478bd9Sstevel@tonic-gate 2277c478bd9Sstevel@tonic-gate thread_lock(t); 2287c478bd9Sstevel@tonic-gate thread_pri = CL_SWAPIN(t, 0); 2297c478bd9Sstevel@tonic-gate thread_unlock(t); 2307c478bd9Sstevel@tonic-gate 2317c478bd9Sstevel@tonic-gate if (t->t_stime - proc_time > 0) 2327c478bd9Sstevel@tonic-gate proc_time = t->t_stime; 2337c478bd9Sstevel@tonic-gate if (thread_pri > proc_pri) 2347c478bd9Sstevel@tonic-gate proc_pri = thread_pri; 2357c478bd9Sstevel@tonic-gate } while ((t = t->t_forw) != prp->p_tlist); 2367c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate if (proc_pri == -1) 2397c478bd9Sstevel@tonic-gate continue; 2407c478bd9Sstevel@tonic-gate 2417c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SCHED, TR_CHOOSE_SWAPIN, 2427c478bd9Sstevel@tonic-gate "prp %p epri %d proc_time %d", 2437c478bd9Sstevel@tonic-gate prp, proc_pri, proc_time); 2447c478bd9Sstevel@tonic-gate 2457c478bd9Sstevel@tonic-gate /* 2467c478bd9Sstevel@tonic-gate * Swapin processes with a high effective priority. 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate if (swapin_prp == NULL || proc_pri > chosen_pri) { 2497c478bd9Sstevel@tonic-gate swapin_prp = prp; 2507c478bd9Sstevel@tonic-gate chosen_pri = proc_pri; 2517c478bd9Sstevel@tonic-gate swapin_pri = proc_pri; 2527c478bd9Sstevel@tonic-gate swapin_proc_time = proc_time; 2537c478bd9Sstevel@tonic-gate } 2547c478bd9Sstevel@tonic-gate } else { 2557c478bd9Sstevel@tonic-gate /* 2567c478bd9Sstevel@tonic-gate * No need to soft swap if we have sufficient 2577c478bd9Sstevel@tonic-gate * memory. 2587c478bd9Sstevel@tonic-gate */ 2597c478bd9Sstevel@tonic-gate if (avefree > desfree || 2607c478bd9Sstevel@tonic-gate avefree < desfree && freemem > desfree) 2617c478bd9Sstevel@tonic-gate continue; 2627c478bd9Sstevel@tonic-gate 2637c478bd9Sstevel@tonic-gate /* 264*97eda132Sraf * Skip processes that are exiting 265*97eda132Sraf * or whose address spaces are locked. 2667c478bd9Sstevel@tonic-gate */ 2677c478bd9Sstevel@tonic-gate mutex_enter(&prp->p_lock); 268*97eda132Sraf if ((prp->p_flag & SEXITING) || 2697c478bd9Sstevel@tonic-gate (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) { 2707c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 2717c478bd9Sstevel@tonic-gate continue; 2727c478bd9Sstevel@tonic-gate } 2737c478bd9Sstevel@tonic-gate 2747c478bd9Sstevel@tonic-gate /* 2757c478bd9Sstevel@tonic-gate * Softswapping to kick out deadwood. 2767c478bd9Sstevel@tonic-gate */ 2777c478bd9Sstevel@tonic-gate proc_pri = -1; 2787c478bd9Sstevel@tonic-gate t = prp->p_tlist; 2797c478bd9Sstevel@tonic-gate do { 2807c478bd9Sstevel@tonic-gate if ((t->t_schedflag & (TS_SWAPENQ | 2817c478bd9Sstevel@tonic-gate TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD) 2827c478bd9Sstevel@tonic-gate continue; 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate thread_lock(t); 2857c478bd9Sstevel@tonic-gate thread_pri = CL_SWAPOUT(t, SOFTSWAP); 2867c478bd9Sstevel@tonic-gate thread_unlock(t); 2877c478bd9Sstevel@tonic-gate if (thread_pri > proc_pri) 2887c478bd9Sstevel@tonic-gate proc_pri = thread_pri; 2897c478bd9Sstevel@tonic-gate } while ((t = t->t_forw) != prp->p_tlist); 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate if (proc_pri != -1) { 2927c478bd9Sstevel@tonic-gate uint_t swrss; 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 2957c478bd9Sstevel@tonic-gate 2967c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_SCHED, TR_SOFTSWAP, 2977c478bd9Sstevel@tonic-gate "softswap:prp %p", prp); 2987c478bd9Sstevel@tonic-gate 2997c478bd9Sstevel@tonic-gate (void) swapout(prp, &swrss, SOFTSWAP); 3007c478bd9Sstevel@tonic-gate softswap++; 3017c478bd9Sstevel@tonic-gate prp->p_swrss += swrss; 3027c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 3037c478bd9Sstevel@tonic-gate goto top; 3047c478bd9Sstevel@tonic-gate } 3057c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 3067c478bd9Sstevel@tonic-gate } 3077c478bd9Sstevel@tonic-gate } 3087c478bd9Sstevel@tonic-gate if (swapin_prp != NULL) 3097c478bd9Sstevel@tonic-gate mutex_enter(&swapin_prp->p_lock); 3107c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate if (swapin_prp == NULL) { 3137c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SCHED, TR_RUNOUT, 3147c478bd9Sstevel@tonic-gate "schedrunout:runout nswapped: %d, avefree: %ld freemem: %ld", 3157c478bd9Sstevel@tonic-gate nswapped, avefree, freemem); 3167c478bd9Sstevel@tonic-gate 3177c478bd9Sstevel@tonic-gate t = curthread; 3187c478bd9Sstevel@tonic-gate thread_lock(t); 3197c478bd9Sstevel@tonic-gate runout++; 3207c478bd9Sstevel@tonic-gate t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART); 3217c478bd9Sstevel@tonic-gate t->t_whystop = PR_SUSPENDED; 3227c478bd9Sstevel@tonic-gate t->t_whatstop = SUSPEND_NORMAL; 3237c478bd9Sstevel@tonic-gate (void) new_mstate(t, LMS_SLEEP); 3247c478bd9Sstevel@tonic-gate mutex_enter(&swap_cpr_lock); 3257c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 3267c478bd9Sstevel@tonic-gate mutex_exit(&swap_cpr_lock); 3277c478bd9Sstevel@tonic-gate thread_stop(t); /* change state and drop lock */ 3287c478bd9Sstevel@tonic-gate swtch(); 3297c478bd9Sstevel@tonic-gate mutex_enter(&swap_cpr_lock); 3307c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock); 3317c478bd9Sstevel@tonic-gate mutex_exit(&swap_cpr_lock); 3327c478bd9Sstevel@tonic-gate goto loop; 3337c478bd9Sstevel@tonic-gate } 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate /* 3367c478bd9Sstevel@tonic-gate * Decide how deserving this process is to be brought in. 3377c478bd9Sstevel@tonic-gate * Needs is an estimate of how much core the process will 3387c478bd9Sstevel@tonic-gate * need. If the process has been out for a while, then we 3397c478bd9Sstevel@tonic-gate * will bring it in with 1/2 the core needed, otherwise 3407c478bd9Sstevel@tonic-gate * we are conservative. 3417c478bd9Sstevel@tonic-gate */ 3427c478bd9Sstevel@tonic-gate divisor = 1; 3437c478bd9Sstevel@tonic-gate swapout_time = (lbolt - swapin_proc_time) / hz; 3447c478bd9Sstevel@tonic-gate if (swapout_time > maxslp / 2) 3457c478bd9Sstevel@tonic-gate divisor = 2; 3467c478bd9Sstevel@tonic-gate 3477c478bd9Sstevel@tonic-gate needs = MIN(swapin_prp->p_swrss, lotsfree); 3487c478bd9Sstevel@tonic-gate needs = MAX(needs, min_procsize); 3497c478bd9Sstevel@tonic-gate needs = needs / divisor; 3507c478bd9Sstevel@tonic-gate 3517c478bd9Sstevel@tonic-gate /* 3527c478bd9Sstevel@tonic-gate * Use freemem, since we want processes to be swapped 3537c478bd9Sstevel@tonic-gate * in quickly. 3547c478bd9Sstevel@tonic-gate */ 3557c478bd9Sstevel@tonic-gate avail = freemem - deficit; 3567c478bd9Sstevel@tonic-gate if (avail > (spgcnt_t)needs) { 3577c478bd9Sstevel@tonic-gate deficit += needs; 3587c478bd9Sstevel@tonic-gate 3597c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_SWAPIN_VALUES, 3607c478bd9Sstevel@tonic-gate "swapin_values: prp %p needs %lu", swapin_prp, needs); 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate if (swapin(swapin_prp)) { 3637c478bd9Sstevel@tonic-gate mutex_exit(&swapin_prp->p_lock); 3647c478bd9Sstevel@tonic-gate goto loop; 3657c478bd9Sstevel@tonic-gate } 3667c478bd9Sstevel@tonic-gate deficit -= MIN(needs, deficit); 3677c478bd9Sstevel@tonic-gate mutex_exit(&swapin_prp->p_lock); 3687c478bd9Sstevel@tonic-gate } else { 3697c478bd9Sstevel@tonic-gate mutex_exit(&swapin_prp->p_lock); 3707c478bd9Sstevel@tonic-gate /* 3717c478bd9Sstevel@tonic-gate * If deficit is high, too many processes have been 3727c478bd9Sstevel@tonic-gate * swapped in so wait a sec before attempting to 3737c478bd9Sstevel@tonic-gate * swapin more. 3747c478bd9Sstevel@tonic-gate */ 3757c478bd9Sstevel@tonic-gate if (freemem > needs) { 3767c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_HIGH_DEFICIT, 3777c478bd9Sstevel@tonic-gate "deficit: prp %p needs %lu", swapin_prp, needs); 3787c478bd9Sstevel@tonic-gate goto block; 3797c478bd9Sstevel@tonic-gate } 3807c478bd9Sstevel@tonic-gate } 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_UNLOAD, 3837c478bd9Sstevel@tonic-gate "unload: prp %p needs %lu", swapin_prp, needs); 3847c478bd9Sstevel@tonic-gate 3857c478bd9Sstevel@tonic-gate unload: 3867c478bd9Sstevel@tonic-gate /* 3877c478bd9Sstevel@tonic-gate * Unload all unloadable modules, free all other memory 3887c478bd9Sstevel@tonic-gate * resources we can find, then look for a thread to hardswap. 3897c478bd9Sstevel@tonic-gate */ 3907c478bd9Sstevel@tonic-gate modreap(); 3917c478bd9Sstevel@tonic-gate segkp_cache_free(); 3927c478bd9Sstevel@tonic-gate 3937c478bd9Sstevel@tonic-gate swapout_prp = NULL; 3947c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 3957c478bd9Sstevel@tonic-gate for (prp = practive; prp != NULL; prp = prp->p_next) { 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate /* 3987c478bd9Sstevel@tonic-gate * No need to soft swap if we have sufficient 3997c478bd9Sstevel@tonic-gate * memory. 4007c478bd9Sstevel@tonic-gate */ 4017c478bd9Sstevel@tonic-gate if (not_swappable(prp)) 4027c478bd9Sstevel@tonic-gate continue; 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate if (avefree > minfree || 4057c478bd9Sstevel@tonic-gate avefree < minfree && freemem > desfree) { 4067c478bd9Sstevel@tonic-gate swapout_prp = NULL; 4077c478bd9Sstevel@tonic-gate break; 4087c478bd9Sstevel@tonic-gate } 4097c478bd9Sstevel@tonic-gate 4107c478bd9Sstevel@tonic-gate /* 411*97eda132Sraf * Skip processes that are exiting 412*97eda132Sraf * or whose address spaces are locked. 4137c478bd9Sstevel@tonic-gate */ 4147c478bd9Sstevel@tonic-gate mutex_enter(&prp->p_lock); 415*97eda132Sraf if ((prp->p_flag & SEXITING) || 4167c478bd9Sstevel@tonic-gate (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) { 4177c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 4187c478bd9Sstevel@tonic-gate continue; 4197c478bd9Sstevel@tonic-gate } 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate proc_pri = -1; 4227c478bd9Sstevel@tonic-gate t = prp->p_tlist; 4237c478bd9Sstevel@tonic-gate do { 4247c478bd9Sstevel@tonic-gate if ((t->t_schedflag & (TS_SWAPENQ | 4257c478bd9Sstevel@tonic-gate TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD) 4267c478bd9Sstevel@tonic-gate continue; 4277c478bd9Sstevel@tonic-gate 4287c478bd9Sstevel@tonic-gate thread_lock(t); 4297c478bd9Sstevel@tonic-gate thread_pri = CL_SWAPOUT(t, HARDSWAP); 4307c478bd9Sstevel@tonic-gate thread_unlock(t); 4317c478bd9Sstevel@tonic-gate if (thread_pri > proc_pri) 4327c478bd9Sstevel@tonic-gate proc_pri = thread_pri; 4337c478bd9Sstevel@tonic-gate } while ((t = t->t_forw) != prp->p_tlist); 4347c478bd9Sstevel@tonic-gate 4357c478bd9Sstevel@tonic-gate mutex_exit(&prp->p_lock); 4367c478bd9Sstevel@tonic-gate if (proc_pri == -1) 4377c478bd9Sstevel@tonic-gate continue; 4387c478bd9Sstevel@tonic-gate 4397c478bd9Sstevel@tonic-gate /* 4407c478bd9Sstevel@tonic-gate * Swapout processes sleeping with a lower priority 4417c478bd9Sstevel@tonic-gate * than the one currently being swapped in, if any. 4427c478bd9Sstevel@tonic-gate */ 4437c478bd9Sstevel@tonic-gate if (swapin_prp == NULL || swapin_pri > proc_pri) { 4447c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_CHOOSE_SWAPOUT, 4457c478bd9Sstevel@tonic-gate "hardswap: prp %p needs %lu", prp, needs); 4467c478bd9Sstevel@tonic-gate 4477c478bd9Sstevel@tonic-gate if (swapout_prp == NULL || proc_pri < chosen_pri) { 4487c478bd9Sstevel@tonic-gate swapout_prp = prp; 4497c478bd9Sstevel@tonic-gate chosen_pri = proc_pri; 4507c478bd9Sstevel@tonic-gate } 4517c478bd9Sstevel@tonic-gate } 4527c478bd9Sstevel@tonic-gate } 4537c478bd9Sstevel@tonic-gate 4547c478bd9Sstevel@tonic-gate /* 4557c478bd9Sstevel@tonic-gate * Acquire the "p_lock" before dropping "pidlock" 4567c478bd9Sstevel@tonic-gate * to prevent the proc structure from being freed 4577c478bd9Sstevel@tonic-gate * if the process exits before swapout completes. 4587c478bd9Sstevel@tonic-gate */ 4597c478bd9Sstevel@tonic-gate if (swapout_prp != NULL) 4607c478bd9Sstevel@tonic-gate mutex_enter(&swapout_prp->p_lock); 4617c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate if ((prp = swapout_prp) != NULL) { 4647c478bd9Sstevel@tonic-gate uint_t swrss = 0; 4657c478bd9Sstevel@tonic-gate int swapped; 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate swapped = swapout(prp, &swrss, HARDSWAP); 4687c478bd9Sstevel@tonic-gate if (swapped) { 4697c478bd9Sstevel@tonic-gate /* 4707c478bd9Sstevel@tonic-gate * If desperate, we want to give the space obtained 4717c478bd9Sstevel@tonic-gate * by swapping this process out to processes in core, 4727c478bd9Sstevel@tonic-gate * so we give them a chance by increasing deficit. 4737c478bd9Sstevel@tonic-gate */ 4747c478bd9Sstevel@tonic-gate prp->p_swrss += swrss; 4757c478bd9Sstevel@tonic-gate if (desperate) 4767c478bd9Sstevel@tonic-gate deficit += MIN(prp->p_swrss, lotsfree); 4777c478bd9Sstevel@tonic-gate hardswap++; 4787c478bd9Sstevel@tonic-gate } 4797c478bd9Sstevel@tonic-gate mutex_exit(&swapout_prp->p_lock); 4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate if (swapped) 4827c478bd9Sstevel@tonic-gate goto loop; 4837c478bd9Sstevel@tonic-gate } 4847c478bd9Sstevel@tonic-gate 4857c478bd9Sstevel@tonic-gate /* 4867c478bd9Sstevel@tonic-gate * Delay for 1 second and look again later. 4877c478bd9Sstevel@tonic-gate */ 4887c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SCHED, TR_RUNIN, 4897c478bd9Sstevel@tonic-gate "schedrunin:runin nswapped: %d, avefree: %ld freemem: %ld", 4907c478bd9Sstevel@tonic-gate nswapped, avefree, freemem); 4917c478bd9Sstevel@tonic-gate 4927c478bd9Sstevel@tonic-gate block: 4937c478bd9Sstevel@tonic-gate t = curthread; 4947c478bd9Sstevel@tonic-gate thread_lock(t); 4957c478bd9Sstevel@tonic-gate runin++; 4967c478bd9Sstevel@tonic-gate t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART); 4977c478bd9Sstevel@tonic-gate t->t_whystop = PR_SUSPENDED; 4987c478bd9Sstevel@tonic-gate t->t_whatstop = SUSPEND_NORMAL; 4997c478bd9Sstevel@tonic-gate (void) new_mstate(t, LMS_SLEEP); 5007c478bd9Sstevel@tonic-gate mutex_enter(&swap_cpr_lock); 5017c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 5027c478bd9Sstevel@tonic-gate mutex_exit(&swap_cpr_lock); 5037c478bd9Sstevel@tonic-gate thread_stop(t); /* change to stop state and drop lock */ 5047c478bd9Sstevel@tonic-gate swtch(); 5057c478bd9Sstevel@tonic-gate mutex_enter(&swap_cpr_lock); 5067c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock); 5077c478bd9Sstevel@tonic-gate mutex_exit(&swap_cpr_lock); 5087c478bd9Sstevel@tonic-gate goto loop; 5097c478bd9Sstevel@tonic-gate } 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate /* 5127c478bd9Sstevel@tonic-gate * Remove the specified thread from the swap queue. 5137c478bd9Sstevel@tonic-gate */ 5147c478bd9Sstevel@tonic-gate static void 5157c478bd9Sstevel@tonic-gate swapdeq(kthread_id_t tp) 5167c478bd9Sstevel@tonic-gate { 5177c478bd9Sstevel@tonic-gate kthread_id_t *tpp; 5187c478bd9Sstevel@tonic-gate 5197c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 5207c478bd9Sstevel@tonic-gate ASSERT(tp->t_schedflag & TS_ON_SWAPQ); 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate tpp = &tswap_queue; 5237c478bd9Sstevel@tonic-gate for (;;) { 5247c478bd9Sstevel@tonic-gate ASSERT(*tpp != NULL); 5257c478bd9Sstevel@tonic-gate if (*tpp == tp) 5267c478bd9Sstevel@tonic-gate break; 5277c478bd9Sstevel@tonic-gate tpp = &(*tpp)->t_link; 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate *tpp = tp->t_link; 5307c478bd9Sstevel@tonic-gate tp->t_schedflag &= ~TS_ON_SWAPQ; 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate /* 5347c478bd9Sstevel@tonic-gate * Swap in lwps. Returns nonzero on success (i.e., if at least one lwp is 5357c478bd9Sstevel@tonic-gate * swapped in) and 0 on failure. 5367c478bd9Sstevel@tonic-gate */ 5377c478bd9Sstevel@tonic-gate static int 5387c478bd9Sstevel@tonic-gate swapin(proc_t *pp) 5397c478bd9Sstevel@tonic-gate { 5407c478bd9Sstevel@tonic-gate kthread_id_t tp; 5417c478bd9Sstevel@tonic-gate int err; 5427c478bd9Sstevel@tonic-gate int num_swapped_in = 0; 5437c478bd9Sstevel@tonic-gate struct cpu *cpup = CPU; 5447c478bd9Sstevel@tonic-gate pri_t thread_pri; 5457c478bd9Sstevel@tonic-gate 5467c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pp->p_lock)); 5477c478bd9Sstevel@tonic-gate ASSERT(pp->p_swapcnt); 5487c478bd9Sstevel@tonic-gate 5497c478bd9Sstevel@tonic-gate top: 5507c478bd9Sstevel@tonic-gate tp = pp->p_tlist; 5517c478bd9Sstevel@tonic-gate do { 5527c478bd9Sstevel@tonic-gate /* 5537c478bd9Sstevel@tonic-gate * Only swapin eligible lwps (specified by the scheduling 5547c478bd9Sstevel@tonic-gate * class) which are unloaded and ready to run. 5557c478bd9Sstevel@tonic-gate */ 5567c478bd9Sstevel@tonic-gate thread_lock(tp); 5577c478bd9Sstevel@tonic-gate thread_pri = CL_SWAPIN(tp, 0); 5587c478bd9Sstevel@tonic-gate if (thread_pri != -1 && tp->t_state == TS_RUN && 5597c478bd9Sstevel@tonic-gate (tp->t_schedflag & TS_LOAD) == 0) { 5607c478bd9Sstevel@tonic-gate size_t stack_size; 5617c478bd9Sstevel@tonic-gate pgcnt_t stack_pages; 5627c478bd9Sstevel@tonic-gate 5637c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ON_SWAPQ) == 0); 5647c478bd9Sstevel@tonic-gate 5657c478bd9Sstevel@tonic-gate thread_unlock(tp); 5667c478bd9Sstevel@tonic-gate /* 5677c478bd9Sstevel@tonic-gate * Now drop the p_lock since the stack needs 5687c478bd9Sstevel@tonic-gate * to brought in. 5697c478bd9Sstevel@tonic-gate */ 5707c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate stack_size = swapsize(tp->t_swap); 5737c478bd9Sstevel@tonic-gate stack_pages = btopr(stack_size); 5747c478bd9Sstevel@tonic-gate /* Kernel probe */ 5757c478bd9Sstevel@tonic-gate TNF_PROBE_4(swapin_lwp, "vm swap swapin", /* CSTYLED */, 5767c478bd9Sstevel@tonic-gate tnf_pid, pid, pp->p_pid, 5777c478bd9Sstevel@tonic-gate tnf_lwpid, lwpid, tp->t_tid, 5787c478bd9Sstevel@tonic-gate tnf_kthread_id, tid, tp, 5797c478bd9Sstevel@tonic-gate tnf_ulong, page_count, stack_pages); 5807c478bd9Sstevel@tonic-gate 5817c478bd9Sstevel@tonic-gate rw_enter(&kas.a_lock, RW_READER); 5827c478bd9Sstevel@tonic-gate err = segkp_fault(segkp->s_as->a_hat, segkp, 5837c478bd9Sstevel@tonic-gate tp->t_swap, stack_size, F_SOFTLOCK, S_OTHER); 5847c478bd9Sstevel@tonic-gate rw_exit(&kas.a_lock); 5857c478bd9Sstevel@tonic-gate 5867c478bd9Sstevel@tonic-gate #ifdef __sparc 5877c478bd9Sstevel@tonic-gate lwp_swapin(tp); 5887c478bd9Sstevel@tonic-gate #endif /* __sparc */ 5897c478bd9Sstevel@tonic-gate 5907c478bd9Sstevel@tonic-gate /* 5917c478bd9Sstevel@tonic-gate * Re-acquire the p_lock. 5927c478bd9Sstevel@tonic-gate */ 5937c478bd9Sstevel@tonic-gate mutex_enter(&pp->p_lock); 5947c478bd9Sstevel@tonic-gate if (err) { 5957c478bd9Sstevel@tonic-gate num_swapped_in = 0; 5967c478bd9Sstevel@tonic-gate break; 5977c478bd9Sstevel@tonic-gate } else { 5987c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, swapin, 1); 5997c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgswapin, 6007c478bd9Sstevel@tonic-gate stack_pages); 6017c478bd9Sstevel@tonic-gate 6027c478bd9Sstevel@tonic-gate pp->p_swapcnt--; 6037c478bd9Sstevel@tonic-gate pp->p_swrss -= stack_pages; 6047c478bd9Sstevel@tonic-gate 6057c478bd9Sstevel@tonic-gate thread_lock(tp); 6067c478bd9Sstevel@tonic-gate tp->t_schedflag |= TS_LOAD; 6077c478bd9Sstevel@tonic-gate dq_sruninc(tp); 6087c478bd9Sstevel@tonic-gate 6097c478bd9Sstevel@tonic-gate tp->t_stime = lbolt; /* set swapin time */ 6107c478bd9Sstevel@tonic-gate thread_unlock(tp); 6117c478bd9Sstevel@tonic-gate 6127c478bd9Sstevel@tonic-gate nswapped--; 6137c478bd9Sstevel@tonic-gate tot_swapped_in++; 6147c478bd9Sstevel@tonic-gate num_swapped_in++; 6157c478bd9Sstevel@tonic-gate 6167c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_SWAPIN, 6177c478bd9Sstevel@tonic-gate "swapin: pp %p stack_pages %lu", 6187c478bd9Sstevel@tonic-gate pp, stack_pages); 6197c478bd9Sstevel@tonic-gate goto top; 6207c478bd9Sstevel@tonic-gate } 6217c478bd9Sstevel@tonic-gate } 6227c478bd9Sstevel@tonic-gate thread_unlock(tp); 6237c478bd9Sstevel@tonic-gate } while ((tp = tp->t_forw) != pp->p_tlist); 6247c478bd9Sstevel@tonic-gate return (num_swapped_in); 6257c478bd9Sstevel@tonic-gate } 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 6287c478bd9Sstevel@tonic-gate * Swap out lwps. Returns nonzero on success (i.e., if at least one lwp is 6297c478bd9Sstevel@tonic-gate * swapped out) and 0 on failure. 6307c478bd9Sstevel@tonic-gate */ 6317c478bd9Sstevel@tonic-gate static int 6327c478bd9Sstevel@tonic-gate swapout(proc_t *pp, uint_t *swrss, int swapflags) 6337c478bd9Sstevel@tonic-gate { 6347c478bd9Sstevel@tonic-gate kthread_id_t tp; 6357c478bd9Sstevel@tonic-gate pgcnt_t ws_pages = 0; 6367c478bd9Sstevel@tonic-gate int err; 6377c478bd9Sstevel@tonic-gate int swapped_lwps = 0; 6387c478bd9Sstevel@tonic-gate struct as *as = pp->p_as; 6397c478bd9Sstevel@tonic-gate struct cpu *cpup = CPU; 6407c478bd9Sstevel@tonic-gate pri_t thread_pri; 6417c478bd9Sstevel@tonic-gate 6427c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pp->p_lock)); 6437c478bd9Sstevel@tonic-gate 644*97eda132Sraf if (pp->p_flag & SEXITING) 6457c478bd9Sstevel@tonic-gate return (0); 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate top: 6487c478bd9Sstevel@tonic-gate tp = pp->p_tlist; 6497c478bd9Sstevel@tonic-gate do { 6507c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(tp); 6517c478bd9Sstevel@tonic-gate 6527c478bd9Sstevel@tonic-gate /* 6537c478bd9Sstevel@tonic-gate * Swapout eligible lwps (specified by the scheduling 6547c478bd9Sstevel@tonic-gate * class) which don't have TS_DONT_SWAP set. Set the 6557c478bd9Sstevel@tonic-gate * "intent to swap" flag (TS_SWAPENQ) on threads 6567c478bd9Sstevel@tonic-gate * which have TS_DONT_SWAP set so that they can be 6577c478bd9Sstevel@tonic-gate * swapped if and when they reach a safe point. 6587c478bd9Sstevel@tonic-gate */ 6597c478bd9Sstevel@tonic-gate thread_lock(tp); 6607c478bd9Sstevel@tonic-gate thread_pri = CL_SWAPOUT(tp, swapflags); 6617c478bd9Sstevel@tonic-gate if (thread_pri != -1) { 6627c478bd9Sstevel@tonic-gate if (tp->t_schedflag & TS_DONT_SWAP) { 6637c478bd9Sstevel@tonic-gate tp->t_schedflag |= TS_SWAPENQ; 6647c478bd9Sstevel@tonic-gate tp->t_trapret = 1; 6657c478bd9Sstevel@tonic-gate aston(tp); 6667c478bd9Sstevel@tonic-gate } else { 6677c478bd9Sstevel@tonic-gate pgcnt_t stack_pages; 6687c478bd9Sstevel@tonic-gate size_t stack_size; 6697c478bd9Sstevel@tonic-gate 6707c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & 6717c478bd9Sstevel@tonic-gate (TS_DONT_SWAP | TS_LOAD)) == TS_LOAD); 6727c478bd9Sstevel@tonic-gate 6737c478bd9Sstevel@tonic-gate if (lock_try(&tp->t_lock)) { 6747c478bd9Sstevel@tonic-gate /* 6757c478bd9Sstevel@tonic-gate * Remove thread from the swap_queue. 6767c478bd9Sstevel@tonic-gate */ 6777c478bd9Sstevel@tonic-gate if (tp->t_schedflag & TS_ON_SWAPQ) { 6787c478bd9Sstevel@tonic-gate ASSERT(!(tp->t_schedflag & 6797c478bd9Sstevel@tonic-gate TS_SWAPENQ)); 6807c478bd9Sstevel@tonic-gate swapdeq(tp); 6817c478bd9Sstevel@tonic-gate } else if (tp->t_state == TS_RUN) 6827c478bd9Sstevel@tonic-gate dq_srundec(tp); 6837c478bd9Sstevel@tonic-gate 6847c478bd9Sstevel@tonic-gate tp->t_schedflag &= 6857c478bd9Sstevel@tonic-gate ~(TS_LOAD | TS_SWAPENQ); 6867c478bd9Sstevel@tonic-gate lock_clear(&tp->t_lock); 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate /* 6897c478bd9Sstevel@tonic-gate * Set swapout time if the thread isn't 6907c478bd9Sstevel@tonic-gate * sleeping. 6917c478bd9Sstevel@tonic-gate */ 6927c478bd9Sstevel@tonic-gate if (tp->t_state != TS_SLEEP) 6937c478bd9Sstevel@tonic-gate tp->t_stime = lbolt; 6947c478bd9Sstevel@tonic-gate thread_unlock(tp); 6957c478bd9Sstevel@tonic-gate 6967c478bd9Sstevel@tonic-gate nswapped++; 6977c478bd9Sstevel@tonic-gate tot_swapped_out++; 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate lwp->lwp_ru.nswap++; 7007c478bd9Sstevel@tonic-gate 7017c478bd9Sstevel@tonic-gate /* 7027c478bd9Sstevel@tonic-gate * Now drop the p_lock since the 7037c478bd9Sstevel@tonic-gate * stack needs to pushed out. 7047c478bd9Sstevel@tonic-gate */ 7057c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_lock); 7067c478bd9Sstevel@tonic-gate 7077c478bd9Sstevel@tonic-gate stack_size = swapsize(tp->t_swap); 7087c478bd9Sstevel@tonic-gate stack_pages = btopr(stack_size); 7097c478bd9Sstevel@tonic-gate ws_pages += stack_pages; 7107c478bd9Sstevel@tonic-gate /* Kernel probe */ 7117c478bd9Sstevel@tonic-gate TNF_PROBE_4(swapout_lwp, 7127c478bd9Sstevel@tonic-gate "vm swap swapout", 7137c478bd9Sstevel@tonic-gate /* CSTYLED */, 7147c478bd9Sstevel@tonic-gate tnf_pid, pid, pp->p_pid, 7157c478bd9Sstevel@tonic-gate tnf_lwpid, lwpid, tp->t_tid, 7167c478bd9Sstevel@tonic-gate tnf_kthread_id, tid, tp, 7177c478bd9Sstevel@tonic-gate tnf_ulong, page_count, 7187c478bd9Sstevel@tonic-gate stack_pages); 7197c478bd9Sstevel@tonic-gate 7207c478bd9Sstevel@tonic-gate rw_enter(&kas.a_lock, RW_READER); 7217c478bd9Sstevel@tonic-gate err = segkp_fault(segkp->s_as->a_hat, 7227c478bd9Sstevel@tonic-gate segkp, tp->t_swap, stack_size, 7237c478bd9Sstevel@tonic-gate F_SOFTUNLOCK, S_WRITE); 7247c478bd9Sstevel@tonic-gate rw_exit(&kas.a_lock); 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate if (err) { 7277c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, 7287c478bd9Sstevel@tonic-gate "swapout: segkp_fault " 7297c478bd9Sstevel@tonic-gate "failed err: %d", err); 7307c478bd9Sstevel@tonic-gate } 7317c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, 7327c478bd9Sstevel@tonic-gate vm, pgswapout, stack_pages); 7337c478bd9Sstevel@tonic-gate 7347c478bd9Sstevel@tonic-gate mutex_enter(&pp->p_lock); 7357c478bd9Sstevel@tonic-gate pp->p_swapcnt++; 7367c478bd9Sstevel@tonic-gate swapped_lwps++; 7377c478bd9Sstevel@tonic-gate goto top; 7387c478bd9Sstevel@tonic-gate } 7397c478bd9Sstevel@tonic-gate } 7407c478bd9Sstevel@tonic-gate } 7417c478bd9Sstevel@tonic-gate thread_unlock(tp); 7427c478bd9Sstevel@tonic-gate } while ((tp = tp->t_forw) != pp->p_tlist); 7437c478bd9Sstevel@tonic-gate 7447c478bd9Sstevel@tonic-gate /* 7457c478bd9Sstevel@tonic-gate * Unload address space when all lwps are swapped out. 7467c478bd9Sstevel@tonic-gate */ 7477c478bd9Sstevel@tonic-gate if (pp->p_swapcnt == pp->p_lwpcnt) { 7487c478bd9Sstevel@tonic-gate size_t as_size = 0; 7497c478bd9Sstevel@tonic-gate 7507c478bd9Sstevel@tonic-gate /* 7517c478bd9Sstevel@tonic-gate * Avoid invoking as_swapout() if the process has 7527c478bd9Sstevel@tonic-gate * no MMU resources since pageout will eventually 7537c478bd9Sstevel@tonic-gate * steal pages belonging to this address space. This 7547c478bd9Sstevel@tonic-gate * saves CPU cycles as the number of pages that are 7557c478bd9Sstevel@tonic-gate * potentially freed or pushed out by the segment 7567c478bd9Sstevel@tonic-gate * swapout operation is very small. 7577c478bd9Sstevel@tonic-gate */ 7587c478bd9Sstevel@tonic-gate if (rm_asrss(pp->p_as) != 0) 7597c478bd9Sstevel@tonic-gate as_size = as_swapout(as); 7607c478bd9Sstevel@tonic-gate 7617c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgswapout, btop(as_size)); 7627c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, swapout, 1); 7637c478bd9Sstevel@tonic-gate ws_pages += btop(as_size); 7647c478bd9Sstevel@tonic-gate 7657c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_SWAPOUT, 7667c478bd9Sstevel@tonic-gate "swapout: pp %p pages_pushed %lu", pp, ws_pages); 7677c478bd9Sstevel@tonic-gate /* Kernel probe */ 7687c478bd9Sstevel@tonic-gate TNF_PROBE_2(swapout_process, "vm swap swapout", /* CSTYLED */, 7697c478bd9Sstevel@tonic-gate tnf_pid, pid, pp->p_pid, 7707c478bd9Sstevel@tonic-gate tnf_ulong, page_count, ws_pages); 7717c478bd9Sstevel@tonic-gate } 7727c478bd9Sstevel@tonic-gate *swrss = ws_pages; 7737c478bd9Sstevel@tonic-gate return (swapped_lwps); 7747c478bd9Sstevel@tonic-gate } 7757c478bd9Sstevel@tonic-gate 7767c478bd9Sstevel@tonic-gate void 7777c478bd9Sstevel@tonic-gate swapout_lwp(klwp_t *lwp) 7787c478bd9Sstevel@tonic-gate { 7797c478bd9Sstevel@tonic-gate kthread_id_t tp = curthread; 7807c478bd9Sstevel@tonic-gate 7817c478bd9Sstevel@tonic-gate ASSERT(curthread == lwptot(lwp)); 7827c478bd9Sstevel@tonic-gate 7837c478bd9Sstevel@tonic-gate /* 7847c478bd9Sstevel@tonic-gate * Don't insert the thread onto the swap queue if 7857c478bd9Sstevel@tonic-gate * sufficient memory is available. 7867c478bd9Sstevel@tonic-gate */ 7877c478bd9Sstevel@tonic-gate if (avefree > desfree || avefree < desfree && freemem > desfree) { 7887c478bd9Sstevel@tonic-gate thread_lock(tp); 7897c478bd9Sstevel@tonic-gate tp->t_schedflag &= ~TS_SWAPENQ; 7907c478bd9Sstevel@tonic-gate thread_unlock(tp); 7917c478bd9Sstevel@tonic-gate return; 7927c478bd9Sstevel@tonic-gate } 7937c478bd9Sstevel@tonic-gate 7947c478bd9Sstevel@tonic-gate /* 7957c478bd9Sstevel@tonic-gate * Lock the thread, then move it to the swapped queue from the 7967c478bd9Sstevel@tonic-gate * onproc queue and set its state to be TS_RUN. 7977c478bd9Sstevel@tonic-gate */ 7987c478bd9Sstevel@tonic-gate thread_lock(tp); 7997c478bd9Sstevel@tonic-gate ASSERT(tp->t_state == TS_ONPROC); 8007c478bd9Sstevel@tonic-gate if (tp->t_schedflag & TS_SWAPENQ) { 8017c478bd9Sstevel@tonic-gate tp->t_schedflag &= ~TS_SWAPENQ; 8027c478bd9Sstevel@tonic-gate 8037c478bd9Sstevel@tonic-gate /* 8047c478bd9Sstevel@tonic-gate * Set the state of this thread to be runnable 8057c478bd9Sstevel@tonic-gate * and move it from the onproc queue to the swap queue. 8067c478bd9Sstevel@tonic-gate */ 8077c478bd9Sstevel@tonic-gate disp_swapped_enq(tp); 8087c478bd9Sstevel@tonic-gate 8097c478bd9Sstevel@tonic-gate /* 8107c478bd9Sstevel@tonic-gate * Insert the thread onto the swap queue. 8117c478bd9Sstevel@tonic-gate */ 8127c478bd9Sstevel@tonic-gate tp->t_link = tswap_queue; 8137c478bd9Sstevel@tonic-gate tswap_queue = tp; 8147c478bd9Sstevel@tonic-gate tp->t_schedflag |= TS_ON_SWAPQ; 8157c478bd9Sstevel@tonic-gate 8167c478bd9Sstevel@tonic-gate thread_unlock_nopreempt(tp); 8177c478bd9Sstevel@tonic-gate 8187c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_SCHED, TR_SWAPOUT_LWP, "swapout_lwp:%x", lwp); 8197c478bd9Sstevel@tonic-gate 8207c478bd9Sstevel@tonic-gate swtch(); 8217c478bd9Sstevel@tonic-gate } else { 8227c478bd9Sstevel@tonic-gate thread_unlock(tp); 8237c478bd9Sstevel@tonic-gate } 8247c478bd9Sstevel@tonic-gate } 8257c478bd9Sstevel@tonic-gate 8267c478bd9Sstevel@tonic-gate /* 8277c478bd9Sstevel@tonic-gate * Swap all threads on the swap queue. 8287c478bd9Sstevel@tonic-gate */ 8297c478bd9Sstevel@tonic-gate static void 8307c478bd9Sstevel@tonic-gate process_swap_queue(void) 8317c478bd9Sstevel@tonic-gate { 8327c478bd9Sstevel@tonic-gate kthread_id_t tp; 8337c478bd9Sstevel@tonic-gate uint_t ws_pages; 8347c478bd9Sstevel@tonic-gate proc_t *pp; 8357c478bd9Sstevel@tonic-gate struct cpu *cpup = CPU; 8367c478bd9Sstevel@tonic-gate klwp_t *lwp; 8377c478bd9Sstevel@tonic-gate int err; 8387c478bd9Sstevel@tonic-gate 8397c478bd9Sstevel@tonic-gate if (tswap_queue == NULL) 8407c478bd9Sstevel@tonic-gate return; 8417c478bd9Sstevel@tonic-gate 8427c478bd9Sstevel@tonic-gate /* 8437c478bd9Sstevel@tonic-gate * Acquire the "swapped_lock" which locks the swap queue, 8447c478bd9Sstevel@tonic-gate * and unload the stacks of all threads on it. 8457c478bd9Sstevel@tonic-gate */ 8467c478bd9Sstevel@tonic-gate disp_lock_enter(&swapped_lock); 8477c478bd9Sstevel@tonic-gate while ((tp = tswap_queue) != NULL) { 8487c478bd9Sstevel@tonic-gate pgcnt_t stack_pages; 8497c478bd9Sstevel@tonic-gate size_t stack_size; 8507c478bd9Sstevel@tonic-gate 8517c478bd9Sstevel@tonic-gate tswap_queue = tp->t_link; 8527c478bd9Sstevel@tonic-gate tp->t_link = NULL; 8537c478bd9Sstevel@tonic-gate 8547c478bd9Sstevel@tonic-gate /* 8557c478bd9Sstevel@tonic-gate * Drop the "dispatcher lock" before acquiring "t_lock" 8567c478bd9Sstevel@tonic-gate * to avoid spinning on it since the thread at the front 8577c478bd9Sstevel@tonic-gate * of the swap queue could be pinned before giving up 8587c478bd9Sstevel@tonic-gate * its "t_lock" in resume. 8597c478bd9Sstevel@tonic-gate */ 8607c478bd9Sstevel@tonic-gate disp_lock_exit(&swapped_lock); 8617c478bd9Sstevel@tonic-gate lock_set(&tp->t_lock); 8627c478bd9Sstevel@tonic-gate 8637c478bd9Sstevel@tonic-gate /* 8647c478bd9Sstevel@tonic-gate * Now, re-acquire the "swapped_lock". Acquiring this lock 8657c478bd9Sstevel@tonic-gate * results in locking the thread since its dispatcher lock 8667c478bd9Sstevel@tonic-gate * (t_lockp) is the "swapped_lock". 8677c478bd9Sstevel@tonic-gate */ 8687c478bd9Sstevel@tonic-gate disp_lock_enter(&swapped_lock); 8697c478bd9Sstevel@tonic-gate ASSERT(tp->t_state == TS_RUN); 8707c478bd9Sstevel@tonic-gate ASSERT(tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)); 8717c478bd9Sstevel@tonic-gate 8727c478bd9Sstevel@tonic-gate tp->t_schedflag &= ~(TS_LOAD | TS_ON_SWAPQ); 8737c478bd9Sstevel@tonic-gate tp->t_stime = lbolt; /* swapout time */ 8747c478bd9Sstevel@tonic-gate disp_lock_exit(&swapped_lock); 8757c478bd9Sstevel@tonic-gate lock_clear(&tp->t_lock); 8767c478bd9Sstevel@tonic-gate 8777c478bd9Sstevel@tonic-gate lwp = ttolwp(tp); 8787c478bd9Sstevel@tonic-gate lwp->lwp_ru.nswap++; 8797c478bd9Sstevel@tonic-gate 8807c478bd9Sstevel@tonic-gate pp = ttoproc(tp); 8817c478bd9Sstevel@tonic-gate stack_size = swapsize(tp->t_swap); 8827c478bd9Sstevel@tonic-gate stack_pages = btopr(stack_size); 8837c478bd9Sstevel@tonic-gate 8847c478bd9Sstevel@tonic-gate /* Kernel probe */ 8857c478bd9Sstevel@tonic-gate TNF_PROBE_4(swapout_lwp, "vm swap swapout", /* CSTYLED */, 8867c478bd9Sstevel@tonic-gate tnf_pid, pid, pp->p_pid, 8877c478bd9Sstevel@tonic-gate tnf_lwpid, lwpid, tp->t_tid, 8887c478bd9Sstevel@tonic-gate tnf_kthread_id, tid, tp, 8897c478bd9Sstevel@tonic-gate tnf_ulong, page_count, stack_pages); 8907c478bd9Sstevel@tonic-gate 8917c478bd9Sstevel@tonic-gate rw_enter(&kas.a_lock, RW_READER); 8927c478bd9Sstevel@tonic-gate err = segkp_fault(segkp->s_as->a_hat, segkp, tp->t_swap, 8937c478bd9Sstevel@tonic-gate stack_size, F_SOFTUNLOCK, S_WRITE); 8947c478bd9Sstevel@tonic-gate rw_exit(&kas.a_lock); 8957c478bd9Sstevel@tonic-gate 8967c478bd9Sstevel@tonic-gate if (err) { 8977c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, 8987c478bd9Sstevel@tonic-gate "process_swap_list: segkp_fault failed err: %d", err); 8997c478bd9Sstevel@tonic-gate } 9007c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgswapout, stack_pages); 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate nswapped++; 9037c478bd9Sstevel@tonic-gate tot_swapped_out++; 9047c478bd9Sstevel@tonic-gate swapqswap++; 9057c478bd9Sstevel@tonic-gate 9067c478bd9Sstevel@tonic-gate /* 9077c478bd9Sstevel@tonic-gate * Don't need p_lock since the swapper is the only 9087c478bd9Sstevel@tonic-gate * thread which increments/decrements p_swapcnt and p_swrss. 9097c478bd9Sstevel@tonic-gate */ 9107c478bd9Sstevel@tonic-gate ws_pages = stack_pages; 9117c478bd9Sstevel@tonic-gate pp->p_swapcnt++; 9127c478bd9Sstevel@tonic-gate 9137c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_SCHED, TR_SWAPQ_LWP, "swaplist: pp %p", pp); 9147c478bd9Sstevel@tonic-gate 9157c478bd9Sstevel@tonic-gate /* 9167c478bd9Sstevel@tonic-gate * Unload address space when all lwps are swapped out. 9177c478bd9Sstevel@tonic-gate */ 9187c478bd9Sstevel@tonic-gate if (pp->p_swapcnt == pp->p_lwpcnt) { 9197c478bd9Sstevel@tonic-gate size_t as_size = 0; 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate if (rm_asrss(pp->p_as) != 0) 9227c478bd9Sstevel@tonic-gate as_size = as_swapout(pp->p_as); 9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgswapout, 9257c478bd9Sstevel@tonic-gate btop(as_size)); 9267c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, swapout, 1); 9277c478bd9Sstevel@tonic-gate 9287c478bd9Sstevel@tonic-gate ws_pages += btop(as_size); 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_SCHED, TR_SWAPQ_PROC, 9317c478bd9Sstevel@tonic-gate "swaplist_proc: pp %p pages_pushed: %lu", 9327c478bd9Sstevel@tonic-gate pp, ws_pages); 9337c478bd9Sstevel@tonic-gate /* Kernel probe */ 9347c478bd9Sstevel@tonic-gate TNF_PROBE_2(swapout_process, "vm swap swapout", 9357c478bd9Sstevel@tonic-gate /* CSTYLED */, 9367c478bd9Sstevel@tonic-gate tnf_pid, pid, pp->p_pid, 9377c478bd9Sstevel@tonic-gate tnf_ulong, page_count, ws_pages); 9387c478bd9Sstevel@tonic-gate } 9397c478bd9Sstevel@tonic-gate pp->p_swrss += ws_pages; 9407c478bd9Sstevel@tonic-gate disp_lock_enter(&swapped_lock); 9417c478bd9Sstevel@tonic-gate } 9427c478bd9Sstevel@tonic-gate disp_lock_exit(&swapped_lock); 9437c478bd9Sstevel@tonic-gate } 944