17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ad23a2dbSjohansen * Common Development and Distribution License (the "License"). 6ad23a2dbSjohansen * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 277c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* 307c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 317c478bd9Sstevel@tonic-gate * The Regents of the University of California 327c478bd9Sstevel@tonic-gate * All Rights Reserved 337c478bd9Sstevel@tonic-gate * 347c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 357c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 367c478bd9Sstevel@tonic-gate * contributors. 377c478bd9Sstevel@tonic-gate */ 387c478bd9Sstevel@tonic-gate 397c478bd9Sstevel@tonic-gate #include <sys/types.h> 407c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 417c478bd9Sstevel@tonic-gate #include <sys/param.h> 427c478bd9Sstevel@tonic-gate #include <sys/buf.h> 437c478bd9Sstevel@tonic-gate #include <sys/uio.h> 447c478bd9Sstevel@tonic-gate #include <sys/proc.h> 457c478bd9Sstevel@tonic-gate #include <sys/systm.h> 467c478bd9Sstevel@tonic-gate #include <sys/mman.h> 477c478bd9Sstevel@tonic-gate #include <sys/cred.h> 487c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 497c478bd9Sstevel@tonic-gate #include <sys/vm.h> 507c478bd9Sstevel@tonic-gate #include <sys/vmparam.h> 517c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 527c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 537c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 547c478bd9Sstevel@tonic-gate #include <sys/user.h> 557c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 567c478bd9Sstevel@tonic-gate #include <sys/debug.h> 577c478bd9Sstevel@tonic-gate #include <sys/callb.h> 587c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 597c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 607c478bd9Sstevel@tonic-gate #include <sys/time.h> 617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate #include <vm/hat.h> 637c478bd9Sstevel@tonic-gate #include <vm/as.h> 647c478bd9Sstevel@tonic-gate #include <vm/seg.h> 657c478bd9Sstevel@tonic-gate #include <vm/page.h> 667c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 677c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate static int checkpage(page_t *, int); 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate /* 727c478bd9Sstevel@tonic-gate * The following parameters control operation of the page replacement 737c478bd9Sstevel@tonic-gate * algorithm. They are initialized to 0, and then computed at boot time 747c478bd9Sstevel@tonic-gate * based on the size of the system. If they are patched non-zero in 757c478bd9Sstevel@tonic-gate * a loaded vmunix they are left alone and may thus be changed per system 767c478bd9Sstevel@tonic-gate * using adb on the loaded system. 777c478bd9Sstevel@tonic-gate */ 787c478bd9Sstevel@tonic-gate pgcnt_t slowscan = 0; 797c478bd9Sstevel@tonic-gate pgcnt_t fastscan = 0; 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate static pgcnt_t handspreadpages = 0; 827c478bd9Sstevel@tonic-gate static int loopfraction = 2; 837c478bd9Sstevel@tonic-gate static pgcnt_t looppages; 847c478bd9Sstevel@tonic-gate static int min_percent_cpu = 4; 857c478bd9Sstevel@tonic-gate static int max_percent_cpu = 80; 867c478bd9Sstevel@tonic-gate static pgcnt_t maxfastscan = 0; 877c478bd9Sstevel@tonic-gate static pgcnt_t maxslowscan = 100; 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate pgcnt_t maxpgio = 0; 907c478bd9Sstevel@tonic-gate pgcnt_t minfree = 0; 917c478bd9Sstevel@tonic-gate pgcnt_t desfree = 0; 927c478bd9Sstevel@tonic-gate pgcnt_t lotsfree = 0; 937c478bd9Sstevel@tonic-gate pgcnt_t needfree = 0; 947c478bd9Sstevel@tonic-gate pgcnt_t throttlefree = 0; 957c478bd9Sstevel@tonic-gate pgcnt_t pageout_reserve = 0; 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate pgcnt_t deficit; 987c478bd9Sstevel@tonic-gate 
pgcnt_t nscan; 997c478bd9Sstevel@tonic-gate pgcnt_t desscan; 1007c478bd9Sstevel@tonic-gate 1017c478bd9Sstevel@tonic-gate /* 1027c478bd9Sstevel@tonic-gate * Values for min_pageout_ticks, max_pageout_ticks and pageout_ticks 1037c478bd9Sstevel@tonic-gate * are the number of ticks in each wakeup cycle that gives the 1047c478bd9Sstevel@tonic-gate * equivalent of some underlying %CPU duty cycle. 1057c478bd9Sstevel@tonic-gate * When RATETOSCHEDPAGING is 4, and hz is 100, pageout_scanner is 1067c478bd9Sstevel@tonic-gate * awakened every 25 clock ticks. So, converting from %CPU to ticks 1077c478bd9Sstevel@tonic-gate * per wakeup cycle would be x% of 25, that is (x * 25) / 100. 1087c478bd9Sstevel@tonic-gate * So, for example, 4% == 1 tick and 80% == 20 ticks. 1097c478bd9Sstevel@tonic-gate * 1107c478bd9Sstevel@tonic-gate * min_pageout_ticks: 1117c478bd9Sstevel@tonic-gate * ticks/wakeup equivalent of min_percent_cpu. 1127c478bd9Sstevel@tonic-gate * 1137c478bd9Sstevel@tonic-gate * max_pageout_ticks: 1147c478bd9Sstevel@tonic-gate * ticks/wakeup equivalent of max_percent_cpu. 1157c478bd9Sstevel@tonic-gate * 1167c478bd9Sstevel@tonic-gate * pageout_ticks: 1177c478bd9Sstevel@tonic-gate * Number of clock ticks budgeted for each wakeup cycle. 1187c478bd9Sstevel@tonic-gate * Computed each time around by schedpaging(). 1197c478bd9Sstevel@tonic-gate * Varies between min_pageout_ticks .. max_pageout_ticks, 1207c478bd9Sstevel@tonic-gate * depending on memory pressure. 1217c478bd9Sstevel@tonic-gate * 1227c478bd9Sstevel@tonic-gate * pageout_lbolt: 1237c478bd9Sstevel@tonic-gate * Timestamp of the last time pageout_scanner woke up and started 1247c478bd9Sstevel@tonic-gate * (or resumed) scanning for not recently referenced pages. 
1257c478bd9Sstevel@tonic-gate */ 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate static clock_t min_pageout_ticks; 1287c478bd9Sstevel@tonic-gate static clock_t max_pageout_ticks; 1297c478bd9Sstevel@tonic-gate static clock_t pageout_ticks; 1307c478bd9Sstevel@tonic-gate static clock_t pageout_lbolt; 1317c478bd9Sstevel@tonic-gate 1327c478bd9Sstevel@tonic-gate static uint_t reset_hands; 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate #define PAGES_POLL_MASK 1023 1357c478bd9Sstevel@tonic-gate 1367c478bd9Sstevel@tonic-gate /* 1377c478bd9Sstevel@tonic-gate * pageout_sample_lim: 1387c478bd9Sstevel@tonic-gate * The limit on the number of samples needed to establish a value 1397c478bd9Sstevel@tonic-gate * for new pageout parameters, fastscan, slowscan, and handspreadpages. 1407c478bd9Sstevel@tonic-gate * 1417c478bd9Sstevel@tonic-gate * pageout_sample_cnt: 1427c478bd9Sstevel@tonic-gate * Current sample number. Once the sample gets large enough, 1437c478bd9Sstevel@tonic-gate * set new values for handspreadpages, fastscan and slowscan. 1447c478bd9Sstevel@tonic-gate * 1457c478bd9Sstevel@tonic-gate * pageout_sample_pages: 1467c478bd9Sstevel@tonic-gate * The accumulated number of pages scanned during sampling. 1477c478bd9Sstevel@tonic-gate * 1487c478bd9Sstevel@tonic-gate * pageout_sample_ticks: 1497c478bd9Sstevel@tonic-gate * The accumulated clock ticks for the sample. 1507c478bd9Sstevel@tonic-gate * 1517c478bd9Sstevel@tonic-gate * pageout_rate: 1527c478bd9Sstevel@tonic-gate * Rate in pages/nanosecond, computed at the end of sampling. 1537c478bd9Sstevel@tonic-gate * 1547c478bd9Sstevel@tonic-gate * pageout_new_spread: 1557c478bd9Sstevel@tonic-gate * The new value to use for fastscan and handspreadpages. 1567c478bd9Sstevel@tonic-gate * Calculated after enough samples have been taken. 
1577c478bd9Sstevel@tonic-gate */ 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate typedef hrtime_t hrrate_t; 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate static uint64_t pageout_sample_lim = 4; 1627c478bd9Sstevel@tonic-gate static uint64_t pageout_sample_cnt = 0; 1637c478bd9Sstevel@tonic-gate static pgcnt_t pageout_sample_pages = 0; 1647c478bd9Sstevel@tonic-gate static hrrate_t pageout_rate = 0; 1657c478bd9Sstevel@tonic-gate static pgcnt_t pageout_new_spread = 0; 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate static clock_t pageout_cycle_ticks; 1687c478bd9Sstevel@tonic-gate static hrtime_t sample_start, sample_end; 1697c478bd9Sstevel@tonic-gate static hrtime_t pageout_sample_etime = 0; 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * Record number of times a pageout_scanner wakeup cycle finished because it 1737c478bd9Sstevel@tonic-gate * timed out (exceeded its CPU budget), rather than because it visited 1747c478bd9Sstevel@tonic-gate * its budgeted number of pages. 1757c478bd9Sstevel@tonic-gate */ 1767c478bd9Sstevel@tonic-gate uint64_t pageout_timeouts = 0; 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate #ifdef VM_STATS 1797c478bd9Sstevel@tonic-gate static struct pageoutvmstats_str { 1807c478bd9Sstevel@tonic-gate ulong_t checkpage[3]; 1817c478bd9Sstevel@tonic-gate } pageoutvmstats; 1827c478bd9Sstevel@tonic-gate #endif /* VM_STATS */ 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate /* 1857c478bd9Sstevel@tonic-gate * Threads waiting for free memory use this condition variable and lock until 1867c478bd9Sstevel@tonic-gate * memory becomes available. 1877c478bd9Sstevel@tonic-gate */ 1887c478bd9Sstevel@tonic-gate kmutex_t memavail_lock; 1897c478bd9Sstevel@tonic-gate kcondvar_t memavail_cv; 1907c478bd9Sstevel@tonic-gate 1917c478bd9Sstevel@tonic-gate /* 1927c478bd9Sstevel@tonic-gate * The size of the clock loop. 
1937c478bd9Sstevel@tonic-gate */ 1947c478bd9Sstevel@tonic-gate #define LOOPPAGES total_pages 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate /* 1977c478bd9Sstevel@tonic-gate * Set up the paging constants for the clock algorithm. 1987c478bd9Sstevel@tonic-gate * Called after the system is initialized and the amount of memory 1997c478bd9Sstevel@tonic-gate * and number of paging devices is known. 2007c478bd9Sstevel@tonic-gate * 2017c478bd9Sstevel@tonic-gate * lotsfree is 1/64 of memory, but at least 512K. 2027c478bd9Sstevel@tonic-gate * desfree is 1/2 of lotsfree. 2037c478bd9Sstevel@tonic-gate * minfree is 1/2 of desfree. 2047c478bd9Sstevel@tonic-gate * 2057c478bd9Sstevel@tonic-gate * Note: to revert to the paging algorithm of Solaris 2.4/2.5, set: 2067c478bd9Sstevel@tonic-gate * 2077c478bd9Sstevel@tonic-gate * lotsfree = btop(512K) 2087c478bd9Sstevel@tonic-gate * desfree = btop(200K) 2097c478bd9Sstevel@tonic-gate * minfree = btop(100K) 2107c478bd9Sstevel@tonic-gate * throttlefree = INT_MIN 2117c478bd9Sstevel@tonic-gate * max_percent_cpu = 4 2127c478bd9Sstevel@tonic-gate */ 2137c478bd9Sstevel@tonic-gate void 2147c478bd9Sstevel@tonic-gate setupclock(int recalc) 2157c478bd9Sstevel@tonic-gate { 2167c478bd9Sstevel@tonic-gate 2177c478bd9Sstevel@tonic-gate static spgcnt_t init_lfree, init_dfree, init_mfree; 2187c478bd9Sstevel@tonic-gate static spgcnt_t init_tfree, init_preserve, init_mpgio; 2197c478bd9Sstevel@tonic-gate static spgcnt_t init_mfscan, init_fscan, init_sscan, init_hspages; 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate looppages = LOOPPAGES; 2227c478bd9Sstevel@tonic-gate 2237c478bd9Sstevel@tonic-gate /* 2247c478bd9Sstevel@tonic-gate * setupclock can now be called to recalculate the paging 2257c478bd9Sstevel@tonic-gate * parameters in the case of dynamic addition of memory. 
2267c478bd9Sstevel@tonic-gate * So to make sure we make the proper calculations, if such a 2277c478bd9Sstevel@tonic-gate * situation should arise, we save away the initial values 2287c478bd9Sstevel@tonic-gate * of each parameter so we can recall them when needed. This 2297c478bd9Sstevel@tonic-gate * way we don't lose the settings an admin might have made 2307c478bd9Sstevel@tonic-gate * through the /etc/system file. 2317c478bd9Sstevel@tonic-gate */ 2327c478bd9Sstevel@tonic-gate 2337c478bd9Sstevel@tonic-gate if (!recalc) { 2347c478bd9Sstevel@tonic-gate init_lfree = lotsfree; 2357c478bd9Sstevel@tonic-gate init_dfree = desfree; 2367c478bd9Sstevel@tonic-gate init_mfree = minfree; 2377c478bd9Sstevel@tonic-gate init_tfree = throttlefree; 2387c478bd9Sstevel@tonic-gate init_preserve = pageout_reserve; 2397c478bd9Sstevel@tonic-gate init_mpgio = maxpgio; 2407c478bd9Sstevel@tonic-gate init_mfscan = maxfastscan; 2417c478bd9Sstevel@tonic-gate init_fscan = fastscan; 2427c478bd9Sstevel@tonic-gate init_sscan = slowscan; 2437c478bd9Sstevel@tonic-gate init_hspages = handspreadpages; 2447c478bd9Sstevel@tonic-gate } 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate /* 2477c478bd9Sstevel@tonic-gate * Set up thresholds for paging: 2487c478bd9Sstevel@tonic-gate */ 2497c478bd9Sstevel@tonic-gate 2507c478bd9Sstevel@tonic-gate /* 2517c478bd9Sstevel@tonic-gate * Lotsfree is threshold where paging daemon turns on. 2527c478bd9Sstevel@tonic-gate */ 2537c478bd9Sstevel@tonic-gate if (init_lfree == 0 || init_lfree >= looppages) 2547c478bd9Sstevel@tonic-gate lotsfree = MAX(looppages / 64, btop(512 * 1024)); 2557c478bd9Sstevel@tonic-gate else 2567c478bd9Sstevel@tonic-gate lotsfree = init_lfree; 2577c478bd9Sstevel@tonic-gate 2587c478bd9Sstevel@tonic-gate /* 2597c478bd9Sstevel@tonic-gate * Desfree is amount of memory desired free. 2607c478bd9Sstevel@tonic-gate * If less than this for extended period, start swapping. 
2617c478bd9Sstevel@tonic-gate */ 2627c478bd9Sstevel@tonic-gate if (init_dfree == 0 || init_dfree >= lotsfree) 2637c478bd9Sstevel@tonic-gate desfree = lotsfree / 2; 2647c478bd9Sstevel@tonic-gate else 2657c478bd9Sstevel@tonic-gate desfree = init_dfree; 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate /* 2687c478bd9Sstevel@tonic-gate * Minfree is minimal amount of free memory which is tolerable. 2697c478bd9Sstevel@tonic-gate */ 2707c478bd9Sstevel@tonic-gate if (init_mfree == 0 || init_mfree >= desfree) 2717c478bd9Sstevel@tonic-gate minfree = desfree / 2; 2727c478bd9Sstevel@tonic-gate else 2737c478bd9Sstevel@tonic-gate minfree = init_mfree; 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * Throttlefree is the point at which we start throttling 2777c478bd9Sstevel@tonic-gate * PG_WAIT requests until enough memory becomes available. 2787c478bd9Sstevel@tonic-gate */ 2797c478bd9Sstevel@tonic-gate if (init_tfree == 0 || init_tfree >= desfree) 2807c478bd9Sstevel@tonic-gate throttlefree = minfree; 2817c478bd9Sstevel@tonic-gate else 2827c478bd9Sstevel@tonic-gate throttlefree = init_tfree; 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate /* 2857c478bd9Sstevel@tonic-gate * Pageout_reserve is the number of pages that we keep in 2867c478bd9Sstevel@tonic-gate * stock for pageout's own use. Having a few such pages 2877c478bd9Sstevel@tonic-gate * provides insurance against system deadlock due to 2887c478bd9Sstevel@tonic-gate * pageout needing pages. When freemem < pageout_reserve, 2897c478bd9Sstevel@tonic-gate * non-blocking allocations are denied to any threads 2907c478bd9Sstevel@tonic-gate * other than pageout and sched. (At some point we might 2917c478bd9Sstevel@tonic-gate * want to consider a per-thread flag like T_PUSHING_PAGES 2927c478bd9Sstevel@tonic-gate * to indicate that a thread is part of the page-pushing 2937c478bd9Sstevel@tonic-gate * dance (e.g. 
an interrupt thread) and thus is entitled 2947c478bd9Sstevel@tonic-gate * to the same special dispensation we accord pageout.) 2957c478bd9Sstevel@tonic-gate */ 2967c478bd9Sstevel@tonic-gate if (init_preserve == 0 || init_preserve >= throttlefree) 2977c478bd9Sstevel@tonic-gate pageout_reserve = throttlefree / 2; 2987c478bd9Sstevel@tonic-gate else 2997c478bd9Sstevel@tonic-gate pageout_reserve = init_preserve; 3007c478bd9Sstevel@tonic-gate 3017c478bd9Sstevel@tonic-gate /* 3027c478bd9Sstevel@tonic-gate * Maxpgio thresholds how much paging is acceptable. 3037c478bd9Sstevel@tonic-gate * This figures that 2/3 busy on an arm is all that is 3047c478bd9Sstevel@tonic-gate * tolerable for paging. We assume one operation per disk rev. 3057c478bd9Sstevel@tonic-gate * 3067c478bd9Sstevel@tonic-gate * XXX - Does not account for multiple swap devices. 3077c478bd9Sstevel@tonic-gate */ 3087c478bd9Sstevel@tonic-gate if (init_mpgio == 0) 3097c478bd9Sstevel@tonic-gate maxpgio = (DISKRPM * 2) / 3; 3107c478bd9Sstevel@tonic-gate else 3117c478bd9Sstevel@tonic-gate maxpgio = init_mpgio; 3127c478bd9Sstevel@tonic-gate 3137c478bd9Sstevel@tonic-gate /* 3147c478bd9Sstevel@tonic-gate * The clock scan rate varies between fastscan and slowscan 3157c478bd9Sstevel@tonic-gate * based on the amount of free memory available. Fastscan 3167c478bd9Sstevel@tonic-gate * rate should be set based on the number pages that can be 3177c478bd9Sstevel@tonic-gate * scanned per sec using ~10% of processor time. Since this 3187c478bd9Sstevel@tonic-gate * value depends on the processor, MMU, Mhz etc., it is 3197c478bd9Sstevel@tonic-gate * difficult to determine it in a generic manner for all 3207c478bd9Sstevel@tonic-gate * architectures. 
3217c478bd9Sstevel@tonic-gate * 3227c478bd9Sstevel@tonic-gate * Instead of trying to determine the number of pages scanned 3237c478bd9Sstevel@tonic-gate * per sec for every processor, fastscan is set to be the smaller 3247c478bd9Sstevel@tonic-gate * of 1/2 of memory or MAXHANDSPREADPAGES and the sampling 3257c478bd9Sstevel@tonic-gate * time is limited to ~4% of processor time. 3267c478bd9Sstevel@tonic-gate * 3277c478bd9Sstevel@tonic-gate * Setting fastscan to be 1/2 of memory allows pageout to scan 3287c478bd9Sstevel@tonic-gate * all of memory in ~2 secs. This implies that user pages not 3297c478bd9Sstevel@tonic-gate * accessed within 1 sec (assuming, handspreadpages == fastscan) 3307c478bd9Sstevel@tonic-gate * can be reclaimed when free memory is very low. Stealing pages 3317c478bd9Sstevel@tonic-gate * not accessed within 1 sec seems reasonable and ensures that 3327c478bd9Sstevel@tonic-gate * active user processes don't thrash. 3337c478bd9Sstevel@tonic-gate * 3347c478bd9Sstevel@tonic-gate * Smaller values of fastscan result in scanning fewer pages 3357c478bd9Sstevel@tonic-gate * every second and consequently pageout may not be able to free 3367c478bd9Sstevel@tonic-gate * sufficient memory to maintain the minimum threshold. Larger 3377c478bd9Sstevel@tonic-gate * values of fastscan result in scanning a lot more pages which 3387c478bd9Sstevel@tonic-gate * could lead to thrashing and higher CPU usage. 3397c478bd9Sstevel@tonic-gate * 3407c478bd9Sstevel@tonic-gate * Fastscan needs to be limited to a maximum value and should not 3417c478bd9Sstevel@tonic-gate * scale with memory to prevent pageout from consuming too much 3427c478bd9Sstevel@tonic-gate * time for scanning on slow CPU's and avoid thrashing, as a 3437c478bd9Sstevel@tonic-gate * result of scanning too many pages, on faster CPU's. 
3447c478bd9Sstevel@tonic-gate * The value of 64 Meg was chosen for MAXHANDSPREADPAGES 3457c478bd9Sstevel@tonic-gate * (the upper bound for fastscan) based on the average number 3467c478bd9Sstevel@tonic-gate * of pages that can potentially be scanned in ~1 sec (using ~4% 3477c478bd9Sstevel@tonic-gate * of the CPU) on some of the following machines that currently 3487c478bd9Sstevel@tonic-gate * run Solaris 2.x: 3497c478bd9Sstevel@tonic-gate * 3507c478bd9Sstevel@tonic-gate * average memory scanned in ~1 sec 3517c478bd9Sstevel@tonic-gate * 3527c478bd9Sstevel@tonic-gate * 25 Mhz SS1+: 23 Meg 3537c478bd9Sstevel@tonic-gate * LX: 37 Meg 3547c478bd9Sstevel@tonic-gate * 50 Mhz SC2000: 68 Meg 3557c478bd9Sstevel@tonic-gate * 3567c478bd9Sstevel@tonic-gate * 40 Mhz 486: 26 Meg 3577c478bd9Sstevel@tonic-gate * 66 Mhz 486: 42 Meg 3587c478bd9Sstevel@tonic-gate * 3597c478bd9Sstevel@tonic-gate * When free memory falls just below lotsfree, the scan rate 3607c478bd9Sstevel@tonic-gate * goes from 0 to slowscan (i.e., pageout starts running). This 3617c478bd9Sstevel@tonic-gate * transition needs to be smooth and is achieved by ensuring that 3627c478bd9Sstevel@tonic-gate * pageout scans a small number of pages to satisfy the transient 3637c478bd9Sstevel@tonic-gate * memory demand. This is set to not exceed 100 pages/sec (25 per 3647c478bd9Sstevel@tonic-gate * wakeup) since scanning that many pages has no noticible impact 3657c478bd9Sstevel@tonic-gate * on system performance. 3667c478bd9Sstevel@tonic-gate * 3677c478bd9Sstevel@tonic-gate * In addition to setting fastscan and slowscan, pageout is 3687c478bd9Sstevel@tonic-gate * limited to using ~4% of the CPU. This results in increasing 3697c478bd9Sstevel@tonic-gate * the time taken to scan all of memory, which in turn means that 3707c478bd9Sstevel@tonic-gate * user processes have a better opportunity of preventing their 3717c478bd9Sstevel@tonic-gate * pages from being stolen. 
This has a positive effect on 3727c478bd9Sstevel@tonic-gate * interactive and overall system performance when memory demand 3737c478bd9Sstevel@tonic-gate * is high. 3747c478bd9Sstevel@tonic-gate * 3757c478bd9Sstevel@tonic-gate * Thus, the rate at which pages are scanned for replacement will 3767c478bd9Sstevel@tonic-gate * vary linearly between slowscan and the number of pages that 3777c478bd9Sstevel@tonic-gate * can be scanned using ~4% of processor time instead of varying 3787c478bd9Sstevel@tonic-gate * linearly between slowscan and fastscan. 3797c478bd9Sstevel@tonic-gate * 3807c478bd9Sstevel@tonic-gate * Also, the processor time used by pageout will vary from ~1% 3817c478bd9Sstevel@tonic-gate * at slowscan to ~4% at fastscan instead of varying between 3827c478bd9Sstevel@tonic-gate * ~1% at slowscan and ~10% at fastscan. 3837c478bd9Sstevel@tonic-gate * 3847c478bd9Sstevel@tonic-gate * The values chosen for the various VM parameters (fastscan, 3857c478bd9Sstevel@tonic-gate * handspreadpages, etc) are not universally true for all machines, 3867c478bd9Sstevel@tonic-gate * but appear to be a good rule of thumb for the machines we've 3877c478bd9Sstevel@tonic-gate * tested. They have the following ranges: 3887c478bd9Sstevel@tonic-gate * 3897c478bd9Sstevel@tonic-gate * cpu speed: 20 to 70 Mhz 3907c478bd9Sstevel@tonic-gate * page size: 4K to 8K 3917c478bd9Sstevel@tonic-gate * memory size: 16M to 5G 3927c478bd9Sstevel@tonic-gate * page scan rate: 4000 - 17400 4K pages per sec 3937c478bd9Sstevel@tonic-gate * 3947c478bd9Sstevel@tonic-gate * The values need to be re-examined for machines which don't 3957c478bd9Sstevel@tonic-gate * fall into the various ranges (e.g., slower or faster CPUs, 3967c478bd9Sstevel@tonic-gate * smaller or larger pagesizes etc) shown above. 3977c478bd9Sstevel@tonic-gate * 3987c478bd9Sstevel@tonic-gate * On an MP machine, pageout is often unable to maintain the 3997c478bd9Sstevel@tonic-gate * minimum paging thresholds under heavy load. 
This is due to 4007c478bd9Sstevel@tonic-gate * the fact that user processes running on other CPU's can be 4017c478bd9Sstevel@tonic-gate * dirtying memory at a much faster pace than pageout can find 4027c478bd9Sstevel@tonic-gate * pages to free. The memory demands could be met by enabling 4037c478bd9Sstevel@tonic-gate * more than one CPU to run the clock algorithm in such a manner 4047c478bd9Sstevel@tonic-gate * that the various clock hands don't overlap. This also makes 4057c478bd9Sstevel@tonic-gate * it more difficult to determine the values for fastscan, slowscan 4067c478bd9Sstevel@tonic-gate * and handspreadpages. 4077c478bd9Sstevel@tonic-gate * 4087c478bd9Sstevel@tonic-gate * The swapper is currently used to free up memory when pageout 4097c478bd9Sstevel@tonic-gate * is unable to meet memory demands by swapping out processes. 4107c478bd9Sstevel@tonic-gate * In addition to freeing up memory, swapping also reduces the 4117c478bd9Sstevel@tonic-gate * demand for memory by preventing user processes from running 4127c478bd9Sstevel@tonic-gate * and thereby consuming memory. 
4137c478bd9Sstevel@tonic-gate */ 4147c478bd9Sstevel@tonic-gate if (init_mfscan == 0) { 4157c478bd9Sstevel@tonic-gate if (pageout_new_spread != 0) 4167c478bd9Sstevel@tonic-gate maxfastscan = pageout_new_spread; 4177c478bd9Sstevel@tonic-gate else 4187c478bd9Sstevel@tonic-gate maxfastscan = MAXHANDSPREADPAGES; 4197c478bd9Sstevel@tonic-gate } else { 4207c478bd9Sstevel@tonic-gate maxfastscan = init_mfscan; 4217c478bd9Sstevel@tonic-gate } 4227c478bd9Sstevel@tonic-gate if (init_fscan == 0) 4237c478bd9Sstevel@tonic-gate fastscan = MIN(looppages / loopfraction, maxfastscan); 4247c478bd9Sstevel@tonic-gate else 4257c478bd9Sstevel@tonic-gate fastscan = init_fscan; 4267c478bd9Sstevel@tonic-gate if (fastscan > looppages / loopfraction) 4277c478bd9Sstevel@tonic-gate fastscan = looppages / loopfraction; 4287c478bd9Sstevel@tonic-gate 4297c478bd9Sstevel@tonic-gate /* 4307c478bd9Sstevel@tonic-gate * Set slow scan time to 1/10 the fast scan time, but 4317c478bd9Sstevel@tonic-gate * not to exceed maxslowscan. 4327c478bd9Sstevel@tonic-gate */ 4337c478bd9Sstevel@tonic-gate if (init_sscan == 0) 4347c478bd9Sstevel@tonic-gate slowscan = MIN(fastscan / 10, maxslowscan); 4357c478bd9Sstevel@tonic-gate else 4367c478bd9Sstevel@tonic-gate slowscan = init_sscan; 4377c478bd9Sstevel@tonic-gate if (slowscan > fastscan / 2) 4387c478bd9Sstevel@tonic-gate slowscan = fastscan / 2; 4397c478bd9Sstevel@tonic-gate 4407c478bd9Sstevel@tonic-gate /* 4417c478bd9Sstevel@tonic-gate * Handspreadpages is distance (in pages) between front and back 4427c478bd9Sstevel@tonic-gate * pageout daemon hands. The amount of time to reclaim a page 4437c478bd9Sstevel@tonic-gate * once pageout examines it increases with this distance and 4447c478bd9Sstevel@tonic-gate * decreases as the scan rate rises. It must be < the amount 4457c478bd9Sstevel@tonic-gate * of pageable memory. 
4467c478bd9Sstevel@tonic-gate * 4477c478bd9Sstevel@tonic-gate * Since pageout is limited to ~4% of the CPU, setting handspreadpages 4487c478bd9Sstevel@tonic-gate * to be "fastscan" results in the front hand being a few secs 4497c478bd9Sstevel@tonic-gate * (varies based on the processor speed) ahead of the back hand 4507c478bd9Sstevel@tonic-gate * at fastscan rates. This distance can be further reduced, if 4517c478bd9Sstevel@tonic-gate * necessary, by increasing the processor time used by pageout 4527c478bd9Sstevel@tonic-gate * to be more than ~4% and preferrably not more than ~10%. 4537c478bd9Sstevel@tonic-gate * 4547c478bd9Sstevel@tonic-gate * As a result, user processes have a much better chance of 4557c478bd9Sstevel@tonic-gate * referencing their pages before the back hand examines them. 4567c478bd9Sstevel@tonic-gate * This also significantly lowers the number of reclaims from 4577c478bd9Sstevel@tonic-gate * the freelist since pageout does not end up freeing pages which 4587c478bd9Sstevel@tonic-gate * may be referenced a sec later. 4597c478bd9Sstevel@tonic-gate */ 4607c478bd9Sstevel@tonic-gate if (init_hspages == 0) 4617c478bd9Sstevel@tonic-gate handspreadpages = fastscan; 4627c478bd9Sstevel@tonic-gate else 4637c478bd9Sstevel@tonic-gate handspreadpages = init_hspages; 4647c478bd9Sstevel@tonic-gate 4657c478bd9Sstevel@tonic-gate /* 4667c478bd9Sstevel@tonic-gate * Make sure that back hand follows front hand by at least 4677c478bd9Sstevel@tonic-gate * 1/RATETOSCHEDPAGING seconds. Without this test, it is possible 4687c478bd9Sstevel@tonic-gate * for the back hand to look at a page during the same wakeup of 4697c478bd9Sstevel@tonic-gate * the pageout daemon in which the front hand cleared its ref bit. 
4707c478bd9Sstevel@tonic-gate */ 4717c478bd9Sstevel@tonic-gate if (handspreadpages >= looppages) 4727c478bd9Sstevel@tonic-gate handspreadpages = looppages - 1; 4737c478bd9Sstevel@tonic-gate 4747c478bd9Sstevel@tonic-gate /* 4757c478bd9Sstevel@tonic-gate * If we have been called to recalculate the parameters, 4767c478bd9Sstevel@tonic-gate * set a flag to re-evaluate the clock hand pointers. 4777c478bd9Sstevel@tonic-gate */ 4787c478bd9Sstevel@tonic-gate if (recalc) 4797c478bd9Sstevel@tonic-gate reset_hands = 1; 4807c478bd9Sstevel@tonic-gate } 4817c478bd9Sstevel@tonic-gate 4827c478bd9Sstevel@tonic-gate /* 4837c478bd9Sstevel@tonic-gate * Pageout scheduling. 4847c478bd9Sstevel@tonic-gate * 4857c478bd9Sstevel@tonic-gate * Schedpaging controls the rate at which the page out daemon runs by 4867c478bd9Sstevel@tonic-gate * setting the global variables nscan and desscan RATETOSCHEDPAGING 4877c478bd9Sstevel@tonic-gate * times a second. Nscan records the number of pages pageout has examined 4887c478bd9Sstevel@tonic-gate * in its current pass; schedpaging resets this value to zero each time 4897c478bd9Sstevel@tonic-gate * it runs. Desscan records the number of pages pageout should examine 4907c478bd9Sstevel@tonic-gate * in its next pass; schedpaging sets this value based on the amount of 4917c478bd9Sstevel@tonic-gate * currently available memory. 4927c478bd9Sstevel@tonic-gate */ 4937c478bd9Sstevel@tonic-gate 4947c478bd9Sstevel@tonic-gate #define RATETOSCHEDPAGING 4 /* hz that is */ 4957c478bd9Sstevel@tonic-gate 4967c478bd9Sstevel@tonic-gate static kmutex_t pageout_mutex; /* held while pageout or schedpaging running */ 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate /* 4997c478bd9Sstevel@tonic-gate * Pool of available async pageout putpage requests. 
5007c478bd9Sstevel@tonic-gate */ 5017c478bd9Sstevel@tonic-gate static struct async_reqs *push_req; 5027c478bd9Sstevel@tonic-gate static struct async_reqs *req_freelist; /* available req structs */ 5037c478bd9Sstevel@tonic-gate static struct async_reqs *push_list; /* pending reqs */ 5047c478bd9Sstevel@tonic-gate static kmutex_t push_lock; /* protects req pool */ 5057c478bd9Sstevel@tonic-gate static kcondvar_t push_cv; 5067c478bd9Sstevel@tonic-gate 5077c478bd9Sstevel@tonic-gate static int async_list_size = 256; /* number of async request structs */ 5087c478bd9Sstevel@tonic-gate 5097c478bd9Sstevel@tonic-gate static void pageout_scanner(void); 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate /* 5127c478bd9Sstevel@tonic-gate * If a page is being shared more than "po_share" times 5137c478bd9Sstevel@tonic-gate * then leave it alone- don't page it out. 5147c478bd9Sstevel@tonic-gate */ 5157c478bd9Sstevel@tonic-gate #define MIN_PO_SHARE (8) 5167c478bd9Sstevel@tonic-gate #define MAX_PO_SHARE ((MIN_PO_SHARE) << 24) 5177c478bd9Sstevel@tonic-gate ulong_t po_share = MIN_PO_SHARE; 5187c478bd9Sstevel@tonic-gate 5197c478bd9Sstevel@tonic-gate /* 5207c478bd9Sstevel@tonic-gate * Schedule rate for paging. 5217c478bd9Sstevel@tonic-gate * Rate is linear interpolation between 5227c478bd9Sstevel@tonic-gate * slowscan with lotsfree and fastscan when out of memory. 
5237c478bd9Sstevel@tonic-gate */ 5247c478bd9Sstevel@tonic-gate static void 5257c478bd9Sstevel@tonic-gate schedpaging(void *arg) 5267c478bd9Sstevel@tonic-gate { 5277c478bd9Sstevel@tonic-gate spgcnt_t vavail; 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate if (freemem < lotsfree + needfree + kmem_reapahead) 5307c478bd9Sstevel@tonic-gate kmem_reap(); 5317c478bd9Sstevel@tonic-gate 532a98e9dbfSaguzovsk if (freemem < lotsfree + needfree) 5337c478bd9Sstevel@tonic-gate seg_preap(); 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate if (kcage_on && (kcage_freemem < kcage_desfree || kcage_needfree)) 5367c478bd9Sstevel@tonic-gate kcage_cageout_wakeup(); 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate if (mutex_tryenter(&pageout_mutex)) { 5397c478bd9Sstevel@tonic-gate /* pageout() not running */ 5407c478bd9Sstevel@tonic-gate nscan = 0; 5417c478bd9Sstevel@tonic-gate vavail = freemem - deficit; 54206cfbf35Sjimp if (pageout_new_spread != 0) 54306cfbf35Sjimp vavail -= needfree; 5447c478bd9Sstevel@tonic-gate if (vavail < 0) 5457c478bd9Sstevel@tonic-gate vavail = 0; 5467c478bd9Sstevel@tonic-gate if (vavail > lotsfree) 5477c478bd9Sstevel@tonic-gate vavail = lotsfree; 5487c478bd9Sstevel@tonic-gate 5497c478bd9Sstevel@tonic-gate /* 5507c478bd9Sstevel@tonic-gate * Fix for 1161438 (CRS SPR# 73922). All variables 5517c478bd9Sstevel@tonic-gate * in the original calculation for desscan were 32 bit signed 5527c478bd9Sstevel@tonic-gate * ints. As freemem approaches 0x0 on a system with 1 Gig or 5537c478bd9Sstevel@tonic-gate * more of memory, the calculation can overflow. When this 5547c478bd9Sstevel@tonic-gate * happens, desscan becomes negative and pageout_scanner() 5557c478bd9Sstevel@tonic-gate * stops paging out. 
5567c478bd9Sstevel@tonic-gate */ 55706cfbf35Sjimp if ((needfree) && (pageout_new_spread == 0)) { 55806cfbf35Sjimp /* 55906cfbf35Sjimp * If we've not yet collected enough samples to 56006cfbf35Sjimp * calculate a spread, use the old logic of kicking 56106cfbf35Sjimp * into high gear anytime needfree is non-zero. 56206cfbf35Sjimp */ 5637c478bd9Sstevel@tonic-gate desscan = fastscan / RATETOSCHEDPAGING; 5647c478bd9Sstevel@tonic-gate } else { 56506cfbf35Sjimp /* 56606cfbf35Sjimp * Once we've calculated a spread based on system 56706cfbf35Sjimp * memory and usage, just treat needfree as another 56806cfbf35Sjimp * form of deficit. 56906cfbf35Sjimp */ 5707c478bd9Sstevel@tonic-gate spgcnt_t faststmp, slowstmp, result; 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate slowstmp = slowscan * vavail; 5737c478bd9Sstevel@tonic-gate faststmp = fastscan * (lotsfree - vavail); 5747c478bd9Sstevel@tonic-gate result = (slowstmp + faststmp) / 5757c478bd9Sstevel@tonic-gate nz(lotsfree) / RATETOSCHEDPAGING; 5767c478bd9Sstevel@tonic-gate desscan = (pgcnt_t)result; 5777c478bd9Sstevel@tonic-gate } 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate pageout_ticks = min_pageout_ticks + (lotsfree - vavail) * 5807c478bd9Sstevel@tonic-gate (max_pageout_ticks - min_pageout_ticks) / nz(lotsfree); 5817c478bd9Sstevel@tonic-gate 5827c478bd9Sstevel@tonic-gate if (freemem < lotsfree + needfree || 5837c478bd9Sstevel@tonic-gate pageout_sample_cnt < pageout_sample_lim) { 5847c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 5857c478bd9Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 5867c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 5877c478bd9Sstevel@tonic-gate } else { 5887c478bd9Sstevel@tonic-gate /* 5897c478bd9Sstevel@tonic-gate * There are enough free pages, no need to 5907c478bd9Sstevel@tonic-gate * kick the scanner thread. 
And next time 5917c478bd9Sstevel@tonic-gate * around, keep more of the `highly shared' 5927c478bd9Sstevel@tonic-gate * pages. 5937c478bd9Sstevel@tonic-gate */ 5947c478bd9Sstevel@tonic-gate cv_signal_pageout(); 5957c478bd9Sstevel@tonic-gate if (po_share > MIN_PO_SHARE) { 5967c478bd9Sstevel@tonic-gate po_share >>= 1; 5977c478bd9Sstevel@tonic-gate } 5987c478bd9Sstevel@tonic-gate } 5997c478bd9Sstevel@tonic-gate mutex_exit(&pageout_mutex); 6007c478bd9Sstevel@tonic-gate } 6017c478bd9Sstevel@tonic-gate 6027c478bd9Sstevel@tonic-gate /* 6037c478bd9Sstevel@tonic-gate * Signal threads waiting for available memory. 6047c478bd9Sstevel@tonic-gate * NOTE: usually we need to grab memavail_lock before cv_broadcast, but 6057c478bd9Sstevel@tonic-gate * in this case it is not needed - the waiters will be waken up during 6067c478bd9Sstevel@tonic-gate * the next invocation of this function. 6077c478bd9Sstevel@tonic-gate */ 6087c478bd9Sstevel@tonic-gate if (kmem_avail() > 0) 6097c478bd9Sstevel@tonic-gate cv_broadcast(&memavail_cv); 6107c478bd9Sstevel@tonic-gate 6117c478bd9Sstevel@tonic-gate (void) timeout(schedpaging, arg, hz / RATETOSCHEDPAGING); 6127c478bd9Sstevel@tonic-gate } 6137c478bd9Sstevel@tonic-gate 6147c478bd9Sstevel@tonic-gate pgcnt_t pushes; 6157c478bd9Sstevel@tonic-gate ulong_t push_list_size; /* # of requests on pageout queue */ 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate #define FRONT 1 6187c478bd9Sstevel@tonic-gate #define BACK 2 6197c478bd9Sstevel@tonic-gate 6207c478bd9Sstevel@tonic-gate int dopageout = 1; /* must be non-zero to turn page stealing on */ 6217c478bd9Sstevel@tonic-gate 6227c478bd9Sstevel@tonic-gate /* 6237c478bd9Sstevel@tonic-gate * The page out daemon, which runs as process 2. 6247c478bd9Sstevel@tonic-gate * 6257c478bd9Sstevel@tonic-gate * As long as there are at least lotsfree pages, 6267c478bd9Sstevel@tonic-gate * this process is not run. 
When the number of free 6277c478bd9Sstevel@tonic-gate * pages stays in the range desfree to lotsfree, 6287c478bd9Sstevel@tonic-gate * this daemon runs through the pages in the loop 6297c478bd9Sstevel@tonic-gate * at a rate determined in schedpaging(). Pageout manages 6307c478bd9Sstevel@tonic-gate * two hands on the clock. The front hand moves through 6317c478bd9Sstevel@tonic-gate * memory, clearing the reference bit, 6327c478bd9Sstevel@tonic-gate * and stealing pages from procs that are over maxrss. 6337c478bd9Sstevel@tonic-gate * The back hand travels a distance behind the front hand, 6347c478bd9Sstevel@tonic-gate * freeing the pages that have not been referenced in the time 6357c478bd9Sstevel@tonic-gate * since the front hand passed. If modified, they are pushed to 6367c478bd9Sstevel@tonic-gate * swap before being freed. 6377c478bd9Sstevel@tonic-gate * 6387c478bd9Sstevel@tonic-gate * There are 2 threads that act on behalf of the pageout process. 6397c478bd9Sstevel@tonic-gate * One thread scans pages (pageout_scanner) and frees them up if 6407c478bd9Sstevel@tonic-gate * they don't require any VOP_PUTPAGE operation. If a page must be 6417c478bd9Sstevel@tonic-gate * written back to its backing store, the request is put on a list 6427c478bd9Sstevel@tonic-gate * and the other (pageout) thread is signaled. The pageout thread 6437c478bd9Sstevel@tonic-gate * grabs VOP_PUTPAGE requests from the list, and processes them. 6447c478bd9Sstevel@tonic-gate * Some filesystems may require resources for the VOP_PUTPAGE 6457c478bd9Sstevel@tonic-gate * operations (like memory) and hence can block the pageout 6467c478bd9Sstevel@tonic-gate * thread, but the scanner thread can still operate. There is still 647da6c28aaSamw * no guarantee that memory deadlocks cannot occur. 6487c478bd9Sstevel@tonic-gate * 6497c478bd9Sstevel@tonic-gate * For now, this thing is in very rough form. 
6507c478bd9Sstevel@tonic-gate */ 6517c478bd9Sstevel@tonic-gate void 6527c478bd9Sstevel@tonic-gate pageout() 6537c478bd9Sstevel@tonic-gate { 6547c478bd9Sstevel@tonic-gate struct async_reqs *arg; 6557c478bd9Sstevel@tonic-gate pri_t pageout_pri; 6567c478bd9Sstevel@tonic-gate int i; 6577c478bd9Sstevel@tonic-gate pgcnt_t max_pushes; 6587c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 6597c478bd9Sstevel@tonic-gate 6607c478bd9Sstevel@tonic-gate proc_pageout = ttoproc(curthread); 6617c478bd9Sstevel@tonic-gate proc_pageout->p_cstime = 0; 6627c478bd9Sstevel@tonic-gate proc_pageout->p_stime = 0; 6637c478bd9Sstevel@tonic-gate proc_pageout->p_cutime = 0; 6647c478bd9Sstevel@tonic-gate proc_pageout->p_utime = 0; 665ae115bc7Smrj bcopy("pageout", PTOU(curproc)->u_psargs, 8); 666ae115bc7Smrj bcopy("pageout", PTOU(curproc)->u_comm, 7); 6677c478bd9Sstevel@tonic-gate 6687c478bd9Sstevel@tonic-gate /* 6697c478bd9Sstevel@tonic-gate * Create pageout scanner thread 6707c478bd9Sstevel@tonic-gate */ 6717c478bd9Sstevel@tonic-gate mutex_init(&pageout_mutex, NULL, MUTEX_DEFAULT, NULL); 6727c478bd9Sstevel@tonic-gate mutex_init(&push_lock, NULL, MUTEX_DEFAULT, NULL); 6737c478bd9Sstevel@tonic-gate 6747c478bd9Sstevel@tonic-gate /* 6757c478bd9Sstevel@tonic-gate * Allocate and initialize the async request structures 6767c478bd9Sstevel@tonic-gate * for pageout. 6777c478bd9Sstevel@tonic-gate */ 6787c478bd9Sstevel@tonic-gate push_req = (struct async_reqs *) 6797c478bd9Sstevel@tonic-gate kmem_zalloc(async_list_size * sizeof (struct async_reqs), KM_SLEEP); 6807c478bd9Sstevel@tonic-gate 6817c478bd9Sstevel@tonic-gate req_freelist = push_req; 6827c478bd9Sstevel@tonic-gate for (i = 0; i < async_list_size - 1; i++) 6837c478bd9Sstevel@tonic-gate push_req[i].a_next = &push_req[i + 1]; 6847c478bd9Sstevel@tonic-gate 6857c478bd9Sstevel@tonic-gate pageout_pri = curthread->t_pri; 686*35a5a358SJonathan Adams 687*35a5a358SJonathan Adams /* Create the pageout scanner thread. 
*/ 688*35a5a358SJonathan Adams (void) lwp_kernel_create(proc_pageout, pageout_scanner, NULL, TS_RUN, 689*35a5a358SJonathan Adams pageout_pri - 1); 6907c478bd9Sstevel@tonic-gate 6917c478bd9Sstevel@tonic-gate /* 6927c478bd9Sstevel@tonic-gate * kick off pageout scheduler. 6937c478bd9Sstevel@tonic-gate */ 6947c478bd9Sstevel@tonic-gate schedpaging(NULL); 6957c478bd9Sstevel@tonic-gate 6967c478bd9Sstevel@tonic-gate /* 6977c478bd9Sstevel@tonic-gate * Create kernel cage thread. 6987c478bd9Sstevel@tonic-gate * The kernel cage thread is started under the pageout process 6997c478bd9Sstevel@tonic-gate * to take advantage of the less restricted page allocation 7007c478bd9Sstevel@tonic-gate * in page_create_throttle(). 7017c478bd9Sstevel@tonic-gate */ 7027c478bd9Sstevel@tonic-gate kcage_cageout_init(); 7037c478bd9Sstevel@tonic-gate 7047c478bd9Sstevel@tonic-gate /* 7057c478bd9Sstevel@tonic-gate * Limit pushes to avoid saturating pageout devices. 7067c478bd9Sstevel@tonic-gate */ 7077c478bd9Sstevel@tonic-gate max_pushes = maxpgio / RATETOSCHEDPAGING; 7087c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &push_lock, callb_generic_cpr, "pageout"); 7097c478bd9Sstevel@tonic-gate 7107c478bd9Sstevel@tonic-gate for (;;) { 7117c478bd9Sstevel@tonic-gate mutex_enter(&push_lock); 7127c478bd9Sstevel@tonic-gate 7137c478bd9Sstevel@tonic-gate while ((arg = push_list) == NULL || pushes > max_pushes) { 7147c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 7157c478bd9Sstevel@tonic-gate cv_wait(&push_cv, &push_lock); 7167c478bd9Sstevel@tonic-gate pushes = 0; 7177c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &push_lock); 7187c478bd9Sstevel@tonic-gate } 7197c478bd9Sstevel@tonic-gate push_list = arg->a_next; 7207c478bd9Sstevel@tonic-gate arg->a_next = NULL; 7217c478bd9Sstevel@tonic-gate mutex_exit(&push_lock); 7227c478bd9Sstevel@tonic-gate 7237c478bd9Sstevel@tonic-gate if (VOP_PUTPAGE(arg->a_vp, (offset_t)arg->a_off, 72406cfbf35Sjimp arg->a_len, arg->a_flags, arg->a_cred, NULL) == 0) { 
7257c478bd9Sstevel@tonic-gate pushes++; 7267c478bd9Sstevel@tonic-gate } 7277c478bd9Sstevel@tonic-gate 7287c478bd9Sstevel@tonic-gate /* vp held by checkpage() */ 7297c478bd9Sstevel@tonic-gate VN_RELE(arg->a_vp); 7307c478bd9Sstevel@tonic-gate 7317c478bd9Sstevel@tonic-gate mutex_enter(&push_lock); 7327c478bd9Sstevel@tonic-gate arg->a_next = req_freelist; /* back on freelist */ 7337c478bd9Sstevel@tonic-gate req_freelist = arg; 7347c478bd9Sstevel@tonic-gate push_list_size--; 7357c478bd9Sstevel@tonic-gate mutex_exit(&push_lock); 7367c478bd9Sstevel@tonic-gate } 7377c478bd9Sstevel@tonic-gate } 7387c478bd9Sstevel@tonic-gate 7397c478bd9Sstevel@tonic-gate /* 7407c478bd9Sstevel@tonic-gate * Kernel thread that scans pages looking for ones to free 7417c478bd9Sstevel@tonic-gate */ 7427c478bd9Sstevel@tonic-gate static void 7437c478bd9Sstevel@tonic-gate pageout_scanner(void) 7447c478bd9Sstevel@tonic-gate { 7457c478bd9Sstevel@tonic-gate struct page *fronthand, *backhand; 7467c478bd9Sstevel@tonic-gate uint_t count; 7477c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 7487c478bd9Sstevel@tonic-gate pgcnt_t nscan_limit; 7497c478bd9Sstevel@tonic-gate pgcnt_t pcount; 7507c478bd9Sstevel@tonic-gate 7517c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &pageout_mutex, callb_generic_cpr, "poscan"); 7527c478bd9Sstevel@tonic-gate mutex_enter(&pageout_mutex); 7537c478bd9Sstevel@tonic-gate 7547c478bd9Sstevel@tonic-gate /* 7557c478bd9Sstevel@tonic-gate * The restart case does not attempt to point the hands at roughly 7567c478bd9Sstevel@tonic-gate * the right point on the assumption that after one circuit things 7577c478bd9Sstevel@tonic-gate * will have settled down - and restarts shouldn't be that often. 7587c478bd9Sstevel@tonic-gate */ 7597c478bd9Sstevel@tonic-gate 7607c478bd9Sstevel@tonic-gate /* 7617c478bd9Sstevel@tonic-gate * Set the two clock hands to be separated by a reasonable amount, 7627c478bd9Sstevel@tonic-gate * but no more than 360 degrees apart. 
7637c478bd9Sstevel@tonic-gate */ 7647c478bd9Sstevel@tonic-gate backhand = page_first(); 7657c478bd9Sstevel@tonic-gate if (handspreadpages >= total_pages) 7667c478bd9Sstevel@tonic-gate fronthand = page_nextn(backhand, total_pages - 1); 7677c478bd9Sstevel@tonic-gate else 7687c478bd9Sstevel@tonic-gate fronthand = page_nextn(backhand, handspreadpages); 7697c478bd9Sstevel@tonic-gate 7707c478bd9Sstevel@tonic-gate min_pageout_ticks = MAX(1, 7717c478bd9Sstevel@tonic-gate ((hz * min_percent_cpu) / 100) / RATETOSCHEDPAGING); 7727c478bd9Sstevel@tonic-gate max_pageout_ticks = MAX(min_pageout_ticks, 7737c478bd9Sstevel@tonic-gate ((hz * max_percent_cpu) / 100) / RATETOSCHEDPAGING); 7747c478bd9Sstevel@tonic-gate 7757c478bd9Sstevel@tonic-gate loop: 7767c478bd9Sstevel@tonic-gate cv_signal_pageout(); 7777c478bd9Sstevel@tonic-gate 7787c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 7797c478bd9Sstevel@tonic-gate cv_wait(&proc_pageout->p_cv, &pageout_mutex); 7807c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &pageout_mutex); 7817c478bd9Sstevel@tonic-gate 7827c478bd9Sstevel@tonic-gate if (!dopageout) 7837c478bd9Sstevel@tonic-gate goto loop; 7847c478bd9Sstevel@tonic-gate 7857c478bd9Sstevel@tonic-gate if (reset_hands) { 7867c478bd9Sstevel@tonic-gate reset_hands = 0; 7877c478bd9Sstevel@tonic-gate 7887c478bd9Sstevel@tonic-gate backhand = page_first(); 7897c478bd9Sstevel@tonic-gate if (handspreadpages >= total_pages) 7907c478bd9Sstevel@tonic-gate fronthand = page_nextn(backhand, total_pages - 1); 7917c478bd9Sstevel@tonic-gate else 7927c478bd9Sstevel@tonic-gate fronthand = page_nextn(backhand, handspreadpages); 7937c478bd9Sstevel@tonic-gate } 7947c478bd9Sstevel@tonic-gate 7957c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, pgrrun, 1); 7967c478bd9Sstevel@tonic-gate count = 0; 7977c478bd9Sstevel@tonic-gate 7987c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGEOUT_START, 7997c478bd9Sstevel@tonic-gate "pageout_start:freemem %ld lotsfree %ld nscan %ld desscan %ld", 
8007c478bd9Sstevel@tonic-gate freemem, lotsfree, nscan, desscan); 8017c478bd9Sstevel@tonic-gate 8027c478bd9Sstevel@tonic-gate /* Kernel probe */ 8037c478bd9Sstevel@tonic-gate TNF_PROBE_2(pageout_scan_start, "vm pagedaemon", /* CSTYLED */, 80406cfbf35Sjimp tnf_ulong, pages_free, freemem, tnf_ulong, pages_needed, needfree); 8057c478bd9Sstevel@tonic-gate 8067c478bd9Sstevel@tonic-gate pcount = 0; 8077c478bd9Sstevel@tonic-gate if (pageout_sample_cnt < pageout_sample_lim) { 8087c478bd9Sstevel@tonic-gate nscan_limit = total_pages; 8097c478bd9Sstevel@tonic-gate } else { 8107c478bd9Sstevel@tonic-gate nscan_limit = desscan; 8117c478bd9Sstevel@tonic-gate } 812d3d50737SRafael Vanoni pageout_lbolt = ddi_get_lbolt(); 8137c478bd9Sstevel@tonic-gate sample_start = gethrtime(); 8147c478bd9Sstevel@tonic-gate 8157c478bd9Sstevel@tonic-gate /* 8167c478bd9Sstevel@tonic-gate * Scan the appropriate number of pages for a single duty cycle. 8177c478bd9Sstevel@tonic-gate * However, stop scanning as soon as there is enough free memory. 8187c478bd9Sstevel@tonic-gate * For a short while, we will be sampling the performance of the 8197c478bd9Sstevel@tonic-gate * scanner and need to keep running just to get sample data, in 8207c478bd9Sstevel@tonic-gate * which case we keep going and don't pay attention to whether 8217c478bd9Sstevel@tonic-gate * or not there is enough free memory. 8227c478bd9Sstevel@tonic-gate */ 8237c478bd9Sstevel@tonic-gate 8247c478bd9Sstevel@tonic-gate while (nscan < nscan_limit && (freemem < lotsfree + needfree || 8257c478bd9Sstevel@tonic-gate pageout_sample_cnt < pageout_sample_lim)) { 8267c478bd9Sstevel@tonic-gate int rvfront, rvback; 8277c478bd9Sstevel@tonic-gate 8287c478bd9Sstevel@tonic-gate /* 8297c478bd9Sstevel@tonic-gate * Check to see if we have exceeded our %CPU budget 8307c478bd9Sstevel@tonic-gate * for this wakeup, but not on every single page visited, 8317c478bd9Sstevel@tonic-gate * just every once in a while. 
8327c478bd9Sstevel@tonic-gate */ 8337c478bd9Sstevel@tonic-gate if ((pcount & PAGES_POLL_MASK) == PAGES_POLL_MASK) { 834d3d50737SRafael Vanoni pageout_cycle_ticks = ddi_get_lbolt() - pageout_lbolt; 8357c478bd9Sstevel@tonic-gate if (pageout_cycle_ticks >= pageout_ticks) { 8367c478bd9Sstevel@tonic-gate ++pageout_timeouts; 8377c478bd9Sstevel@tonic-gate break; 8387c478bd9Sstevel@tonic-gate } 8397c478bd9Sstevel@tonic-gate } 8407c478bd9Sstevel@tonic-gate 8417c478bd9Sstevel@tonic-gate /* 8427c478bd9Sstevel@tonic-gate * If checkpage manages to add a page to the free list, 8437c478bd9Sstevel@tonic-gate * we give ourselves another couple of trips around the loop. 8447c478bd9Sstevel@tonic-gate */ 8457c478bd9Sstevel@tonic-gate if ((rvfront = checkpage(fronthand, FRONT)) == 1) 8467c478bd9Sstevel@tonic-gate count = 0; 8477c478bd9Sstevel@tonic-gate if ((rvback = checkpage(backhand, BACK)) == 1) 8487c478bd9Sstevel@tonic-gate count = 0; 8497c478bd9Sstevel@tonic-gate 8507c478bd9Sstevel@tonic-gate ++pcount; 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate /* 8537c478bd9Sstevel@tonic-gate * protected by pageout_mutex instead of cpu_stat_lock 8547c478bd9Sstevel@tonic-gate */ 8557c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, scan, 1); 8567c478bd9Sstevel@tonic-gate 8577c478bd9Sstevel@tonic-gate /* 8587c478bd9Sstevel@tonic-gate * Don't include ineligible pages in the number scanned. 
8597c478bd9Sstevel@tonic-gate */ 8607c478bd9Sstevel@tonic-gate if (rvfront != -1 || rvback != -1) 8617c478bd9Sstevel@tonic-gate nscan++; 8627c478bd9Sstevel@tonic-gate 8637c478bd9Sstevel@tonic-gate backhand = page_next(backhand); 8647c478bd9Sstevel@tonic-gate 8657c478bd9Sstevel@tonic-gate /* 8667c478bd9Sstevel@tonic-gate * backhand update and wraparound check are done separately 8677c478bd9Sstevel@tonic-gate * because lint barks when it finds an empty "if" body 8687c478bd9Sstevel@tonic-gate */ 8697c478bd9Sstevel@tonic-gate 8707c478bd9Sstevel@tonic-gate if ((fronthand = page_next(fronthand)) == page_first()) { 8717c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_HAND_WRAP, 8727c478bd9Sstevel@tonic-gate "pageout_hand_wrap:freemem %ld whichhand %d", 8737c478bd9Sstevel@tonic-gate freemem, FRONT); 8747c478bd9Sstevel@tonic-gate 8757c478bd9Sstevel@tonic-gate /* 8767c478bd9Sstevel@tonic-gate * protected by pageout_mutex instead of cpu_stat_lock 8777c478bd9Sstevel@tonic-gate */ 8787c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, rev, 1); 8797c478bd9Sstevel@tonic-gate if (++count > 1) { 8807c478bd9Sstevel@tonic-gate /* 8817c478bd9Sstevel@tonic-gate * Extremely unlikely, but it happens. 8827c478bd9Sstevel@tonic-gate * We went around the loop at least once 8837c478bd9Sstevel@tonic-gate * and didn't get far enough. 8847c478bd9Sstevel@tonic-gate * If we are still skipping `highly shared' 8857c478bd9Sstevel@tonic-gate * pages, skip fewer of them. Otherwise, 8867c478bd9Sstevel@tonic-gate * give up till the next clock tick. 
8877c478bd9Sstevel@tonic-gate */ 8887c478bd9Sstevel@tonic-gate if (po_share < MAX_PO_SHARE) { 8897c478bd9Sstevel@tonic-gate po_share <<= 1; 8907c478bd9Sstevel@tonic-gate } else { 8917c478bd9Sstevel@tonic-gate /* 8927c478bd9Sstevel@tonic-gate * Really a "goto loop", but 8937c478bd9Sstevel@tonic-gate * if someone is TRACing or 8947c478bd9Sstevel@tonic-gate * TNF_PROBE_ing, at least 8957c478bd9Sstevel@tonic-gate * make records to show 8967c478bd9Sstevel@tonic-gate * where we are. 8977c478bd9Sstevel@tonic-gate */ 8987c478bd9Sstevel@tonic-gate break; 8997c478bd9Sstevel@tonic-gate } 9007c478bd9Sstevel@tonic-gate } 9017c478bd9Sstevel@tonic-gate } 9027c478bd9Sstevel@tonic-gate } 9037c478bd9Sstevel@tonic-gate 9047c478bd9Sstevel@tonic-gate sample_end = gethrtime(); 9057c478bd9Sstevel@tonic-gate 9067c478bd9Sstevel@tonic-gate TRACE_5(TR_FAC_VM, TR_PAGEOUT_END, 9077c478bd9Sstevel@tonic-gate "pageout_end:freemem %ld lots %ld nscan %ld des %ld count %u", 9087c478bd9Sstevel@tonic-gate freemem, lotsfree, nscan, desscan, count); 9097c478bd9Sstevel@tonic-gate 9107c478bd9Sstevel@tonic-gate /* Kernel probe */ 9117c478bd9Sstevel@tonic-gate TNF_PROBE_2(pageout_scan_end, "vm pagedaemon", /* CSTYLED */, 91206cfbf35Sjimp tnf_ulong, pages_scanned, nscan, tnf_ulong, pages_free, freemem); 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate if (pageout_sample_cnt < pageout_sample_lim) { 9157c478bd9Sstevel@tonic-gate pageout_sample_pages += pcount; 9167c478bd9Sstevel@tonic-gate pageout_sample_etime += sample_end - sample_start; 9177c478bd9Sstevel@tonic-gate ++pageout_sample_cnt; 9187c478bd9Sstevel@tonic-gate } 9197c478bd9Sstevel@tonic-gate if (pageout_sample_cnt >= pageout_sample_lim && 9207c478bd9Sstevel@tonic-gate pageout_new_spread == 0) { 9217c478bd9Sstevel@tonic-gate pageout_rate = (hrrate_t)pageout_sample_pages * 9227c478bd9Sstevel@tonic-gate (hrrate_t)(NANOSEC) / pageout_sample_etime; 9237c478bd9Sstevel@tonic-gate pageout_new_spread = pageout_rate / 10; 
9247c478bd9Sstevel@tonic-gate setupclock(1); 9257c478bd9Sstevel@tonic-gate } 9267c478bd9Sstevel@tonic-gate 9277c478bd9Sstevel@tonic-gate goto loop; 9287c478bd9Sstevel@tonic-gate } 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate /* 9317c478bd9Sstevel@tonic-gate * Look at the page at hand. If it is locked (e.g., for physical i/o), 9327c478bd9Sstevel@tonic-gate * system (u., page table) or free, then leave it alone. Otherwise, 9337c478bd9Sstevel@tonic-gate * if we are running the front hand, turn off the page's reference bit. 9347c478bd9Sstevel@tonic-gate * If the proc is over maxrss, we take it. If running the back hand, 9357c478bd9Sstevel@tonic-gate * check whether the page has been reclaimed. If not, free the page, 9367c478bd9Sstevel@tonic-gate * pushing it to disk first if necessary. 9377c478bd9Sstevel@tonic-gate * 9387c478bd9Sstevel@tonic-gate * Return values: 9397c478bd9Sstevel@tonic-gate * -1 if the page is not a candidate at all, 9407c478bd9Sstevel@tonic-gate * 0 if not freed, or 9417c478bd9Sstevel@tonic-gate * 1 if we freed it. 9427c478bd9Sstevel@tonic-gate */ 9437c478bd9Sstevel@tonic-gate static int 9447c478bd9Sstevel@tonic-gate checkpage(struct page *pp, int whichhand) 9457c478bd9Sstevel@tonic-gate { 9467c478bd9Sstevel@tonic-gate int ppattr; 9477c478bd9Sstevel@tonic-gate int isfs = 0; 9487c478bd9Sstevel@tonic-gate int isexec = 0; 9497c478bd9Sstevel@tonic-gate int pagesync_flag; 9507c478bd9Sstevel@tonic-gate 9517c478bd9Sstevel@tonic-gate /* 9527c478bd9Sstevel@tonic-gate * Skip pages: 9537c478bd9Sstevel@tonic-gate * - associated with the kernel vnode since 9547c478bd9Sstevel@tonic-gate * they are always "exclusively" locked. 9557c478bd9Sstevel@tonic-gate * - that are free 9567c478bd9Sstevel@tonic-gate * - that are shared more than po_share'd times 9577c478bd9Sstevel@tonic-gate * - its already locked 9587c478bd9Sstevel@tonic-gate * 9597c478bd9Sstevel@tonic-gate * NOTE: These optimizations assume that reads are atomic. 
9607c478bd9Sstevel@tonic-gate */ 961a98e9dbfSaguzovsk 962a98e9dbfSaguzovsk if (PP_ISKAS(pp) || PAGE_LOCKED(pp) || PP_ISFREE(pp) || 963a98e9dbfSaguzovsk pp->p_lckcnt != 0 || pp->p_cowcnt != 0 || 964a98e9dbfSaguzovsk hat_page_checkshare(pp, po_share)) { 9657c478bd9Sstevel@tonic-gate return (-1); 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate 9687c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) { 9697c478bd9Sstevel@tonic-gate /* 9707c478bd9Sstevel@tonic-gate * Skip the page if we can't acquire the "exclusive" lock. 9717c478bd9Sstevel@tonic-gate */ 9727c478bd9Sstevel@tonic-gate return (-1); 9737c478bd9Sstevel@tonic-gate } else if (PP_ISFREE(pp)) { 9747c478bd9Sstevel@tonic-gate /* 9757c478bd9Sstevel@tonic-gate * It became free between the above check and our actually 9767c478bd9Sstevel@tonic-gate * locking the page. Oh, well there will be other pages. 9777c478bd9Sstevel@tonic-gate */ 9787c478bd9Sstevel@tonic-gate page_unlock(pp); 9797c478bd9Sstevel@tonic-gate return (-1); 9807c478bd9Sstevel@tonic-gate } 9817c478bd9Sstevel@tonic-gate 9827c478bd9Sstevel@tonic-gate /* 9837c478bd9Sstevel@tonic-gate * Reject pages that cannot be freed. The page_struct_lock 9847c478bd9Sstevel@tonic-gate * need not be acquired to examine these 9857c478bd9Sstevel@tonic-gate * fields since the page has an "exclusive" lock. 
9867c478bd9Sstevel@tonic-gate */ 9877c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 9887c478bd9Sstevel@tonic-gate page_unlock(pp); 9897c478bd9Sstevel@tonic-gate return (-1); 9907c478bd9Sstevel@tonic-gate } 9917c478bd9Sstevel@tonic-gate 9927c478bd9Sstevel@tonic-gate /* 9937c478bd9Sstevel@tonic-gate * Maintain statistics for what we are freeing 9947c478bd9Sstevel@tonic-gate */ 9957c478bd9Sstevel@tonic-gate 9967c478bd9Sstevel@tonic-gate if (pp->p_vnode != NULL) { 9977c478bd9Sstevel@tonic-gate if (pp->p_vnode->v_flag & VVMEXEC) 9987c478bd9Sstevel@tonic-gate isexec = 1; 9997c478bd9Sstevel@tonic-gate 10007c478bd9Sstevel@tonic-gate if (!IS_SWAPFSVP(pp->p_vnode)) 10017c478bd9Sstevel@tonic-gate isfs = 1; 10027c478bd9Sstevel@tonic-gate } 10037c478bd9Sstevel@tonic-gate 10047c478bd9Sstevel@tonic-gate /* 10057c478bd9Sstevel@tonic-gate * Turn off REF and MOD bits with the front hand. 10067c478bd9Sstevel@tonic-gate * The back hand examines the REF bit and always considers 10077c478bd9Sstevel@tonic-gate * SHARED pages as referenced. 10087c478bd9Sstevel@tonic-gate */ 10097c478bd9Sstevel@tonic-gate if (whichhand == FRONT) 10107c478bd9Sstevel@tonic-gate pagesync_flag = HAT_SYNC_ZERORM; 10117c478bd9Sstevel@tonic-gate else 10127c478bd9Sstevel@tonic-gate pagesync_flag = HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_REF | 10137c478bd9Sstevel@tonic-gate HAT_SYNC_STOPON_SHARED; 10147c478bd9Sstevel@tonic-gate 10157c478bd9Sstevel@tonic-gate ppattr = hat_pagesync(pp, pagesync_flag); 10167c478bd9Sstevel@tonic-gate 10177c478bd9Sstevel@tonic-gate recheck: 10187c478bd9Sstevel@tonic-gate /* 10197c478bd9Sstevel@tonic-gate * If page is referenced; make unreferenced but reclaimable. 10207c478bd9Sstevel@tonic-gate * If this page is not referenced, then it must be reclaimable 10217c478bd9Sstevel@tonic-gate * and we can add it to the free list. 
10227c478bd9Sstevel@tonic-gate */ 10237c478bd9Sstevel@tonic-gate if (ppattr & P_REF) { 10247c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_ISREF, 10257c478bd9Sstevel@tonic-gate "pageout_isref:pp %p whichhand %d", pp, whichhand); 10267c478bd9Sstevel@tonic-gate if (whichhand == FRONT) { 10277c478bd9Sstevel@tonic-gate /* 10287c478bd9Sstevel@tonic-gate * Checking of rss or madvise flags needed here... 10297c478bd9Sstevel@tonic-gate * 10307c478bd9Sstevel@tonic-gate * If not "well-behaved", fall through into the code 10317c478bd9Sstevel@tonic-gate * for not referenced. 10327c478bd9Sstevel@tonic-gate */ 10337c478bd9Sstevel@tonic-gate hat_clrref(pp); 10347c478bd9Sstevel@tonic-gate } 10357c478bd9Sstevel@tonic-gate /* 10367c478bd9Sstevel@tonic-gate * Somebody referenced the page since the front 10377c478bd9Sstevel@tonic-gate * hand went by, so it's not a candidate for 10387c478bd9Sstevel@tonic-gate * freeing up. 10397c478bd9Sstevel@tonic-gate */ 10407c478bd9Sstevel@tonic-gate page_unlock(pp); 10417c478bd9Sstevel@tonic-gate return (0); 10427c478bd9Sstevel@tonic-gate } 10437c478bd9Sstevel@tonic-gate 10447c478bd9Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[0]); 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate /* 10477c478bd9Sstevel@tonic-gate * If large page, attempt to demote it. If successfully demoted, 10487c478bd9Sstevel@tonic-gate * retry the checkpage. 
10497c478bd9Sstevel@tonic-gate */ 10507c478bd9Sstevel@tonic-gate if (pp->p_szc != 0) { 10517c478bd9Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 10527c478bd9Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[1]); 10537c478bd9Sstevel@tonic-gate page_unlock(pp); 10547c478bd9Sstevel@tonic-gate return (-1); 10557c478bd9Sstevel@tonic-gate } 10567c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 10577c478bd9Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[2]); 10587c478bd9Sstevel@tonic-gate /* 10597c478bd9Sstevel@tonic-gate * since page_try_demote_pages() could have unloaded some 10607c478bd9Sstevel@tonic-gate * mappings it makes sense to reload ppattr. 10617c478bd9Sstevel@tonic-gate */ 10627c478bd9Sstevel@tonic-gate ppattr = hat_page_getattr(pp, P_MOD | P_REF); 10637c478bd9Sstevel@tonic-gate } 10647c478bd9Sstevel@tonic-gate 10657c478bd9Sstevel@tonic-gate /* 10667c478bd9Sstevel@tonic-gate * If the page is currently dirty, we have to arrange 10677c478bd9Sstevel@tonic-gate * to have it cleaned before it can be freed. 10687c478bd9Sstevel@tonic-gate * 10697c478bd9Sstevel@tonic-gate * XXX - ASSERT(pp->p_vnode != NULL); 10707c478bd9Sstevel@tonic-gate */ 10717c478bd9Sstevel@tonic-gate if ((ppattr & P_MOD) && pp->p_vnode) { 10727c478bd9Sstevel@tonic-gate struct vnode *vp = pp->p_vnode; 10737c478bd9Sstevel@tonic-gate u_offset_t offset = pp->p_offset; 10747c478bd9Sstevel@tonic-gate 10757c478bd9Sstevel@tonic-gate /* 10767c478bd9Sstevel@tonic-gate * XXX - Test for process being swapped out or about to exit? 10777c478bd9Sstevel@tonic-gate * [Can't get back to process(es) using the page.] 10787c478bd9Sstevel@tonic-gate */ 10797c478bd9Sstevel@tonic-gate 10807c478bd9Sstevel@tonic-gate /* 10817c478bd9Sstevel@tonic-gate * Hold the vnode before releasing the page lock to 10827c478bd9Sstevel@tonic-gate * prevent it from being freed and re-used by some 10837c478bd9Sstevel@tonic-gate * other thread. 
10847c478bd9Sstevel@tonic-gate */ 10857c478bd9Sstevel@tonic-gate VN_HOLD(vp); 10867c478bd9Sstevel@tonic-gate page_unlock(pp); 10877c478bd9Sstevel@tonic-gate 10887c478bd9Sstevel@tonic-gate /* 10897c478bd9Sstevel@tonic-gate * Queue i/o request for the pageout thread. 10907c478bd9Sstevel@tonic-gate */ 10917c478bd9Sstevel@tonic-gate if (!queue_io_request(vp, offset)) { 10927c478bd9Sstevel@tonic-gate VN_RELE(vp); 10937c478bd9Sstevel@tonic-gate return (0); 10947c478bd9Sstevel@tonic-gate } 10957c478bd9Sstevel@tonic-gate return (1); 10967c478bd9Sstevel@tonic-gate } 10977c478bd9Sstevel@tonic-gate 10987c478bd9Sstevel@tonic-gate /* 10997c478bd9Sstevel@tonic-gate * Now we unload all the translations, 11007c478bd9Sstevel@tonic-gate * and put the page back on to the free list. 11017c478bd9Sstevel@tonic-gate * If the page was used (referenced or modified) after 11027c478bd9Sstevel@tonic-gate * the pagesync but before it was unloaded we catch it 11037c478bd9Sstevel@tonic-gate * and handle the page properly. 
11047c478bd9Sstevel@tonic-gate */ 11057c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_FREE, 11067c478bd9Sstevel@tonic-gate "pageout_free:pp %p whichhand %d", pp, whichhand); 11077c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 11087c478bd9Sstevel@tonic-gate ppattr = hat_page_getattr(pp, P_MOD | P_REF); 11097c478bd9Sstevel@tonic-gate if ((ppattr & P_REF) || ((ppattr & P_MOD) && pp->p_vnode)) 11107c478bd9Sstevel@tonic-gate goto recheck; 11117c478bd9Sstevel@tonic-gate 11127c478bd9Sstevel@tonic-gate /*LINTED: constant in conditional context*/ 11137c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 11147c478bd9Sstevel@tonic-gate 11157c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, dfree, 1); 11167c478bd9Sstevel@tonic-gate 11177c478bd9Sstevel@tonic-gate if (isfs) { 11187c478bd9Sstevel@tonic-gate if (isexec) { 11197c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, execfree, 1); 11207c478bd9Sstevel@tonic-gate } else { 11217c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, fsfree, 1); 11227c478bd9Sstevel@tonic-gate } 11237c478bd9Sstevel@tonic-gate } else { 11247c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, anonfree, 1); 11257c478bd9Sstevel@tonic-gate } 11267c478bd9Sstevel@tonic-gate 11277c478bd9Sstevel@tonic-gate return (1); /* freed a page! */ 11287c478bd9Sstevel@tonic-gate } 11297c478bd9Sstevel@tonic-gate 11307c478bd9Sstevel@tonic-gate /* 11317c478bd9Sstevel@tonic-gate * Queue async i/o request from pageout_scanner and segment swapout 11327c478bd9Sstevel@tonic-gate * routines on one common list. This ensures that pageout devices (swap) 11337c478bd9Sstevel@tonic-gate * are not saturated by pageout_scanner or swapout requests. 11347c478bd9Sstevel@tonic-gate * The pageout thread empties this list by initiating i/o operations. 
11357c478bd9Sstevel@tonic-gate */ 11367c478bd9Sstevel@tonic-gate int 11377c478bd9Sstevel@tonic-gate queue_io_request(vnode_t *vp, u_offset_t off) 11387c478bd9Sstevel@tonic-gate { 11397c478bd9Sstevel@tonic-gate struct async_reqs *arg; 11407c478bd9Sstevel@tonic-gate 11417c478bd9Sstevel@tonic-gate /* 11427c478bd9Sstevel@tonic-gate * If we cannot allocate an async request struct, 11437c478bd9Sstevel@tonic-gate * skip this page. 11447c478bd9Sstevel@tonic-gate */ 11457c478bd9Sstevel@tonic-gate mutex_enter(&push_lock); 11467c478bd9Sstevel@tonic-gate if ((arg = req_freelist) == NULL) { 11477c478bd9Sstevel@tonic-gate mutex_exit(&push_lock); 11487c478bd9Sstevel@tonic-gate return (0); 11497c478bd9Sstevel@tonic-gate } 11507c478bd9Sstevel@tonic-gate req_freelist = arg->a_next; /* adjust freelist */ 11517c478bd9Sstevel@tonic-gate push_list_size++; 11527c478bd9Sstevel@tonic-gate 11537c478bd9Sstevel@tonic-gate arg->a_vp = vp; 11547c478bd9Sstevel@tonic-gate arg->a_off = off; 11557c478bd9Sstevel@tonic-gate arg->a_len = PAGESIZE; 11567c478bd9Sstevel@tonic-gate arg->a_flags = B_ASYNC | B_FREE; 11577c478bd9Sstevel@tonic-gate arg->a_cred = kcred; /* always held */ 11587c478bd9Sstevel@tonic-gate 11597c478bd9Sstevel@tonic-gate /* 11607c478bd9Sstevel@tonic-gate * Add to list of pending write requests. 11617c478bd9Sstevel@tonic-gate */ 11627c478bd9Sstevel@tonic-gate arg->a_next = push_list; 11637c478bd9Sstevel@tonic-gate push_list = arg; 11647c478bd9Sstevel@tonic-gate 11657c478bd9Sstevel@tonic-gate if (req_freelist == NULL) { 11667c478bd9Sstevel@tonic-gate /* 11677c478bd9Sstevel@tonic-gate * No free async requests left. The lock is held so we 11687c478bd9Sstevel@tonic-gate * might as well signal the pusher thread now. 
11697c478bd9Sstevel@tonic-gate */ 11707c478bd9Sstevel@tonic-gate cv_signal(&push_cv); 11717c478bd9Sstevel@tonic-gate } 11727c478bd9Sstevel@tonic-gate mutex_exit(&push_lock); 11737c478bd9Sstevel@tonic-gate return (1); 11747c478bd9Sstevel@tonic-gate } 11757c478bd9Sstevel@tonic-gate 11767c478bd9Sstevel@tonic-gate /* 11777c478bd9Sstevel@tonic-gate * Wakeup pageout to initiate i/o if push_list is not empty. 11787c478bd9Sstevel@tonic-gate */ 11797c478bd9Sstevel@tonic-gate void 11807c478bd9Sstevel@tonic-gate cv_signal_pageout() 11817c478bd9Sstevel@tonic-gate { 11827c478bd9Sstevel@tonic-gate if (push_list != NULL) { 11837c478bd9Sstevel@tonic-gate mutex_enter(&push_lock); 11847c478bd9Sstevel@tonic-gate cv_signal(&push_cv); 11857c478bd9Sstevel@tonic-gate mutex_exit(&push_lock); 11867c478bd9Sstevel@tonic-gate } 11877c478bd9Sstevel@tonic-gate } 1188