/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	All Rights Reserved	*/


/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/tuneable.h>
#include <sys/inline.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/var.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/vm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sysinfo.h>
#include <sys/callb.h>
#include <sys/reboot.h>
#include <sys/time.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg_kmem.h>

int doiflush = 1;	/* non-zero to turn inode flushing on */
int dopageflush = 1;	/* non-zero to turn page flushing on */

/*
 * To improve boot performance, don't run the inode flushing loop until
 * the specified number of seconds after boot.  To revert to the old
 * behavior, set fsflush_iflush_delay to 0.  We have not created any new
 * filesystem danger that did not exist previously, since there is always a
 * window in between when fsflush does the inode flush loop during which the
 * system could crash, fail to sync the filesystem, and fsck will be needed
 * to recover.  We have, however, widened this window.  Finally,
 * we never delay inode flushing if we're booting into single user mode,
 * where the administrator may be modifying files or using fsck.  This
 * modification avoids inode flushes during boot whose only purpose is to
 * update atimes on files which have been accessed during boot.
 */
int fsflush_iflush_delay = 60;

kcondvar_t fsflush_cv;			/* clock interrupt signals this each second */
static kmutex_t fsflush_lock;		/* just for the cv_wait */
ksema_t fsflush_sema;			/* to serialize with reboot */

/*
 * some statistics for fsflush_do_pages
 */
typedef struct {
	ulong_t fsf_scan;	/* number of pages scanned */
	ulong_t fsf_examined;	/* number of page_t's actually examined, can */
				/* be less than fsf_scan due to large pages */
	ulong_t fsf_locked;	/* pages we actually page_lock()ed */
	ulong_t fsf_modified;	/* number of modified pages found */
	ulong_t fsf_coalesce;	/* number of page coalesces done */
	ulong_t fsf_time;	/* nanoseconds of run time */
	ulong_t fsf_releases;	/* number of page_release() done */
} fsf_stat_t;

fsf_stat_t fsf_recent;	/* counts for most recent duty cycle */
fsf_stat_t fsf_total;	/* total of counts */
ulong_t fsf_cycles;	/* number of runs reflected in fsf_total */

/*
 * data used to determine when we can coalesce consecutive free pages
 * into larger pages.  Indexed by page size code; filled in by fsflush()
 * at startup from the platform's page_get_pagesize()/page_get_pagecnt().
 */
#define	MAX_PAGESIZES	32
static ulong_t		fsf_npgsz;		/* # of supported page sizes */
static pgcnt_t		fsf_pgcnt[MAX_PAGESIZES]; /* small pages per next-larger size */
static pgcnt_t		fsf_mask[MAX_PAGESIZES];  /* alignment mask for next-larger size */


/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize.  This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 *
 * Scan state (cookie, current page, scan length) is kept in function-local
 * statics so each wakeup resumes where the previous one left off.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	int		mod;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static void	*pp_cookie = NULL;  /* cookie for page_next_scan_*() */
	static page_t	*pp;

	/*
	 * Check to see if total_pages has changed (e.g. after a DR event).
	 * nscan is the number of pages to look at per wakeup; since fsflush
	 * runs every t_fsflushr seconds, this rate walks all of memory once
	 * every v_autoup seconds.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	/*
	 * On first time through initialize the cookie used for page_t scans
	 */
	if (pp_cookie == NULL)
		pp = page_next_scan_init(&pp_cookie);

	pcount = 0;
	while (pcount <= nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		pp = page_next_scan_large(pp, &pcount, &pp_cookie);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);

		/*
		 * Do a bunch of dirty tests (ie. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned for the
				 * next larger page size
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		/*
		 * Skip kernel pages and pages that are locked, page-locked,
		 * or copy-on-write locked; none of them are flush candidates.
		 */
		if (pp->p_vnode == &kvp ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0)
			continue;


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    vp == &kvp ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0 ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			continue;
		}

		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit. Leaving the bit alone in hardware.
		 * It will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 *
	 * The "total += recent = value" idiom below records this cycle's
	 * count in fsf_recent while accumulating it into fsf_total.
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}

/*
 * As part of file system hardening, this daemon is awakened
 * every second to flush cached data which includes the
 * buffer cache, the inode cache and mapped pages.
 *
 * Runs forever as a kernel daemon thread; never returns.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t		bcount;
	kmutex_t	*hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime = 0;
	proc_fsflush->p_cutime = 0;
	proc_fsflush->p_utime = 0;
	/*
	 * Name the process.  p_psargs gets 8 bytes ("fsflush" + NUL);
	 * u_comm gets only the 7 name characters — NOTE(review): this
	 * presumably relies on the u-area being pre-zeroed for the
	 * terminating NUL; confirm against the u_comm declaration.
	 */
	bcopy("fsflush", u.u_psargs, 8);
	bcopy("fsflush", u.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing: for each page size code, how many pages
	 * of that size make up the next larger size, and the pfn alignment
	 * mask a candidate run must satisfy.
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

	autoup = v.v_autoup * hz;	/* buffer age threshold, in ticks */
	icount = v.v_autoup / tune.t_fsflushr;	/* wakeups between inode syncs */
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
	/*
	 * fsflush_sema is held across each pass so reboot can serialize
	 * with a flush in progress; the CPR bracket marks the cv_wait as
	 * a suspend-safe point.
	 */
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);		/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {

		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

		/* empty delayed-write list for this hash bucket */
		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

			/*
			 * Write back buffers that have aged past autoup
			 * ticks and whose b_sem can be taken without
			 * blocking.  The hash lock is dropped around the
			 * write, so afterwards we restart from the list
			 * head rather than trusting the old av_forw link.
			 */
			if ((bp->b_flags & B_DELWRI) &&
			    (lbolt - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
					    bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 *
	 * There is no need to wakeup any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (lbolt64 / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes) once every
	 * icount wakeups, i.e. once per v_autoup seconds.
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.  The vfssw read lock is dropped
		 * around each filesystem's sync call (which may block);
		 * vfs_refvfssw() keeps the entry from disappearing while
		 * the lock is released.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
				    SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}