xref: /illumos-gate/usr/src/uts/common/fs/fsflush.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/tuneable.h>
#include <sys/inline.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/var.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/vm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sysinfo.h>
#include <sys/callb.h>
#include <sys/reboot.h>
#include <sys/time.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg_kmem.h>

int doiflush = 1;	/* non-zero to turn inode flushing on */
int dopageflush = 1;	/* non-zero to turn page flushing on */

/*
 * To improve boot performance, don't run the inode flushing loop until
 * the specified number of seconds after boot.  To revert to the old
 * behavior, set fsflush_iflush_delay to 0.  This creates no new
 * filesystem danger: there has always been a window between runs of the
 * inode flush loop during which the system could crash without having
 * synced the filesystem, leaving fsck needed to recover.  We have,
 * however, widened this window.  Finally, we never delay inode flushing
 * if we're booting into single user mode, where the administrator may be
 * modifying files or using fsck.  This modification avoids inode flushes
 * during boot whose only purpose is to update atimes on files which have
 * been accessed during boot.
 */
int fsflush_iflush_delay = 60;

kcondvar_t fsflush_cv;
static kmutex_t fsflush_lock;	/* just for the cv_wait */
ksema_t fsflush_sema;		/* to serialize with reboot */

/*
 * some statistics for fsflush_do_pages
 */
typedef struct {
	ulong_t fsf_scan;	/* number of pages scanned */
	ulong_t fsf_examined;	/* number of page_t's actually examined, can */
				/* be less than fsf_scan due to large pages */
	ulong_t fsf_locked;	/* pages we actually page_lock()ed */
	ulong_t fsf_modified;	/* number of modified pages found */
	ulong_t fsf_coalesce;	/* number of page coalesces done */
	ulong_t fsf_time;	/* nanoseconds of run time */
	ulong_t fsf_releases;	/* number of page_release() done */
} fsf_stat_t;

fsf_stat_t fsf_recent;	/* counts for most recent duty cycle */
fsf_stat_t fsf_total;	/* total of counts */
ulong_t fsf_cycles;	/* number of runs reflected in fsf_total */

/*
 * data used to determine when we can coalesce consecutive free pages
 * into larger pages.
 */
#define	MAX_PAGESIZES	32
static ulong_t		fsf_npgsz;
static pgcnt_t		fsf_pgcnt[MAX_PAGESIZES];
static pgcnt_t		fsf_mask[MAX_PAGESIZES];


/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	int		mod;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static void	*pp_cookie = NULL;
	static page_t	*pp;

	/*
	 * Check to see if total_pages has changed.
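	 * If it has, recompute nscan, the number of pages to look at on
	 * each wakeup.  The intent is that roughly all of memory gets
	 * scanned about once every v.v_autoup seconds, given that fsflush
	 * runs every tune.t_fsflushr seconds.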
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	/*
	 * On first time through initialize the cookie used for page_t scans
	 */
	if (pp_cookie == NULL)
		pp = page_next_scan_init(&pp_cookie);

	pcount = 0;
	while (pcount <= nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		pp = page_next_scan_large(pp, &pcount, &pp_cookie);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);

		/*
		 * Do a bunch of dirty tests (i.e. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned, i.e. its
				 * page frame number must be a multiple of
				 * the base page count of the next larger
				 * size (fsf_mask[szc] is that count minus 1)
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

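		/*
		 * Skip pages we can't do anything with here: kernel (kvp)
		 * pages, pages already locked by another thread, and pages
		 * held by lock or copy-on-write counts.
		 */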
		if (pp->p_vnode == &kvp ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0)
			continue;


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    vp == &kvp ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0 ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			continue;
		}

		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit, leaving the bit alone in hardware;
		 * it will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}

/*
 * As part of file system hardening, this daemon is awakened
 * every second to flush cached data which includes the
 * buffer cache, the inode cache and mapped pages.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t		bcount;
	kmutex_t	*hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime =  0;
	proc_fsflush->p_cutime =  0;
	proc_fsflush->p_utime = 0;
	bcopy("fsflush", u.u_psargs, 8);
	bcopy("fsflush", u.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing.
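	 * fsf_pgcnt[szc] is how many size-szc pages make up a page of the
	 * next larger size; fsf_mask[szc] is the base-page count of that
	 * next size minus one, used as an alignment mask on page frame
	 * numbers.  For example, with 8K base pages and 64K as the next
	 * size, fsf_pgcnt[0] would be 8 and fsf_mask[0] would be 7.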
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

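	/*
	 * autoup is the delayed-write age threshold in clock ticks, and
	 * icount is the number of fsflush wakeups between inode/attribute
	 * sync passes, so cached attributes also get flushed roughly every
	 * v.v_autoup seconds.
	 */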
	autoup = v.v_autoup * hz;
	icount = v.v_autoup / tune.t_fsflushr;
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
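	/*
	 * fsflush_sema is dropped only while waiting for the clock wakeup
	 * and is held across each flush pass, letting the reboot path
	 * serialize with a pass that is in progress.
	 */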
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);		/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
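	 * hbuf[] holds the buffer cache hash headers and dwbuf[] holds the
	 * corresponding per-bucket delayed write lists; both are protected
	 * by the bucket lock (taken below as hmp).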
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {

		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
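		 * Write out those that have been marked B_DELWRI for at
		 * least v.v_autoup seconds (b_start is the lbolt value at
		 * the time of the delayed write) and whose b_sem can be
		 * taken without blocking.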
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

			if ((bp->b_flags & B_DELWRI) &&
			    (lbolt - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
									bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 * There is no need to wake up any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (lbolt64 / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes).
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
					SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}
478