17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ad23a2dbSjohansen * Common Development and Distribution License (the "License"). 6ad23a2dbSjohansen * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 227c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 237c478bd9Sstevel@tonic-gate 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate /* 2607fa3635SAmrita Sadhukhan * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 277c478bd9Sstevel@tonic-gate * Use is subject to license terms. 287c478bd9Sstevel@tonic-gate */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/types.h> 317c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 327c478bd9Sstevel@tonic-gate #include <sys/param.h> 337c478bd9Sstevel@tonic-gate #include <sys/tuneable.h> 347c478bd9Sstevel@tonic-gate #include <sys/inline.h> 357c478bd9Sstevel@tonic-gate #include <sys/systm.h> 367c478bd9Sstevel@tonic-gate #include <sys/proc.h> 377c478bd9Sstevel@tonic-gate #include <sys/user.h> 387c478bd9Sstevel@tonic-gate #include <sys/var.h> 397c478bd9Sstevel@tonic-gate #include <sys/buf.h> 407c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 417c478bd9Sstevel@tonic-gate #include <sys/cred.h> 427c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 437c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 447c478bd9Sstevel@tonic-gate #include <sys/swap.h> 457c478bd9Sstevel@tonic-gate #include <sys/vm.h> 467c478bd9Sstevel@tonic-gate #include <sys/debug.h> 477c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 487c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h> 497c478bd9Sstevel@tonic-gate #include <sys/callb.h> 507c478bd9Sstevel@tonic-gate #include <sys/reboot.h> 517c478bd9Sstevel@tonic-gate #include <sys/time.h> 527c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 537c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 547c478bd9Sstevel@tonic-gate 557c478bd9Sstevel@tonic-gate #include <vm/hat.h> 567c478bd9Sstevel@tonic-gate #include <vm/page.h> 577c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 587c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate int doiflush = 1; /* non-zero to turn inode flushing on */ 617c478bd9Sstevel@tonic-gate int dopageflush = 1; /* non-zero to turn page flushing on */ 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate /* 647c478bd9Sstevel@tonic-gate * To improve boot performance, don't run the inode flushing loop until 657c478bd9Sstevel@tonic-gate * the specified number of seconds after boot. To revert to the old 667c478bd9Sstevel@tonic-gate * behavior, set fsflush_iflush_delay to 0. We have not created any new 677c478bd9Sstevel@tonic-gate * filesystem danger that did not exist previously, since there is always a 687c478bd9Sstevel@tonic-gate * window in between when fsflush does the inode flush loop during which the 697c478bd9Sstevel@tonic-gate * system could crash, fail to sync the filesystem, and fsck will be needed 707c478bd9Sstevel@tonic-gate * to recover. We have, however, widened this window. Finally, 717c478bd9Sstevel@tonic-gate * we never delay inode flushing if we're booting into single user mode, 727c478bd9Sstevel@tonic-gate * where the administrator may be modifying files or using fsck. This 737c478bd9Sstevel@tonic-gate * modification avoids inode flushes during boot whose only purpose is to 747c478bd9Sstevel@tonic-gate * update atimes on files which have been accessed during boot. 757c478bd9Sstevel@tonic-gate */ 767c478bd9Sstevel@tonic-gate int fsflush_iflush_delay = 60; 777c478bd9Sstevel@tonic-gate 787c478bd9Sstevel@tonic-gate kcondvar_t fsflush_cv; 797c478bd9Sstevel@tonic-gate static kmutex_t fsflush_lock; /* just for the cv_wait */ 807c478bd9Sstevel@tonic-gate ksema_t fsflush_sema; /* to serialize with reboot */ 817c478bd9Sstevel@tonic-gate 827c478bd9Sstevel@tonic-gate /* 837c478bd9Sstevel@tonic-gate * some statistics for fsflush_do_pages 847c478bd9Sstevel@tonic-gate */ 857c478bd9Sstevel@tonic-gate typedef struct { 867c478bd9Sstevel@tonic-gate ulong_t fsf_scan; /* number of pages scanned */ 877c478bd9Sstevel@tonic-gate ulong_t fsf_examined; /* number of page_t's actually examined, can */ 887c478bd9Sstevel@tonic-gate /* be less than fsf_scan due to large pages */ 897c478bd9Sstevel@tonic-gate ulong_t fsf_locked; /* pages we actually page_lock()ed */ 907c478bd9Sstevel@tonic-gate ulong_t fsf_modified; /* number of modified pages found */ 917c478bd9Sstevel@tonic-gate ulong_t fsf_coalesce; /* number of page coalesces done */ 927c478bd9Sstevel@tonic-gate ulong_t fsf_time; /* nanoseconds of run time */ 937c478bd9Sstevel@tonic-gate ulong_t fsf_releases; /* number of page_release() done */ 947c478bd9Sstevel@tonic-gate } fsf_stat_t; 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate fsf_stat_t fsf_recent; /* counts for most recent duty cycle */ 977c478bd9Sstevel@tonic-gate fsf_stat_t fsf_total; /* total of counts */ 987c478bd9Sstevel@tonic-gate ulong_t fsf_cycles; /* number of runs refelected in fsf_total */ 997c478bd9Sstevel@tonic-gate 1007c478bd9Sstevel@tonic-gate /* 101da6c28aaSamw * data used to determine when we can coalesce consecutive free pages 1027c478bd9Sstevel@tonic-gate * into larger pages. 1037c478bd9Sstevel@tonic-gate */ 1047c478bd9Sstevel@tonic-gate #define MAX_PAGESIZES 32 1057c478bd9Sstevel@tonic-gate static ulong_t fsf_npgsz; 1067c478bd9Sstevel@tonic-gate static pgcnt_t fsf_pgcnt[MAX_PAGESIZES]; 1077c478bd9Sstevel@tonic-gate static pgcnt_t fsf_mask[MAX_PAGESIZES]; 1087c478bd9Sstevel@tonic-gate 1097c478bd9Sstevel@tonic-gate 1107c478bd9Sstevel@tonic-gate /* 1117c478bd9Sstevel@tonic-gate * Scan page_t's and issue I/O's for modified pages. 1127c478bd9Sstevel@tonic-gate * 1137c478bd9Sstevel@tonic-gate * Also coalesces consecutive small sized free pages into the next larger 1147c478bd9Sstevel@tonic-gate * pagesize. This costs a tiny bit of time in fsflush, but will reduce time 1157c478bd9Sstevel@tonic-gate * spent scanning on later passes and for anybody allocating large pages. 1167c478bd9Sstevel@tonic-gate */ 1177c478bd9Sstevel@tonic-gate static void 1187c478bd9Sstevel@tonic-gate fsflush_do_pages() 1197c478bd9Sstevel@tonic-gate { 1207c478bd9Sstevel@tonic-gate vnode_t *vp; 1217c478bd9Sstevel@tonic-gate ulong_t pcount; 1227c478bd9Sstevel@tonic-gate hrtime_t timer = gethrtime(); 1237c478bd9Sstevel@tonic-gate ulong_t releases = 0; 1247c478bd9Sstevel@tonic-gate ulong_t nexamined = 0; 1257c478bd9Sstevel@tonic-gate ulong_t nlocked = 0; 1267c478bd9Sstevel@tonic-gate ulong_t nmodified = 0; 1277c478bd9Sstevel@tonic-gate ulong_t ncoalesce = 0; 1289853d9e8SJason Beloro ulong_t cnt; 1297c478bd9Sstevel@tonic-gate int mod; 1309853d9e8SJason Beloro int fspage = 1; 1317c478bd9Sstevel@tonic-gate u_offset_t offset; 1327c478bd9Sstevel@tonic-gate uint_t szc; 1337c478bd9Sstevel@tonic-gate 134da6c28aaSamw page_t *coal_page = NULL; /* 1st page in group to coalesce */ 1357c478bd9Sstevel@tonic-gate uint_t coal_szc = 0; /* size code, coal_page->p_szc */ 1367c478bd9Sstevel@tonic-gate uint_t coal_cnt = 0; /* count of pages seen */ 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate static ulong_t nscan = 0; 1397c478bd9Sstevel@tonic-gate static pgcnt_t last_total_pages = 0; 1409853d9e8SJason Beloro static page_t *pp = NULL; 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate /* 1437c478bd9Sstevel@tonic-gate * Check to see if total_pages has changed. 1447c478bd9Sstevel@tonic-gate */ 1457c478bd9Sstevel@tonic-gate if (total_pages != last_total_pages) { 1467c478bd9Sstevel@tonic-gate last_total_pages = total_pages; 1477c478bd9Sstevel@tonic-gate nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup; 1487c478bd9Sstevel@tonic-gate } 1497c478bd9Sstevel@tonic-gate 1509853d9e8SJason Beloro if (pp == NULL) 1519853d9e8SJason Beloro pp = memsegs->pages; 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate pcount = 0; 15407fa3635SAmrita Sadhukhan while (pcount < nscan) { 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * move to the next page, skipping over large pages 1587c478bd9Sstevel@tonic-gate * and issuing prefetches. 1597c478bd9Sstevel@tonic-gate */ 1609853d9e8SJason Beloro if (pp->p_szc && fspage == 0) { 1619853d9e8SJason Beloro pfn_t pfn; 1629853d9e8SJason Beloro 1639853d9e8SJason Beloro pfn = page_pptonum(pp); 1649853d9e8SJason Beloro cnt = page_get_pagecnt(pp->p_szc); 1659853d9e8SJason Beloro cnt -= pfn & (cnt - 1); 1669853d9e8SJason Beloro } else 1679853d9e8SJason Beloro cnt = 1; 1689853d9e8SJason Beloro 1699853d9e8SJason Beloro pp = page_nextn(pp, cnt); 1707c478bd9Sstevel@tonic-gate prefetch_page_r((void *)pp); 1717c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 1729853d9e8SJason Beloro pcount += cnt; 1737c478bd9Sstevel@tonic-gate 1747c478bd9Sstevel@tonic-gate /* 1757c478bd9Sstevel@tonic-gate * Do a bunch of dirty tests (ie. no locking) to determine 1767c478bd9Sstevel@tonic-gate * if we can quickly skip this page. These tests are repeated 1777c478bd9Sstevel@tonic-gate * after acquiring the page lock. 1787c478bd9Sstevel@tonic-gate */ 1797c478bd9Sstevel@tonic-gate ++nexamined; 1807c478bd9Sstevel@tonic-gate if (PP_ISSWAP(pp)) { 1819853d9e8SJason Beloro fspage = 0; 1827c478bd9Sstevel@tonic-gate coal_page = NULL; 1837c478bd9Sstevel@tonic-gate continue; 1847c478bd9Sstevel@tonic-gate } 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /* 1877c478bd9Sstevel@tonic-gate * skip free pages too, but try coalescing them into larger 1887c478bd9Sstevel@tonic-gate * pagesizes 1897c478bd9Sstevel@tonic-gate */ 1907c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 1917c478bd9Sstevel@tonic-gate /* 1927c478bd9Sstevel@tonic-gate * skip pages with a file system identity or that 1937c478bd9Sstevel@tonic-gate * are already maximum size 1947c478bd9Sstevel@tonic-gate */ 1959853d9e8SJason Beloro fspage = 0; 1967c478bd9Sstevel@tonic-gate szc = pp->p_szc; 1977c478bd9Sstevel@tonic-gate if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) { 1987c478bd9Sstevel@tonic-gate coal_page = NULL; 1997c478bd9Sstevel@tonic-gate continue; 2007c478bd9Sstevel@tonic-gate } 2017c478bd9Sstevel@tonic-gate 2027c478bd9Sstevel@tonic-gate /* 2037c478bd9Sstevel@tonic-gate * If not in a coalescing candidate page or the size 2047c478bd9Sstevel@tonic-gate * codes are different, start a new candidate. 2057c478bd9Sstevel@tonic-gate */ 2067c478bd9Sstevel@tonic-gate if (coal_page == NULL || coal_szc != szc) { 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate /* 2097c478bd9Sstevel@tonic-gate * page must be properly aligned 2107c478bd9Sstevel@tonic-gate */ 2117c478bd9Sstevel@tonic-gate if ((page_pptonum(pp) & fsf_mask[szc]) != 0) { 2127c478bd9Sstevel@tonic-gate coal_page = NULL; 2137c478bd9Sstevel@tonic-gate continue; 2147c478bd9Sstevel@tonic-gate } 2157c478bd9Sstevel@tonic-gate coal_page = pp; 2167c478bd9Sstevel@tonic-gate coal_szc = szc; 2177c478bd9Sstevel@tonic-gate coal_cnt = 1; 2187c478bd9Sstevel@tonic-gate continue; 2197c478bd9Sstevel@tonic-gate } 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate /* 2227c478bd9Sstevel@tonic-gate * acceptable to add this to existing candidate page 2237c478bd9Sstevel@tonic-gate */ 2247c478bd9Sstevel@tonic-gate ++coal_cnt; 2257c478bd9Sstevel@tonic-gate if (coal_cnt < fsf_pgcnt[coal_szc]) 2267c478bd9Sstevel@tonic-gate continue; 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate /* 2297c478bd9Sstevel@tonic-gate * We've got enough pages to coalesce, so do it. 2307c478bd9Sstevel@tonic-gate * After promoting, we clear coal_page, so it will 2317c478bd9Sstevel@tonic-gate * take another pass to promote this to an even 2327c478bd9Sstevel@tonic-gate * larger page. 2337c478bd9Sstevel@tonic-gate */ 2347c478bd9Sstevel@tonic-gate ++ncoalesce; 2357c478bd9Sstevel@tonic-gate (void) page_promote_size(coal_page, coal_szc); 2367c478bd9Sstevel@tonic-gate coal_page = NULL; 2377c478bd9Sstevel@tonic-gate continue; 2387c478bd9Sstevel@tonic-gate } else { 2397c478bd9Sstevel@tonic-gate coal_page = NULL; 2407c478bd9Sstevel@tonic-gate } 2417c478bd9Sstevel@tonic-gate 242ad23a2dbSjohansen if (PP_ISKAS(pp) || 2437c478bd9Sstevel@tonic-gate PAGE_LOCKED(pp) || 2447c478bd9Sstevel@tonic-gate pp->p_lckcnt != 0 || 2459853d9e8SJason Beloro pp->p_cowcnt != 0) { 2469853d9e8SJason Beloro fspage = 0; 2477c478bd9Sstevel@tonic-gate continue; 2489853d9e8SJason Beloro } 2497c478bd9Sstevel@tonic-gate 2507c478bd9Sstevel@tonic-gate 2517c478bd9Sstevel@tonic-gate /* 2527c478bd9Sstevel@tonic-gate * Reject pages that can't be "exclusively" locked. 2537c478bd9Sstevel@tonic-gate */ 2547c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) 2557c478bd9Sstevel@tonic-gate continue; 2567c478bd9Sstevel@tonic-gate ++nlocked; 2577c478bd9Sstevel@tonic-gate 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate /* 2607c478bd9Sstevel@tonic-gate * After locking the page, redo the above checks. 2617c478bd9Sstevel@tonic-gate * Since we locked the page, leave out the PAGE_LOCKED() test. 2627c478bd9Sstevel@tonic-gate */ 2637c478bd9Sstevel@tonic-gate vp = pp->p_vnode; 2647c478bd9Sstevel@tonic-gate if (PP_ISSWAP(pp) || 2657c478bd9Sstevel@tonic-gate PP_ISFREE(pp) || 2667c478bd9Sstevel@tonic-gate vp == NULL || 267ad23a2dbSjohansen PP_ISKAS(pp) || 2687c478bd9Sstevel@tonic-gate (vp->v_flag & VISSWAP) != 0) { 2697c478bd9Sstevel@tonic-gate page_unlock(pp); 2709853d9e8SJason Beloro fspage = 0; 2719853d9e8SJason Beloro continue; 2729853d9e8SJason Beloro } 2739853d9e8SJason Beloro if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 2749853d9e8SJason Beloro page_unlock(pp); 2757c478bd9Sstevel@tonic-gate continue; 2767c478bd9Sstevel@tonic-gate } 2777c478bd9Sstevel@tonic-gate 2789853d9e8SJason Beloro fspage = 1; 2797c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 2807c478bd9Sstevel@tonic-gate 2817c478bd9Sstevel@tonic-gate /* 2827c478bd9Sstevel@tonic-gate * Check the modified bit. Leaving the bit alone in hardware. 2837c478bd9Sstevel@tonic-gate * It will be cleared if we do the putpage. 2847c478bd9Sstevel@tonic-gate */ 2857c478bd9Sstevel@tonic-gate if (IS_VMODSORT(vp)) 2867c478bd9Sstevel@tonic-gate mod = hat_ismod(pp); 2877c478bd9Sstevel@tonic-gate else 2887c478bd9Sstevel@tonic-gate mod = hat_pagesync(pp, 2897c478bd9Sstevel@tonic-gate HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD; 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate if (mod) { 2927c478bd9Sstevel@tonic-gate ++nmodified; 2937c478bd9Sstevel@tonic-gate offset = pp->p_offset; 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate /* 2967c478bd9Sstevel@tonic-gate * Hold the vnode before releasing the page lock 2977c478bd9Sstevel@tonic-gate * to prevent it from being freed and re-used by 2987c478bd9Sstevel@tonic-gate * some other thread. 2997c478bd9Sstevel@tonic-gate */ 3007c478bd9Sstevel@tonic-gate VN_HOLD(vp); 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate page_unlock(pp); 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC, 305da6c28aaSamw kcred, NULL); 3067c478bd9Sstevel@tonic-gate 3077c478bd9Sstevel@tonic-gate VN_RELE(vp); 3087c478bd9Sstevel@tonic-gate } else { 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate /* 3117c478bd9Sstevel@tonic-gate * Catch any pages which should be on the cache list, 3127c478bd9Sstevel@tonic-gate * but aren't yet. 3137c478bd9Sstevel@tonic-gate */ 3147c478bd9Sstevel@tonic-gate if (hat_page_is_mapped(pp) == 0) { 3157c478bd9Sstevel@tonic-gate ++releases; 3167c478bd9Sstevel@tonic-gate (void) page_release(pp, 1); 3177c478bd9Sstevel@tonic-gate } else { 3187c478bd9Sstevel@tonic-gate page_unlock(pp); 3197c478bd9Sstevel@tonic-gate } 3207c478bd9Sstevel@tonic-gate } 3217c478bd9Sstevel@tonic-gate } 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate /* 3247c478bd9Sstevel@tonic-gate * maintain statistics 3257c478bd9Sstevel@tonic-gate * reset every million wakeups, just to avoid overflow 3267c478bd9Sstevel@tonic-gate */ 3277c478bd9Sstevel@tonic-gate if (++fsf_cycles == 1000000) { 3287c478bd9Sstevel@tonic-gate fsf_cycles = 0; 3297c478bd9Sstevel@tonic-gate fsf_total.fsf_scan = 0; 3307c478bd9Sstevel@tonic-gate fsf_total.fsf_examined = 0; 3317c478bd9Sstevel@tonic-gate fsf_total.fsf_locked = 0; 3327c478bd9Sstevel@tonic-gate fsf_total.fsf_modified = 0; 3337c478bd9Sstevel@tonic-gate fsf_total.fsf_coalesce = 0; 3347c478bd9Sstevel@tonic-gate fsf_total.fsf_time = 0; 3357c478bd9Sstevel@tonic-gate fsf_total.fsf_releases = 0; 3367c478bd9Sstevel@tonic-gate } else { 3377c478bd9Sstevel@tonic-gate fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan; 3387c478bd9Sstevel@tonic-gate fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined; 3397c478bd9Sstevel@tonic-gate fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked; 3407c478bd9Sstevel@tonic-gate fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified; 3417c478bd9Sstevel@tonic-gate fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce; 3427c478bd9Sstevel@tonic-gate fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer; 3437c478bd9Sstevel@tonic-gate fsf_total.fsf_releases += fsf_recent.fsf_releases = releases; 3447c478bd9Sstevel@tonic-gate } 3457c478bd9Sstevel@tonic-gate } 3467c478bd9Sstevel@tonic-gate 3477c478bd9Sstevel@tonic-gate /* 3487c478bd9Sstevel@tonic-gate * As part of file system hardening, this daemon is awakened 3497c478bd9Sstevel@tonic-gate * every second to flush cached data which includes the 3507c478bd9Sstevel@tonic-gate * buffer cache, the inode cache and mapped pages. 3517c478bd9Sstevel@tonic-gate */ 3527c478bd9Sstevel@tonic-gate void 3537c478bd9Sstevel@tonic-gate fsflush() 3547c478bd9Sstevel@tonic-gate { 3557c478bd9Sstevel@tonic-gate struct buf *bp, *dwp; 3567c478bd9Sstevel@tonic-gate struct hbuf *hp; 3577c478bd9Sstevel@tonic-gate int autoup; 3587c478bd9Sstevel@tonic-gate unsigned int ix, icount, count = 0; 3597c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 3607c478bd9Sstevel@tonic-gate uint_t bcount; 3617c478bd9Sstevel@tonic-gate kmutex_t *hmp; 3627c478bd9Sstevel@tonic-gate struct vfssw *vswp; 3637c478bd9Sstevel@tonic-gate 3647c478bd9Sstevel@tonic-gate proc_fsflush = ttoproc(curthread); 3657c478bd9Sstevel@tonic-gate proc_fsflush->p_cstime = 0; 3667c478bd9Sstevel@tonic-gate proc_fsflush->p_stime = 0; 3677c478bd9Sstevel@tonic-gate proc_fsflush->p_cutime = 0; 3687c478bd9Sstevel@tonic-gate proc_fsflush->p_utime = 0; 369ae115bc7Smrj bcopy("fsflush", curproc->p_user.u_psargs, 8); 370ae115bc7Smrj bcopy("fsflush", curproc->p_user.u_comm, 7); 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL); 3737c478bd9Sstevel@tonic-gate sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL); 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate /* 3767c478bd9Sstevel@tonic-gate * Setup page coalescing. 3777c478bd9Sstevel@tonic-gate */ 3787c478bd9Sstevel@tonic-gate fsf_npgsz = page_num_pagesizes(); 3797c478bd9Sstevel@tonic-gate ASSERT(fsf_npgsz < MAX_PAGESIZES); 3807c478bd9Sstevel@tonic-gate for (ix = 0; ix < fsf_npgsz - 1; ++ix) { 3817c478bd9Sstevel@tonic-gate fsf_pgcnt[ix] = 3827c478bd9Sstevel@tonic-gate page_get_pagesize(ix + 1) / page_get_pagesize(ix); 3837c478bd9Sstevel@tonic-gate fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1; 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate 3867c478bd9Sstevel@tonic-gate autoup = v.v_autoup * hz; 3877c478bd9Sstevel@tonic-gate icount = v.v_autoup / tune.t_fsflushr; 3887c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush"); 3897c478bd9Sstevel@tonic-gate loop: 3907c478bd9Sstevel@tonic-gate sema_v(&fsflush_sema); 3917c478bd9Sstevel@tonic-gate mutex_enter(&fsflush_lock); 3927c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 3937c478bd9Sstevel@tonic-gate cv_wait(&fsflush_cv, &fsflush_lock); /* wait for clock */ 3947c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock); 3957c478bd9Sstevel@tonic-gate mutex_exit(&fsflush_lock); 3967c478bd9Sstevel@tonic-gate sema_p(&fsflush_sema); 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate /* 3997c478bd9Sstevel@tonic-gate * Write back all old B_DELWRI buffers on the freelist. 4007c478bd9Sstevel@tonic-gate */ 4017c478bd9Sstevel@tonic-gate bcount = 0; 4027c478bd9Sstevel@tonic-gate for (ix = 0; ix < v.v_hbuf; ix++) { 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate hp = &hbuf[ix]; 4057c478bd9Sstevel@tonic-gate dwp = (struct buf *)&dwbuf[ix]; 4067c478bd9Sstevel@tonic-gate 4077c478bd9Sstevel@tonic-gate bcount += (hp->b_length); 4087c478bd9Sstevel@tonic-gate 4097c478bd9Sstevel@tonic-gate if (dwp->av_forw == dwp) { 4107c478bd9Sstevel@tonic-gate continue; 4117c478bd9Sstevel@tonic-gate } 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate hmp = &hbuf[ix].b_lock; 4147c478bd9Sstevel@tonic-gate mutex_enter(hmp); 4157c478bd9Sstevel@tonic-gate bp = dwp->av_forw; 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate /* 4187c478bd9Sstevel@tonic-gate * Go down only on the delayed write lists. 4197c478bd9Sstevel@tonic-gate */ 4207c478bd9Sstevel@tonic-gate while (bp != dwp) { 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags & B_DELWRI); 4237c478bd9Sstevel@tonic-gate 4247c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_DELWRI) && 425*d3d50737SRafael Vanoni (ddi_get_lbolt() - bp->b_start >= autoup) && 4267c478bd9Sstevel@tonic-gate sema_tryp(&bp->b_sem)) { 4277c478bd9Sstevel@tonic-gate bp->b_flags |= B_ASYNC; 4287c478bd9Sstevel@tonic-gate hp->b_length--; 4297c478bd9Sstevel@tonic-gate notavail(bp); 4307c478bd9Sstevel@tonic-gate mutex_exit(hmp); 4317c478bd9Sstevel@tonic-gate if (bp->b_vp == NULL) { 4327c478bd9Sstevel@tonic-gate BWRITE(bp); 4337c478bd9Sstevel@tonic-gate } else { 4347c478bd9Sstevel@tonic-gate UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, 4357c478bd9Sstevel@tonic-gate bp); 4367c478bd9Sstevel@tonic-gate } 4377c478bd9Sstevel@tonic-gate mutex_enter(hmp); 4387c478bd9Sstevel@tonic-gate bp = dwp->av_forw; 4397c478bd9Sstevel@tonic-gate } else { 4407c478bd9Sstevel@tonic-gate bp = bp->av_forw; 4417c478bd9Sstevel@tonic-gate } 4427c478bd9Sstevel@tonic-gate } 4437c478bd9Sstevel@tonic-gate mutex_exit(hmp); 4447c478bd9Sstevel@tonic-gate } 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate /* 4477c478bd9Sstevel@tonic-gate * 4487c478bd9Sstevel@tonic-gate * There is no need to wakeup any thread waiting on bio_mem_cv 4497c478bd9Sstevel@tonic-gate * since brelse will wake them up as soon as IO is complete. 4507c478bd9Sstevel@tonic-gate */ 4517c478bd9Sstevel@tonic-gate bfreelist.b_bcount = bcount; 4527c478bd9Sstevel@tonic-gate 4537c478bd9Sstevel@tonic-gate if (dopageflush) 4547c478bd9Sstevel@tonic-gate fsflush_do_pages(); 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate if (!doiflush) 4577c478bd9Sstevel@tonic-gate goto loop; 4587c478bd9Sstevel@tonic-gate 4597c478bd9Sstevel@tonic-gate /* 4607c478bd9Sstevel@tonic-gate * If the system was not booted to single user mode, skip the 4617c478bd9Sstevel@tonic-gate * inode flushing until after fsflush_iflush_delay secs have elapsed. 4627c478bd9Sstevel@tonic-gate */ 4637c478bd9Sstevel@tonic-gate if ((boothowto & RB_SINGLE) == 0 && 464*d3d50737SRafael Vanoni (ddi_get_lbolt64() / hz) < fsflush_iflush_delay) 4657c478bd9Sstevel@tonic-gate goto loop; 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate /* 4687c478bd9Sstevel@tonic-gate * Flush cached attribute information (e.g. inodes). 4697c478bd9Sstevel@tonic-gate */ 4707c478bd9Sstevel@tonic-gate if (++count >= icount) { 4717c478bd9Sstevel@tonic-gate count = 0; 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate /* 4747c478bd9Sstevel@tonic-gate * Sync back cached data. 4757c478bd9Sstevel@tonic-gate */ 4767c478bd9Sstevel@tonic-gate RLOCK_VFSSW(); 4777c478bd9Sstevel@tonic-gate for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 4787c478bd9Sstevel@tonic-gate if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { 4797c478bd9Sstevel@tonic-gate vfs_refvfssw(vswp); 4807c478bd9Sstevel@tonic-gate RUNLOCK_VFSSW(); 4817c478bd9Sstevel@tonic-gate (void) fsop_sync_by_kind(vswp - vfssw, 4827c478bd9Sstevel@tonic-gate SYNC_ATTR, kcred); 4837c478bd9Sstevel@tonic-gate vfs_unrefvfssw(vswp); 4847c478bd9Sstevel@tonic-gate RLOCK_VFSSW(); 4857c478bd9Sstevel@tonic-gate } 4867c478bd9Sstevel@tonic-gate } 4877c478bd9Sstevel@tonic-gate RUNLOCK_VFSSW(); 4887c478bd9Sstevel@tonic-gate } 4897c478bd9Sstevel@tonic-gate goto loop; 4907c478bd9Sstevel@tonic-gate } 491