/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2019 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/buf.h>
#include <sys/var.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/atomic.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/vtrace.h>
#include <sys/tnf_probe.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/systm.h>
#include <sys/vfs.h>
#include <sys/sdt.h>

/* Locks */
static	kmutex_t	blist_lock;	/* protects b_list */
static	kmutex_t	bhdr_lock;	/* protects the bhdrlist */
static	kmutex_t	bfree_lock;	/* protects the bfreelist structure */

struct hbuf	*hbuf;			/* Hash buckets */
struct dwbuf	*dwbuf;			/* Delayed write buckets */
static struct buf *bhdrlist;		/* buf header free list */
static int	nbuf;			/* number of buffer headers allocated */

static int	lastindex;		/* Reference point on where to start */
					/* when looking for free buffers */

#define	bio_bhash(dev, bn)	(hash2ints((dev), (int)(bn)) & v.v_hmask)
#define	EMPTY_LIST	((struct buf *)-1)

static kcondvar_t	bio_mem_cv;	/* Condition variables */
static kcondvar_t	bio_flushinval_cv;
static int	bio_doingflush;		/* flush in progress */
static int	bio_doinginval;		/* inval in progress */
static int	bio_flinv_cv_wanted;	/* someone waiting for cv */

/*
 * Statistics on the buffer cache
 */
struct biostats biostats = {
	{ "buffer_cache_lookups",		KSTAT_DATA_UINT32 },
	{ "buffer_cache_hits",			KSTAT_DATA_UINT32 },
	{ "new_buffer_requests",		KSTAT_DATA_UINT32 },
	{ "waits_for_buffer_allocs",		KSTAT_DATA_UINT32 },
	{ "buffers_locked_by_someone",		KSTAT_DATA_UINT32 },
	{ "duplicate_buffers_found",		KSTAT_DATA_UINT32 }
};

/*
 * kstat data
 */
kstat_named_t	*biostats_ptr = (kstat_named_t *)&biostats;
uint_t		biostats_ndata = (uint_t)(sizeof (biostats) /
    sizeof (kstat_named_t));

/*
 * Statistics on ufs buffer cache
 * Not protected by locks
 */
struct ufsbiostats ub = {
	{ "breads",			KSTAT_DATA_UINT32 },
	{ "bwrites",			KSTAT_DATA_UINT32 },
	{ "fbiwrites",			KSTAT_DATA_UINT32 },
	{ "getpages",			KSTAT_DATA_UINT32 },
	{ "getras",			KSTAT_DATA_UINT32 },
	{ "putsyncs",			KSTAT_DATA_UINT32 },
	{ "putasyncs",			KSTAT_DATA_UINT32 },
	{ "putpageios",			KSTAT_DATA_UINT32 },
};

/*
 * more UFS Logging eccentricities...
 *
 * required since "#pragma weak ..." doesn't work in reverse order.
 * i.e.: genunix (bio.c) is loaded before the ufs modules and pointers
 *       to ufs routines don't get plugged into bio.c calls so
 *       we initialize it when setting up the "lufsops" table
 *       in "lufs.c:_init()"
 */
void (*bio_lufs_strategy)(void *, buf_t *);
void (*bio_snapshot_strategy)(void *, buf_t *);


/* Private routines */
static struct buf	*bio_getfreeblk(long);
static void		bio_mem_get(long);
static void		bio_bhdr_free(struct buf *);
static struct buf	*bio_bhdr_alloc(void);
static void		bio_recycle(int, long);
static void		bio_pageio_done(struct buf *);
static int		bio_incore(dev_t, daddr_t);

/*
 * Buffer cache constants
 */
#define	BIO_BUF_PERCENT	(100/2)		/* default: 2% of memory */
#define	BIO_MAX_PERCENT	(100/20)	/* max is 20% of real memory */
#define	BIO_BHDR_POOL	100		/* Default bhdr pool size */
#define	BIO_MIN_HDR	10		/* Minimum number of buffer headers */
#define	BIO_MIN_HWM	(BIO_MIN_HDR * MAXBSIZE / 1024)
#define	BIO_HASHLEN	4		/* Target length of hash chains */


/* Flags for bio_recycle() */
#define	BIO_HEADER	0x01
#define	BIO_MEM		0x02
extern	int bufhwm;		/* User tunable - high water mark for mem */
extern	int bufhwm_pct;		/* ditto - given in % of physmem */

/*
 * The following routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer returned is locked with a
 * binary semaphore so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already locked, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread/BREAD
 *	breada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite/BWRITE/brwrite
 *	bdwrite/bdrwrite
 *	bawrite
 *	brelse
 *
 * The B_WANTED/B_BUSY bits are NOT used by these routines for synchronization.
 * Instead, a binary semaphore, b_sem, is used to gain exclusive access to
 * a buffer and a binary semaphore, b_io, is used for I/O synchronization.
 * B_DONE is still used to denote a buffer with I/O complete on it.
 *
 * The bfreelist.b_bcount field is computed every time fsflush runs.  It
 * should not be used where a very accurate count of the free buffers is
 * needed.
 */

/*
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * This interface is provided for binary compatibility.  Using
 * BREAD() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
struct buf *
bread(dev_t dev, daddr_t blkno, long bsize)
{
	return (BREAD(dev, blkno, bsize));
}
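/*
 * Illustrative sketch, not part of the original code: a typical non-UFS
 * consumer of the routines above reads a block, inspects it, and releases
 * it.  The destination buffer, device, and block number below are
 * hypothetical.
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno, DEV_BSIZE);
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);
 *		return (EIO);
 *	}
 *	bcopy(bp->b_un.b_addr, dst, DEV_BSIZE);
 *	brelse(bp);
 *
 * bread() returns with b_sem held, so no one else can touch the buffer
 * until brelse() puts it back on a free list.
 */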
/*
 * Common code for reading a buffer with various options
 *
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		return (bp);
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
							/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
							/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}
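/*
 * Illustrative sketch, not part of the original code: a sequential reader
 * would typically name the next block as the read-ahead block, e.g.
 *
 *	bp = breada(dev, blkno, blkno + btodb(bsize), bsize);
 *
 * (the rablkno arithmetic here is hypothetical and depends on the caller's
 * block numbering).  The caller only ever owns bp; the read-ahead buffer
 * is started B_READ|B_ASYNC and is released by biodone() when its I/O
 * completes.
 */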
/*
 * Common code for writing a buffer with various options.
 *
 * force_wait  - wait for write completion regardless of B_ASYNC flag
 * do_relse    - release the buffer when we are done
 * clear_flags - flags to clear from the buffer
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
    int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
							/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
							/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}

/*
 * Write the buffer, waiting for completion (unless B_ASYNC is set).
 * Then release the buffer.
 * This interface is provided for binary compatibility.  Using
 * BWRITE() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
void
bwrite(struct buf *bp)
{
	BWRITE(bp);
}

/*
 * Write the buffer, waiting for completion.
 * But don't release the buffer afterwards.
 * This interface is provided for binary compatibility.  Using
 * BWRITE2() directly avoids the extra function call overhead.
 */
void
bwrite2(struct buf *bp)
{
	BWRITE2(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * Also save the time that the block is first marked as delayed
 * so that it will be written in a reasonable time.
 */
void
bdwrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	CPU_STATS_ADD_K(sys, lwrite, 1);
	if ((bp->b_flags & B_DELWRI) == 0)
		bp->b_start = ddi_get_lbolt();
	/*
	 * B_DONE allows others to use the buffer, B_DELWRI causes the
	 * buffer to be written before being reused, and setting b_resid
	 * to zero says the buffer is complete.
	 */
	bp->b_flags |= B_DELWRI | B_DONE;
	bp->b_resid = 0;
	brelse(bp);
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
void
bawrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/* Use bfreelist.b_bcount as a weird-ass heuristic */
	if (bfreelist.b_bcount > 4)
		bp->b_flags |= B_ASYNC;
	BWRITE(bp);
}
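/*
 * Illustrative comparison, not part of the original code: the write
 * flavors above differ only in when the data reaches the device and when
 * the buffer is given back.  For a buffer obtained and dirtied like
 *
 *	bp = GETBLK(dev, blkno, bsize);
 *	bcopy(src, bp->b_un.b_addr, bsize);
 *
 * exactly one of the following would then be called:
 *
 *	bwrite(bp);	write now; wait unless B_ASYNC is set; release
 *	bdwrite(bp);	mark B_DELWRI and release; fsflush writes it later
 *	bawrite(bp);	start the write asynchronously; don't wait
 *
 * src here is hypothetical; it stands for whatever the caller is writing.
 */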
/*
 * Release the buffer, with no I/O implied.
 */
void
brelse(struct buf *bp)
{
	struct buf	**backp;
	uint_t		index;
	kmutex_t	*hmp;
	struct buf	*dp;
	struct hbuf	*hp;


	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * Clear the retry write flag if the buffer was written without
	 * error.  The presence of B_DELWRI means the buffer has not yet
	 * been written and the presence of B_ERROR means that an error
	 * is still occurring.
	 */
	if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) {
		bp->b_flags &= ~B_RETRYWRI;
	}

	/* Check for anomalous conditions */
	if (bp->b_flags & (B_ERROR|B_NOCACHE)) {
		if (bp->b_flags & B_NOCACHE) {
			/* Don't add to the freelist. Destroy it now */
			kmem_free(bp->b_un.b_addr, bp->b_bufsize);
			sema_destroy(&bp->b_sem);
			sema_destroy(&bp->b_io);
			kmem_free(bp, sizeof (struct buf));
			return;
		}
		/*
		 * If a write failed and we are supposed to retry write,
		 * don't toss the buffer.  Keep it around and mark it
		 * delayed write in the hopes that it will eventually
		 * get flushed (and still keep the system running.)
		 */
		if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) {
			bp->b_flags |= B_DELWRI;
			/* keep fsflush from trying continuously to flush */
			bp->b_start = ddi_get_lbolt();
		} else
			bp->b_flags |= B_AGE|B_STALE;
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
	}

	/*
	 * If delayed write is set then put it on the delayed
	 * write list instead of the free buffer list.
	 */
	index = bio_bhash(bp->b_edev, bp->b_blkno);
	hmp   = &hbuf[index].b_lock;

	mutex_enter(hmp);
	hp = &hbuf[index];
	dp = (struct buf *)hp;

	/*
	 * Make sure that the number of entries on this list is
	 * Zero <= count <= total # buffers
	 */
	ASSERT(hp->b_length >= 0);
	ASSERT(hp->b_length < nbuf);

	hp->b_length++;		/* We are adding this buffer */

	if (bp->b_flags & B_DELWRI) {
		/*
		 * This buffer goes on the delayed write buffer list
		 */
		dp = (struct buf *)&dwbuf[index];
	}
	ASSERT(bp->b_bufsize > 0);
	ASSERT(bp->b_bcount > 0);
	ASSERT(bp->b_un.b_addr != NULL);

	if (bp->b_flags & B_AGE) {
		backp = &dp->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = dp;
	} else {
		backp = &dp->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = dp;
	}
	mutex_exit(hmp);

	if (bfreelist.b_flags & B_WANTED) {
		/*
		 * Should come here very very rarely.
		 */
		mutex_enter(&bfree_lock);
		if (bfreelist.b_flags & B_WANTED) {
			bfreelist.b_flags &= ~B_WANTED;
			cv_broadcast(&bio_mem_cv);
		}
		mutex_exit(&bfree_lock);
	}

	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC);
	/*
	 * Don't let anyone get the buffer off the freelist before we
	 * release our hold on it.
	 */
	sema_v(&bp->b_sem);
}
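/*
 * Illustrative note, not part of the original code: in brelse() above,
 * buffers marked B_AGE are inserted at the av_forw (head) end of the
 * free or delayed-write list, making them early candidates for reuse,
 * while all other buffers go to the av_back (tail) end, which keeps the
 * list in roughly least-recently-released order.
 */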
/*
 * Return a count of the number of B_BUSY buffers in the system
 * Can only be used as a good estimate.  If 'cleanit' is set,
 * try to flush all bufs.
 */
int
bio_busy(int cleanit)
{
	struct buf *bp, *dp;
	int busy = 0;
	int i;
	kmutex_t *hmp;

	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_flags & B_BUSY)
				busy++;
		}
		mutex_exit(hmp);
	}

	if (cleanit && busy != 0) {
		bflush(NODEV);
	}

	return (busy);
}

/*
 * this interface is provided for binary compatibility.
 *
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev_t dev, daddr_t blkno, long bsize)
{
	return (getblk_common(/* ufsvfsp */ NULL, dev,
	    blkno, bsize, /* errflg */ 0));
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk_common(void * arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
{
	ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	struct buf *dp;
	struct buf *nbp = NULL;
	struct buf *errbp;
	uint_t		index;
	kmutex_t	*hmp;
	struct hbuf	*hp;

	if (getmajor(dev) >= devcnt)
		cmn_err(CE_PANIC, "blkdev");

	biostats.bio_lookup.value.ui32++;

	index = bio_bhash(dev, blkno);
	hp    = &hbuf[index];
	dp    = (struct buf *)hp;
	hmp   = &hp->b_lock;

	mutex_enter(hmp);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Avoid holding the hash lock in the event that
		 * the buffer is locked by someone. Since the hash chain
		 * may change when we drop the hash lock
		 * we have to start at the beginning of the chain if the
		 * buffer identity/contents aren't valid.
		 */
		if (!sema_tryp(&bp->b_sem)) {
			biostats.bio_bufbusy.value.ui32++;
			mutex_exit(hmp);
			/*
			 * OK, we are dealing with a busy buffer.
			 * In the case that we are panicking and we
			 * got called from bread(), we have some chance
			 * for error recovery. So better bail out from
			 * here since sema_p() won't block. If we got
			 * called directly from ufs routines, there is
			 * no way to report an error yet.
			 */
			if (panicstr && errflg)
				goto errout;
			/*
			 * For the following line of code to work
			 * correctly never kmem_free the buffer "header".
			 */
			sema_p(&bp->b_sem);
			if (bp->b_blkno != blkno || bp->b_edev != dev ||
			    (bp->b_flags & B_STALE)) {
				sema_v(&bp->b_sem);
				mutex_enter(hmp);
				goto loop;	/* start over */
			}
			mutex_enter(hmp);
		}
		/* Found */
		biostats.bio_hit.value.ui32++;
		bp->b_flags &= ~B_AGE;

		/*
		 * Yank it off the free/delayed write lists
		 */
		hp->b_length--;
		notavail(bp);
		mutex_exit(hmp);

		ASSERT((bp->b_flags & B_NOCACHE) == 0);

		if (nbp == NULL) {
			/*
			 * Make the common path short.
			 */
			ASSERT(SEMA_HELD(&bp->b_sem));
			return (bp);
		}

		biostats.bio_bufdup.value.ui32++;

		/*
		 * The buffer must have entered during the lock upgrade
		 * so free the new buffer we allocated and return the
		 * found buffer.
		 */
		kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
		nbp->b_un.b_addr = NULL;

		/*
		 * Account for the memory
		 */
		mutex_enter(&bfree_lock);
		bfreelist.b_bufsize += nbp->b_bufsize;
		mutex_exit(&bfree_lock);

		/*
		 * Destroy buf identity, and place on avail list
		 */
		nbp->b_dev = (o_dev_t)NODEV;
		nbp->b_edev = NODEV;
		nbp->b_flags = 0;
		nbp->b_file = NULL;
		nbp->b_offset = -1;

		sema_v(&nbp->b_sem);
		bio_bhdr_free(nbp);

		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	}

	/*
	 * bio_getfreeblk may block so check the hash chain again.
	 */
	if (nbp == NULL) {
		mutex_exit(hmp);
		nbp = bio_getfreeblk(bsize);
		mutex_enter(hmp);
		goto loop;
	}

	/*
	 * New buffer.  Assign nbp and stick it on the hash.
	 */
	nbp->b_flags = B_BUSY;
	nbp->b_edev = dev;
	nbp->b_dev = (o_dev_t)cmpdev(dev);
	nbp->b_blkno = blkno;
	nbp->b_iodone = NULL;
	nbp->b_bcount = bsize;
	/*
	 * If we are given a ufsvfsp and the vfs_root field is NULL
	 * then this must be I/O for a superblock.  A superblock's
	 * buffer is set up in mountfs() and there is no root vnode
	 * at that point.
	 */
	if (ufsvfsp && ufsvfsp->vfs_root) {
		nbp->b_vp = ufsvfsp->vfs_root;
	} else {
		nbp->b_vp = NULL;
	}

	ASSERT((nbp->b_flags & B_NOCACHE) == 0);

	binshash(nbp, dp);
	mutex_exit(hmp);

	ASSERT(SEMA_HELD(&nbp->b_sem));

	return (nbp);


	/*
	 * Come here in case of an internal error. At this point we couldn't
	 * get a buffer, but we have to return one. Hence we allocate some
	 * kind of error reply buffer on the fly. This buffer is marked as
	 * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following:
	 *	- B_ERROR will indicate error to the caller.
	 *	- B_DONE will prevent us from reading the buffer from
	 *	  the device.
	 *	- B_NOCACHE will cause this buffer to be freed in
	 *	  brelse().
	 */

errout:
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}
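/*
 * Illustrative note, not part of the original code: because of the errout
 * path above, a caller that passes errflg (as bread_common() does) can get
 * back a buffer that was never on the hash.  Such a buffer has B_ERROR and
 * B_DONE set, so no I/O is ever started on it, and B_NOCACHE set, so
 * brelse() simply frees it, e.g.
 *
 *	bp = getblk_common(ufsvfsp, dev, blkno, bsize, 1);
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);		(frees the throwaway error buffer)
 *		return (EIO);
 *	}
 */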
/*
 * Get an empty block, not assigned to any particular device.
 * Returns a locked buffer that is not on any hash or free list.
 */
struct buf *
ngeteblk(long bsize)
{
	struct buf *bp;

	bp = kmem_alloc(sizeof (struct buf), KM_SLEEP);
	bioinit(bp);
	bp->av_forw = bp->av_back = NULL;
	bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	bp->b_bufsize = bsize;
	bp->b_flags = B_BUSY | B_NOCACHE | B_AGE;
	bp->b_dev = (o_dev_t)NODEV;
	bp->b_edev = NODEV;
	bp->b_lblkno = 0;
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	return (bp);
}

/*
 * Interface of geteblk() is kept intact to maintain driver compatibility.
 * Use ngeteblk() to allocate block size other than 1 KB.
 */
struct buf *
geteblk(void)
{
	return (ngeteblk((long)1024));
}

/*
 * Return a buffer w/o sleeping
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				sema_v(&bp->b_sem);
				break;
			}
		}
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}
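/*
 * Illustrative sketch, not part of the original code: trygetblk() is for
 * paths that must not sleep.  It only returns a buffer that is already
 * cached and B_DONE (with b_sem held); otherwise it returns NULL, e.g.
 *
 *	if ((bp = trygetblk(dev, blkno)) != NULL) {
 *		bcopy(bp->b_un.b_addr, dst, bp->b_bcount);
 *		brelse(bp);
 *	} else {
 *		... fall back to a path that is allowed to sleep ...
 *	}
 *
 * dst is hypothetical; it stands for wherever the caller wants the data.
 */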
/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
int
iowait(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}

/*
 * Zero the core associated with a buffer.
 */
void
clrbuf(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	bzero(bp->b_un.b_addr, bp->b_bcount);
	bp->b_resid = 0;
}


/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffers for the device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffer via the delayed write list,
	 * it may be busy and we would block trying to get the
	 * b_sem lock while holding hash lock. So transfer all the
	 * candidates on the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		vfs_syncprogress();
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}
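/*
 * Illustrative note, not part of the original code: a detach or unmount
 * path would typically flush dirty buffers for the device and then
 * invalidate whatever is left, along the lines of
 *
 *	bflush(dev);
 *	error = bfinval(dev, 1);	(bfinval() is defined below)
 *
 * bflush(NODEV) flushes delayed writes for every device.
 */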
/*
 * Ensure that a specified block is up-to-date on disk.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set.  This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {			/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		sema_v(&sbp->b_sem);
	}
}

/*
 * Same as binval, except can force-invalidate delayed-write buffers
 * (which may not already be flushed because of device errors).  Also
 * makes sure that the retry write flag is cleared.
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* Gather bp's */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
10857c478bd9Sstevel@tonic-gate } 10867c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_DELWRI) == 0) 10877c478bd9Sstevel@tonic-gate bp->b_flags |= B_STALE|B_AGE; 10887c478bd9Sstevel@tonic-gate else 10897c478bd9Sstevel@tonic-gate error = EIO; 10907c478bd9Sstevel@tonic-gate } 10917c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 10927c478bd9Sstevel@tonic-gate binval_list = bp->b_list; 10937c478bd9Sstevel@tonic-gate bp->b_list = NULL; 10947c478bd9Sstevel@tonic-gate } 10957c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock); 10967c478bd9Sstevel@tonic-gate bio_doinginval--; 10977c478bd9Sstevel@tonic-gate if (bio_flinv_cv_wanted) { 10987c478bd9Sstevel@tonic-gate cv_broadcast(&bio_flushinval_cv); 10997c478bd9Sstevel@tonic-gate bio_flinv_cv_wanted = 0; 11007c478bd9Sstevel@tonic-gate } 11017c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock); 11027c478bd9Sstevel@tonic-gate return (error); 11037c478bd9Sstevel@tonic-gate } 11047c478bd9Sstevel@tonic-gate 11057c478bd9Sstevel@tonic-gate /* 11067c478bd9Sstevel@tonic-gate * If possible, invalidate blocks for a dev on demand 11077c478bd9Sstevel@tonic-gate */ 11087c478bd9Sstevel@tonic-gate void 11097c478bd9Sstevel@tonic-gate binval(dev_t dev) 11107c478bd9Sstevel@tonic-gate { 11117c478bd9Sstevel@tonic-gate (void) bfinval(dev, 0); 11127c478bd9Sstevel@tonic-gate } 11137c478bd9Sstevel@tonic-gate 11147c478bd9Sstevel@tonic-gate /* 11157c478bd9Sstevel@tonic-gate * Initialize the buffer I/O system by freeing 11167c478bd9Sstevel@tonic-gate * all buffers and setting all device hash buffer lists to empty. 11177c478bd9Sstevel@tonic-gate */ 11187c478bd9Sstevel@tonic-gate void 11197c478bd9Sstevel@tonic-gate binit(void) 11207c478bd9Sstevel@tonic-gate { 11217c478bd9Sstevel@tonic-gate struct buf *bp; 11227c478bd9Sstevel@tonic-gate unsigned int i, pct; 11237c478bd9Sstevel@tonic-gate ulong_t bio_max_hwm, bio_default_hwm; 11247c478bd9Sstevel@tonic-gate 11257c478bd9Sstevel@tonic-gate /* 11267c478bd9Sstevel@tonic-gate * Maximum/Default values for bufhwm are set to the smallest of: 11277c478bd9Sstevel@tonic-gate * - BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory 11287c478bd9Sstevel@tonic-gate * - 1/4 of kernel virtual memory 11297c478bd9Sstevel@tonic-gate * - INT32_MAX to prevent overflows of v.v_bufhwm (which is int). 11307c478bd9Sstevel@tonic-gate * Additionally, in order to allow simple tuning by percentage of 11317c478bd9Sstevel@tonic-gate * physical memory, bufhwm_pct is used to calculate the default if 11327c478bd9Sstevel@tonic-gate * the value of this tunable is between 0 and BIO_MAX_PERCENT. 11337c478bd9Sstevel@tonic-gate * 11347c478bd9Sstevel@tonic-gate * Since the unit for v.v_bufhwm is kilobytes, this allows for 11357c478bd9Sstevel@tonic-gate * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers. 11367c478bd9Sstevel@tonic-gate */ 11377c478bd9Sstevel@tonic-gate bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT, 11387c478bd9Sstevel@tonic-gate btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024); 11397c478bd9Sstevel@tonic-gate bio_max_hwm = MIN(INT32_MAX, bio_max_hwm); 11407c478bd9Sstevel@tonic-gate 11417c478bd9Sstevel@tonic-gate pct = BIO_BUF_PERCENT; 11427c478bd9Sstevel@tonic-gate if (bufhwm_pct != 0 && 11437c478bd9Sstevel@tonic-gate ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) { 11447c478bd9Sstevel@tonic-gate pct = BIO_BUF_PERCENT; 11457c478bd9Sstevel@tonic-gate /* 11467c478bd9Sstevel@tonic-gate * Invalid user specified value, emit a warning. 
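
/*
 * Illustrative sketch (kept under #if 0, not compiled): how a detach or
 * unmount path might choose between binval() and a forced bfinval().  The
 * xx_detach() wrapper and the media_failed flag are hypothetical; only
 * binval() and bfinval() above are real interfaces.
 */
#if 0
static int
xx_detach(dev_t dev, boolean_t media_failed)
{
	if (!media_failed) {
		/* Normal case: only clean buffers are marked B_STALE. */
		binval(dev);
		return (0);
	}

	/*
	 * The device can no longer complete delayed writes, so force
	 * them off the delayed-write lists and discard them.  A
	 * non-forced bfinval() would leave them cached and return EIO.
	 */
	return (bfinval(dev, 1));
}
#endif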

/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device hash buffer lists to empty.
 */
void
binit(void)
{
	struct buf *bp;
	unsigned int i, pct;
	ulong_t bio_max_hwm, bio_default_hwm;

	/*
	 * Maximum/Default values for bufhwm are set to the smallest of:
	 *	- BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory
	 *	- 1/4 of kernel virtual memory
	 *	- INT32_MAX to prevent overflows of v.v_bufhwm (which is int).
	 * Additionally, in order to allow simple tuning by percentage of
	 * physical memory, bufhwm_pct is used to calculate the default if
	 * the value of this tunable is between 0 and BIO_MAX_PERCENT.
	 *
	 * Since the unit for v.v_bufhwm is kilobytes, this allows for
	 * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers.
	 */
	bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);

	pct = BIO_BUF_PERCENT;
	if (bufhwm_pct != 0 &&
	    ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
		pct = BIO_BUF_PERCENT;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
		    range(1..%d). Using %d as default.",
		    bufhwm_pct,
		    100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
	}

	bio_default_hwm = MIN(physmem / pct,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);

	if ((v.v_bufhwm = bufhwm) == 0)
		v.v_bufhwm = bio_default_hwm;

	if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
		v.v_bufhwm = (int)bio_max_hwm;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN,
		    "binit: bufhwm(%d) out \
		    of range(%d..%lu). Using %lu as default",
		    bufhwm,
		    BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
	}

	/*
	 * Determine the number of hash buckets. Default is to
	 * create ~BIO_HASHLEN entries per chain based on MAXBSIZE buffers.
	 * Round up number to the next power of 2.
	 */
	v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
	    BIO_HASHLEN);
	v.v_hmask = v.v_hbuf - 1;
	v.v_buf = BIO_BHDR_POOL;

	hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);

	dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);

	bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;
	bp = &bfreelist;
	bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	for (i = 0; i < v.v_hbuf; i++) {
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
		hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];

		/*
		 * Initialize the delayed write buffer list.
		 */
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}
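
/*
 * Illustrative sketch (kept under #if 0, not compiled): the bucket sizing
 * above worked through with assumed values MAXBSIZE == 8192 and
 * BIO_HASHLEN == 4; the authoritative values live in the headers.
 */
#if 0
static ulong_t
example_hbuf_count(void)
{
	ulong_t bufhwm_kb = 8192;			/* 8 MB buffer cache */
	ulong_t nbufs = (bufhwm_kb * 1024) / 8192;	/* 1024 MAXBSIZE bufs */
	ulong_t chains = nbufs / 4;			/* 256, per BIO_HASHLEN */

	/*
	 * highbit(256) == 9, so v.v_hbuf would be 1 << 9 == 512 buckets,
	 * i.e. about two MAXBSIZE buffers per chain once the cache fills
	 * (the power-of-two round-up halves the nominal BIO_HASHLEN target).
	 */
	return (1UL << highbit(chains));
}
#endif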

/*
 * Wait for I/O completion on the buffer; return error code.
 * If bp was for synchronous I/O, bp is invalid and associated
 * resources are freed on return.
 */
int
biowait(struct buf *bp)
{
	int error = 0;
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));

	cpup = CPU;
	atomic_inc_64(&cpup->cpu_stats.sys.iowait);
	DTRACE_IO1(wait__start, struct buf *, bp);

	/*
	 * In case of panic, busy wait for completion
	 */
	if (panicstr) {
		while ((bp->b_flags & B_DONE) == 0)
			drv_usecwait(10);
	} else
		sema_p(&bp->b_io);

	DTRACE_IO1(wait__done, struct buf *, bp);
	atomic_dec_64(&cpup->cpu_stats.sys.iowait);

	error = geterror(bp);
	if ((bp->b_flags & B_ASYNC) == 0) {
		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);
	}
	return (error);
}

static void
biodone_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_3(biodone, "io blockio", /* CSTYLED */,
	    tnf_device,		device,		bp->b_edev,
	    tnf_diskaddr,	block,		bp->b_lblkno,
	    tnf_opaque,		buf,		bp);
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
biodone(struct buf *bp)
{
	if (bp->b_flags & B_STARTED) {
		DTRACE_IO1(done, struct buf *, bp);
		bp->b_flags &= ~B_STARTED;
	}

	/*
	 * Call the TNF probe here instead of the inline code
	 * to force our compiler to use the tail call optimization.
	 */
	biodone_tnf_probe(bp);

	if (bp->b_iodone != NULL) {
		(*(bp->b_iodone))(bp);
		return;
	}
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_ASYNC) {
		if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
			bio_pageio_done(bp);
		else
			brelse(bp);	/* release bp to freelist */
	} else {
		sema_v(&bp->b_io);
	}
}
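
/*
 * Illustrative sketch (kept under #if 0, not compiled): the usual
 * biowait()/biodone() hand-off in a leaf driver.  The xx_*() routines,
 * the xx_softc layout and the hardware checks are hypothetical; biowait(),
 * biodone(), bioerror() and geterror() are the real interfaces.
 */
#if 0
static int
xx_sync_io(struct xx_softc *sc, struct buf *bp)
{
	sc->xx_active_bp = bp;
	xx_start_transfer(sc, bp);	/* queue bp to hypothetical hardware */
	return (biowait(bp));		/* sleeps until xx_intr() -> biodone() */
}

static uint_t
xx_intr(caddr_t arg)
{
	struct xx_softc *sc = (struct xx_softc *)arg;
	struct buf *bp = sc->xx_active_bp;

	if (xx_hw_failed(sc))
		bioerror(bp, EIO);	/* B_ERROR; biowait() sees it via geterror() */
	biodone(bp);			/* wakes biowait() or fires b_iodone */
	return (DDI_INTR_CLAIMED);
}
#endif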

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized code.
 */
int
geterror(struct buf *bp)
{
	int error = 0;

	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_ERROR) {
		error = bp->b_error;
		if (!error)
			error = EIO;
	}
	return (error);
}

/*
 * Support for pageio buffers.
 *
 * This stuff should be generalized to provide a generic bp
 * header facility that can be used for things other than pageio.
 */

/*
 * Allocate and initialize a buf struct for use with pageio.
 */
struct buf *
pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
{
	struct buf *bp;
	struct cpu *cpup;

	if (flags & B_READ) {
		CPU_STATS_ENTER_K();
		cpup = CPU;	/* get pointer AFTER preemption is disabled */
		CPU_STATS_ADDQ(cpup, vm, pgin, 1);
		CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));
		if ((flags & B_ASYNC) == 0) {
			klwp_t *lwp = ttolwp(curthread);
			if (lwp != NULL)
				lwp->lwp_ru.majflt++;
			CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
			/* Kernel probe */
			TNF_PROBE_2(major_fault, "vm pagefault", /* CSTYLED */,
			    tnf_opaque,		vnode,		pp->p_vnode,
			    tnf_offset,		offset,		pp->p_offset);
		}
		/*
		 * Update statistics for pages being paged in
		 */
		if (pp != NULL && pp->p_vnode != NULL) {
			if (IS_SWAPFSVP(pp->p_vnode)) {
				CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
			} else {
				if (pp->p_vnode->v_flag & VVMEXEC) {
					CPU_STATS_ADDQ(cpup, vm, execpgin,
					    btopr(len));
				} else {
					CPU_STATS_ADDQ(cpup, vm, fspgin,
					    btopr(len));
				}
			}
		}
		CPU_STATS_EXIT_K();
		TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
		    "page_ws_in:pp %p", pp);
		/* Kernel probe */
		TNF_PROBE_3(pagein, "vm pageio io", /* CSTYLED */,
		    tnf_opaque,	vnode,	pp->p_vnode,
		    tnf_offset,	offset,	pp->p_offset,
		    tnf_size,	size,	len);
	}

	bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
	bp->b_bcount = len;
	bp->b_bufsize = len;
	bp->b_pages = pp;
	bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
	bp->b_offset = -1;
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	/* Initialize bp->b_sem in "locked" state */
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);

	VN_HOLD(vp);
	bp->b_vp = vp;

	/*
	 * Caller sets dev & blkno and can adjust
	 * b_addr for page offset and can use bp_mapin
	 * to make pages kernel addressable.
	 */
	return (bp);
}

void
pageio_done(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);
	VN_RELE(bp->b_vp);
	bp->b_vp = NULL;
	ASSERT((bp->b_flags & B_NOCACHE) != 0);

	/* A sema_v(bp->b_sem) is implied if we are destroying it */
	sema_destroy(&bp->b_sem);
	sema_destroy(&bp->b_io);
	kmem_free(bp, sizeof (struct buf));
}
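
/*
 * Illustrative sketch (kept under #if 0, not compiled): the usual life
 * cycle of a pageio buf in a file system's getpage path.  The fs_pagein()
 * wrapper and the device/block translation are hypothetical; pageio_setup(),
 * bdev_strategy(), biowait() and pageio_done() are the real interfaces.
 */
#if 0
static int
fs_pagein(struct vnode *vp, struct page *pp, u_offset_t off, size_t len,
    dev_t dev, daddr_t blkno)
{
	struct buf *bp;
	int err;

	bp = pageio_setup(pp, len, vp, B_READ);	/* B_PAGEIO|B_BUSY buf */
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_blkno = blkno;
	bp->b_un.b_addr = (caddr_t)0;		/* offset into first page */
	bp->b_file = vp;
	bp->b_offset = (offset_t)off;

	(void) bdev_strategy(bp);
	err = biowait(bp);		/* b_sem was left held by setup */

	pageio_done(bp);		/* drops the vnode hold, frees bp */
	return (err);
}
#endif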

/*
 * Check to see whether the buffers, except the one pointed to by sbp,
 * associated with the device are busy.
 * NOTE: This expensive operation shall be improved together with ufs_icheck().
 */
int
bcheck(dev_t dev, struct buf *sbp)
{
	struct buf *bp;
	struct buf *dp;
	int i;
	kmutex_t *hmp;

	/*
	 * check for busy bufs for this filesystem
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			/*
			 * if buf is busy or dirty, then filesystem is busy
			 */
			if ((bp->b_edev == dev) &&
			    ((bp->b_flags & B_STALE) == 0) &&
			    (bp->b_flags & (B_DELWRI|B_BUSY)) &&
			    (bp != sbp)) {
				mutex_exit(hmp);
				return (1);
			}
		}
		mutex_exit(hmp);
	}
	return (0);
}


/*
 * Hash two 32 bit entities.
 */
int
hash2ints(int x, int y)
{
	int hash = 0;

	hash = x - 1;
	hash = ((hash * 7) + (x >> 8)) - 1;
	hash = ((hash * 7) + (x >> 16)) - 1;
	hash = ((hash * 7) + (x >> 24)) - 1;
	hash = ((hash * 7) + y) - 1;
	hash = ((hash * 7) + (y >> 8)) - 1;
	hash = ((hash * 7) + (y >> 16)) - 1;
	hash = ((hash * 7) + (y >> 24)) - 1;

	return (hash);
}
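
/*
 * Illustrative sketch (kept under #if 0, not compiled): how callers in this
 * file turn a (dev, blkno) pair into a hash bucket.  The helper name is
 * hypothetical; bio_bhash(), hash2ints() and the hbuf[] array are real.
 */
#if 0
static struct hbuf *
example_bucket(dev_t dev, daddr_t blkno)
{
	uint_t index = bio_bhash(dev, blkno);	/* hash2ints() & v.v_hmask */

	/*
	 * hbuf[index].b_forw/b_back chain the cached buffers for this
	 * bucket; av_forw/av_back form the bucket's local freelist.
	 */
	return (&hbuf[index]);
}
#endif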


/*
 * Return a new buffer struct.
 * Create a new buffer if we haven't gone over our high water
 * mark for memory, otherwise try to get one off the freelist.
 *
 * Returns a locked buf that has no id and is not on any hash or free
 * list.
 */
static struct buf *
bio_getfreeblk(long bsize)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t start, end;

	/*
	 * bfreelist.b_bufsize (protected by bfree_lock) represents the
	 * amount of memory we are allowed to allocate in the cache
	 * before we hit our hwm.
	 */
	bio_mem_get(bsize);	/* Account for our memory request */

again:
	bp = bio_bhdr_alloc();	/* Get a buf hdr */
	sema_p(&bp->b_sem);	/* Should never fail */

	ASSERT(bp->b_un.b_addr == NULL);
	bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
	if (bp->b_un.b_addr != NULL) {
		/*
		 * Make the common path short
		 */
		bp->b_bufsize = bsize;
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	} else {
		struct buf *save;

		save = bp;	/* Save bp we allocated */
		start = end = lastindex;

		biostats.bio_bufwant.value.ui32++;

		/*
		 * Memory isn't available from the system now.  Scan
		 * the hash buckets until enough space is found.
		 */
		do {
			hp = &hbuf[start];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			mutex_enter(hmp);
			bp = dp->av_forw;

			while (bp != dp) {

				ASSERT(bp != NULL);

				if (!sema_tryp(&bp->b_sem)) {
					bp = bp->av_forw;
					continue;
				}

				/*
				 * Since we are going down the freelist
				 * associated with this hash bucket the
				 * B_DELWRI flag should not be set.
				 */
				ASSERT(!(bp->b_flags & B_DELWRI));

				if (bp->b_bufsize == bsize) {
					hp->b_length--;
					notavail(bp);
					bremhash(bp);
					mutex_exit(hmp);

					/*
					 * Didn't kmem_alloc any more, so don't
					 * count it twice.
					 */
					mutex_enter(&bfree_lock);
					bfreelist.b_bufsize += bsize;
					mutex_exit(&bfree_lock);

					/*
					 * Update the lastindex value.
					 */
					lastindex = start;

					/*
					 * Put our saved bp back on the list
					 */
					sema_v(&save->b_sem);
					bio_bhdr_free(save);
					ASSERT(SEMA_HELD(&bp->b_sem));
					return (bp);
				}
				sema_v(&bp->b_sem);
				bp = bp->av_forw;
			}
			mutex_exit(hmp);
			start = ((start + 1) % v.v_hbuf);
		} while (start != end);

		biostats.bio_bufwait.value.ui32++;
		bp = save;		/* Use original bp */
		bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	}

	bp->b_bufsize = bsize;
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (bp);
}

/*
 * Allocate a buffer header. If none currently available, allocate
 * a new pool.
 */
static struct buf *
bio_bhdr_alloc(void)
{
	struct buf *dp, *sdp;
	struct buf *bp;
	int i;

	for (;;) {
		mutex_enter(&bhdr_lock);
		if (bhdrlist != NULL) {
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);
			bp->av_forw = NULL;
			return (bp);
		}
		mutex_exit(&bhdr_lock);

		/*
		 * Need to allocate a new pool. If the system is currently
		 * out of memory, then try freeing things on the freelist.
		 */
		dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
		if (dp == NULL) {
			/*
			 * System can't give us a pool of headers, try
			 * recycling from the free lists.
			 */
			bio_recycle(BIO_HEADER, 0);
		} else {
			sdp = dp;
			for (i = 0; i < v.v_buf; i++, dp++) {
				/*
				 * The next two lines are needed since NODEV
				 * is -1 and not NULL
				 */
				dp->b_dev = (o_dev_t)NODEV;
				dp->b_edev = NODEV;
				dp->av_forw = dp + 1;
				sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
				    NULL);
				sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
				    NULL);
				dp->b_offset = -1;
			}
			mutex_enter(&bhdr_lock);
			(--dp)->av_forw = bhdrlist;	/* Fix last pointer */
			bhdrlist = sdp;
			nbuf += v.v_buf;
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);

			bp->av_forw = NULL;
			return (bp);
		}
	}
}

static void
bio_bhdr_free(struct buf *bp)
{
	ASSERT(bp->b_back == NULL);
	ASSERT(bp->b_forw == NULL);
	ASSERT(bp->av_back == NULL);
	ASSERT(bp->av_forw == NULL);
	ASSERT(bp->b_un.b_addr == NULL);
	ASSERT(bp->b_dev == (o_dev_t)NODEV);
	ASSERT(bp->b_edev == NODEV);
	ASSERT(bp->b_flags == 0);

	mutex_enter(&bhdr_lock);
	bp->av_forw = bhdrlist;
	bhdrlist = bp;
	mutex_exit(&bhdr_lock);
}

/*
 * If we haven't gone over the high water mark, it's o.k. to
 * allocate more buffer space, otherwise recycle buffers
 * from the freelist until enough memory is free for a bsize request.
 *
 * We account for this memory, even though
 * we don't allocate it here.
 */
static void
bio_mem_get(long bsize)
{
	mutex_enter(&bfree_lock);
	if (bfreelist.b_bufsize > bsize) {
		bfreelist.b_bufsize -= bsize;
		mutex_exit(&bfree_lock);
		return;
	}
	mutex_exit(&bfree_lock);
	bio_recycle(BIO_MEM, bsize);
}

/*
 * flush a list of delayed write buffers.
 * (currently used only by bio_recycle below.)
 */
static void
bio_flushlist(struct buf *delwri_list)
{
	struct buf *bp;

	while (delwri_list != EMPTY_LIST) {
		bp = delwri_list;
		bp->b_flags |= B_AGE | B_ASYNC;
		if (bp->b_vp == NULL) {			/* !ufs */
			BWRITE(bp);
		} else {				/* ufs */
			UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
}

/*
 * Start recycling buffers on the freelist for one of 2 reasons:
 *	- we need a buffer header
 *	- we need to free up memory
 * Once started we continue to recycle buffers until the B_AGE
 * buffers are gone.
 */
static void
bio_recycle(int want, long bsize)
{
	struct buf *bp, *dp, *dwp, *nbp;
	struct hbuf *hp;
	int found = 0;
	kmutex_t *hmp;
	int start, end;
	struct buf *delwri_list = EMPTY_LIST;

	/*
	 * Recycle buffers.
	 */
top:
	start = end = lastindex;
	do {
		hp = &hbuf[start];
		hmp = &hp->b_lock;
		dp = (struct buf *)hp;

		mutex_enter(hmp);
		bp = dp->av_forw;

		while (bp != dp) {

			ASSERT(bp != NULL);

			if (!sema_tryp(&bp->b_sem)) {
				bp = bp->av_forw;
				continue;
			}
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */
			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				return;	/* All done */
			}

			ASSERT(MUTEX_HELD(&hp->b_lock));
			ASSERT(!(bp->b_flags & B_DELWRI));
			hp->b_length--;
			notavail(bp);

			/*
			 * Remove bhdr from cache, free up memory,
			 * and add the hdr to the freelist.
			 */
			bremhash(bp);
			mutex_exit(hmp);

			if (bp->b_bufsize) {
				kmem_free(bp->b_un.b_addr, bp->b_bufsize);
				bp->b_un.b_addr = NULL;
				mutex_enter(&bfree_lock);
				bfreelist.b_bufsize += bp->b_bufsize;
				mutex_exit(&bfree_lock);
			}

			bp->b_dev = (o_dev_t)NODEV;
			bp->b_edev = NODEV;
			bp->b_flags = 0;
			sema_v(&bp->b_sem);
			bio_bhdr_free(bp);
			if (want == BIO_HEADER) {
				found = 1;
			} else {
				ASSERT(want == BIO_MEM);
				if (!found && bfreelist.b_bufsize >= bsize) {
					/* Account for the memory we want */
					mutex_enter(&bfree_lock);
					if (bfreelist.b_bufsize >= bsize) {
						bfreelist.b_bufsize -= bsize;
						found = 1;
					}
					mutex_exit(&bfree_lock);
				}
			}

			/*
			 * Since we dropped hmp, start from the
			 * beginning.
			 */
			mutex_enter(hmp);
			bp = dp->av_forw;
		}
		mutex_exit(hmp);

		/*
		 * Look at the delayed write list.
		 * First gather into a private list, then write them.
		 */
		dwp = (struct buf *)&dwbuf[start];
		mutex_enter(&blist_lock);
		bio_doingflush++;
		mutex_enter(hmp);
		for (bp = dwp->av_forw; bp != dwp; bp = nbp) {

			ASSERT(bp != NULL);
			nbp = bp->av_forw;

			if (!sema_tryp(&bp->b_sem))
				continue;
			ASSERT(bp->b_flags & B_DELWRI);
			/*
			 * Do we really want to nuke all of the B_AGE stuff??
			 */

			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				mutex_exit(&blist_lock);
				bio_flushlist(delwri_list);
				mutex_enter(&blist_lock);
				bio_doingflush--;
				if (bio_flinv_cv_wanted) {
					bio_flinv_cv_wanted = 0;
					cv_broadcast(&bio_flushinval_cv);
				}
				mutex_exit(&blist_lock);
				return; /* All done */
			}

			/*
			 * If the buffer is already on a flush or
			 * invalidate list then just skip it.
			 */
			if (bp->b_list != NULL) {
				sema_v(&bp->b_sem);
				continue;
			}
			/*
			 * We are still on the same bucket.
			 */
			hp->b_length--;
			notavail(bp);
			bp->b_list = delwri_list;
			delwri_list = bp;
		}
		mutex_exit(hmp);
		mutex_exit(&blist_lock);
		bio_flushlist(delwri_list);
		delwri_list = EMPTY_LIST;
		mutex_enter(&blist_lock);
		bio_doingflush--;
		if (bio_flinv_cv_wanted) {
			bio_flinv_cv_wanted = 0;
			cv_broadcast(&bio_flushinval_cv);
		}
		mutex_exit(&blist_lock);
		start = (start + 1) % v.v_hbuf;

	} while (start != end);

	if (found)
		return;

	/*
	 * Free lists exhausted and we haven't satisfied the request.
	 * Wait here for more entries to be added to freelist.
	 * Because this might have just happened, make it timed.
	 */
	mutex_enter(&bfree_lock);
	bfreelist.b_flags |= B_WANTED;
	(void) cv_reltimedwait(&bio_mem_cv, &bfree_lock, hz, TR_CLOCK_TICK);
	mutex_exit(&bfree_lock);
	goto top;
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
static int
bio_incore(dev_t dev, daddr_t blkno)
{
	struct buf *bp;
	struct buf *dp;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	dp = (struct buf *)&hbuf[index];
	hmp = &hbuf[index].b_lock;

	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno == blkno && bp->b_edev == dev &&
		    (bp->b_flags & B_STALE) == 0) {
			mutex_exit(hmp);
			return (1);
		}
	}
	mutex_exit(hmp);
	return (0);
}

static void
bio_pageio_done(struct buf *bp)
{
	if (bp->b_flags & B_PAGEIO) {

		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);

		if (bp->b_flags & B_READ)
			pvn_read_done(bp->b_pages, bp->b_flags);
		else
			pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags);
		pageio_done(bp);
	} else {
		ASSERT(bp->b_flags & B_REMAPPED);
		bp_mapout(bp);
		brelse(bp);
	}
}

/*
 * bioerror(9F) - indicate error in buffer header
 * If 'error' is zero, remove the error indication.
 */
void
bioerror(struct buf *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);
	ASSERT(SEMA_HELD(&bp->b_sem));

	if (error != 0) {
		bp->b_flags |= B_ERROR;
	} else {
		bp->b_flags &= ~B_ERROR;
	}
	bp->b_error = error;
}

/*
 * bioreset(9F) - reuse a private buffer header after I/O is complete
 */
void
bioreset(struct buf *bp)
{
	ASSERT(bp != NULL);

	biofini(bp);
	bioinit(bp);
}

/*
 * biosize(9F) - return size of a buffer header
 */
size_t
biosize(void)
{
	return (sizeof (struct buf));
}

/*
 * biomodified(9F) - check if buffer is modified
 */
int
biomodified(struct buf *bp)
{
	int npf;
	int ppattr;
	struct page *pp;

	ASSERT(bp != NULL);

	if ((bp->b_flags & B_PAGEIO) == 0) {
		return (-1);
	}
	pp = bp->b_pages;
	npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));

	while (npf > 0) {
		ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
		    HAT_SYNC_STOPON_MOD);
		if (ppattr & P_MOD)
			return (1);
		pp = pp->p_next;
		npf--;
	}

	return (0);
}

/*
 * bioinit(9F) - initialize a buffer structure
 */
void
bioinit(struct buf *bp)
{
	bzero(bp, sizeof (struct buf));
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	bp->b_offset = -1;
}

/*
 * biofini(9F) - uninitialize a buffer structure
 */
void
biofini(struct buf *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}
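
/*
 * Illustrative sketch (kept under #if 0, not compiled): the intended pairing
 * of biosize()/bioinit()/biofini() for code that manages its own buf headers
 * instead of using getrbuf(9F)/freerbuf(9F).  The xx_*() wrappers are
 * hypothetical.
 */
#if 0
static struct buf *
xx_buf_alloc(void)
{
	struct buf *bp = kmem_alloc(biosize(), KM_SLEEP);

	bioinit(bp);			/* zeroes bp, initializes b_sem/b_io */
	return (bp);
}

static void
xx_buf_free(struct buf *bp)
{
	biofini(bp);			/* destroys the semaphores */
	kmem_free(bp, biosize());
}
#endif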

/*
 * bioclone(9F) - clone a buffer
 */
struct buf *
bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
    int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
{
	struct buf *bufp;

	ASSERT(bp);
	if (bp_mem == NULL) {
		bufp = kmem_alloc(sizeof (struct buf), sleep);
		if (bufp == NULL) {
			return (NULL);
		}
		bioinit(bufp);
	} else {
		bufp = bp_mem;
		bioreset(bufp);
	}

#define	BUF_CLONE_FLAGS	(B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
	B_ABRWRITE)

	/*
	 * The cloned buffer does not inherit the B_REMAPPED flag.
	 */
	bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
	bufp->b_bcount = len;
	bufp->b_blkno = blkno;
	bufp->b_iodone = iodone;
	bufp->b_proc = bp->b_proc;
	bufp->b_edev = dev;
	bufp->b_file = bp->b_file;
	bufp->b_offset = bp->b_offset;

	if (bp->b_flags & B_SHADOW) {
		ASSERT(bp->b_shadow);
		ASSERT(bp->b_flags & B_PHYS);

		bufp->b_shadow = bp->b_shadow +
		    btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
		bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
		if (bp->b_flags & B_REMAPPED)
			bufp->b_proc = NULL;
	} else {
		if (bp->b_flags & B_PAGEIO) {
			struct page *pp;
			off_t o;
			int i;

			pp = bp->b_pages;
			o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
			for (i = btop(o); i > 0; i--) {
				pp = pp->p_next;
			}
			bufp->b_pages = pp;
			bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
		} else {
			bufp->b_un.b_addr =
			    (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
			if (bp->b_flags & B_REMAPPED)
				bufp->b_proc = NULL;
		}
	}
	return (bufp);
}
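
/*
 * Illustrative sketch (kept under #if 0, not compiled): a layered driver
 * redirecting a request at an underlying device with bioclone(9F).  The
 * xx_*() names, xx_lower_dev and xx_part_start are hypothetical; bioclone(),
 * getrbuf()/freerbuf(), bdev_strategy(), geterror(), bioerror() and
 * biodone() are the real interfaces, and b_private is the documented
 * driver-private field of buf(9S).
 */
#if 0
static int
xx_child_done(struct buf *cbp)
{
	struct buf *pbp = cbp->b_private;	/* parent saved in xx_strategy */

	if (geterror(cbp) != 0)
		bioerror(pbp, geterror(cbp));
	pbp->b_resid = cbp->b_resid;
	freerbuf(cbp);				/* header came from getrbuf() */
	biodone(pbp);				/* complete the original request */
	return (0);
}

static int
xx_strategy(struct buf *pbp)
{
	struct buf *cbp;

	/* Clone the whole request, shifted to the underlying device. */
	cbp = bioclone(pbp, 0, pbp->b_bcount, xx_lower_dev,
	    pbp->b_blkno + xx_part_start, xx_child_done, getrbuf(KM_SLEEP),
	    KM_SLEEP);
	cbp->b_private = pbp;
	return (bdev_strategy(cbp));
}
#endif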