/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2019 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/buf.h>
#include <sys/var.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/atomic.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/vtrace.h>
#include <sys/tnf_probe.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/systm.h>
#include <sys/vfs.h>
#include <sys/sdt.h>

/* Locks */
static	kmutex_t	blist_lock;	/* protects b_list */
static	kmutex_t	bhdr_lock;	/* protects the bhdrlist */
static	kmutex_t	bfree_lock;	/* protects the bfreelist structure */

struct hbuf	*hbuf;			/* Hash buckets */
struct dwbuf	*dwbuf;			/* Delayed write buckets */
static struct buf *bhdrlist;		/* buf header free list */
static int	nbuf;			/* number of buffer headers allocated */

static int	lastindex;		/* Reference point on where to start */
					/* when looking for free buffers */

#define	bio_bhash(dev, bn)	(hash2ints((dev), (int)(bn)) & v.v_hmask)
#define	EMPTY_LIST	((struct buf *)-1)

static kcondvar_t	bio_mem_cv;	/* Condition variables */
static kcondvar_t	bio_flushinval_cv;
static int	bio_doingflush;		/* flush in progress */
static int	bio_doinginval;		/* inval in progress */
static int	bio_flinv_cv_wanted;	/* someone waiting for cv */

/*
 * Statistics on the buffer cache
 */
struct biostats biostats = {
	{ "buffer_cache_lookups",	KSTAT_DATA_UINT32 },
	{ "buffer_cache_hits",		KSTAT_DATA_UINT32 },
	{ "new_buffer_requests",	KSTAT_DATA_UINT32 },
	{ "waits_for_buffer_allocs",	KSTAT_DATA_UINT32 },
	{ "buffers_locked_by_someone",	KSTAT_DATA_UINT32 },
	{ "duplicate_buffers_found",	KSTAT_DATA_UINT32 }
};

/*
 * kstat data
 */
kstat_named_t	*biostats_ptr = (kstat_named_t *)&biostats;
uint_t		biostats_ndata = (uint_t)(sizeof (biostats) /
					sizeof (kstat_named_t));

/*
 * Statistics on ufs buffer cache
 * Not protected by locks
 */
struct ufsbiostats ub = {
	{ "breads",		KSTAT_DATA_UINT32 },
	{ "bwrites",		KSTAT_DATA_UINT32 },
	{ "fbiwrites",		KSTAT_DATA_UINT32 },
	{ "getpages",		KSTAT_DATA_UINT32 },
	{ "getras",		KSTAT_DATA_UINT32 },
	{ "putsyncs",		KSTAT_DATA_UINT32 },
	{ "putasyncs",		KSTAT_DATA_UINT32 },
	{ "putpageios",		KSTAT_DATA_UINT32 },
};

/*
 * More UFS logging eccentricities...
 *
 * Required since "#pragma weak ..." doesn't work in reverse order:
 * genunix (bio.c) is loaded before the ufs modules, so pointers to
 * ufs routines don't get plugged into bio.c calls automatically.
 * Instead, these hooks are initialized when the "lufsops" table is
 * set up in lufs.c:_init().
 */
void (*bio_lufs_strategy)(void *, buf_t *);
void (*bio_snapshot_strategy)(void *, buf_t *);
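
/*
 * Illustrative sketch (not part of the original source): the ufs logging
 * module is expected to plug its strategy routine into the hook above from
 * its _init() routine, roughly as follows (names are approximate):
 *
 *	int
 *	_init(void)
 *	{
 *		...
 *		bio_lufs_strategy = (void (*)(void *, buf_t *))lufs_strategy;
 *		...
 *	}
 */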


/* Private routines */
static struct	buf	*bio_getfreeblk(long);
static void	bio_mem_get(long);
static void	bio_bhdr_free(struct buf *);
static struct	buf *bio_bhdr_alloc(void);
static void	bio_recycle(int, long);
static void	bio_pageio_done(struct buf *);
static int	bio_incore(dev_t, daddr_t);

/*
 * Buffer cache constants
 */
#define	BIO_BUF_PERCENT	(100/2)		/* default: 2% of memory */
#define	BIO_MAX_PERCENT	(100/20)	/* max is 20% of real memory */
#define	BIO_BHDR_POOL	100		/* Default bhdr pool size */
#define	BIO_MIN_HDR	10		/* Minimum number of buffer headers */
#define	BIO_MIN_HWM	(BIO_MIN_HDR * MAXBSIZE / 1024)
#define	BIO_HASHLEN	4		/* Target length of hash chains */


/* Flags for bio_recycle() */
#define	BIO_HEADER	0x01
#define	BIO_MEM		0x02

extern	int bufhwm;		/* User tunable - high water mark for mem */
extern	int bufhwm_pct;		/* ditto - given in % of physmem */

/*
 * The following routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer returned is locked with a
 * binary semaphore so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already locked, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread/BREAD
 *	breada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite/BWRITE/brwrite
 *	bdwrite/bdrwrite
 *	bawrite
 *	brelse
 *
 * The B_WANTED/B_BUSY bits are NOT used by these routines for synchronization.
 * Instead, a binary semaphore, b_sem, is used to gain exclusive access to
 * a buffer and a binary semaphore, b_io, is used for I/O synchronization.
 * B_DONE is still used to denote a buffer with I/O complete on it.
 *
 * The bfreelist.b_bcount field is computed every time fsflush runs.  It
 * should not be used where a very accurate count of the free buffers is
 * needed.
 */
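
/*
 * Illustrative sketch (not part of the original source): a typical caller
 * under the protocol above, assuming a hypothetical filesystem that reads
 * one DEV_BSIZE metadata block.  bread() returns with b_sem held, so the
 * caller must eventually release the buffer via brelse(), bdwrite() or
 * bwrite():
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno, (long)DEV_BSIZE);
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);
 *		return (EIO);
 *	}
 *	bcopy(bp->b_un.b_addr, dst, DEV_BSIZE);
 *	brelse(bp);
 */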

/*
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * This interface is provided for binary compatibility.  Using
 * BREAD() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
struct buf *
bread(dev_t dev, daddr_t blkno, long bsize)
{
	return (BREAD(dev, blkno, bsize));
}

/*
 * Common code for reading a buffer with various options
 *
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		return (bp);
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {				/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
						/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
						/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;	/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}

/*
 * Common code for writing a buffer with various options.
 *
 * force_wait  - wait for write completion regardless of B_ASYNC flag
 * do_relse    - release the buffer when we are done
 * clear_flags - flags to clear from the buffer
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
    int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
						/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
						/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;	/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}

/*
 * Write the buffer, waiting for completion (unless B_ASYNC is set).
 * Then release the buffer.
 * This interface is provided for binary compatibility.  Using
 * BWRITE() directly avoids the extra function call overhead invoked
 * by calling this routine.
 */
void
bwrite(struct buf *bp)
{
	BWRITE(bp);
}

/*
 * Write the buffer, waiting for completion.
 * But don't release the buffer afterwards.
 * This interface is provided for binary compatibility.  Using
 * BWRITE2() directly avoids the extra function call overhead.
 */
void
bwrite2(struct buf *bp)
{
	BWRITE2(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * Also save the time that the block is first marked as delayed
 * so that it will be written in a reasonable time.
 */
void
bdwrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	CPU_STATS_ADD_K(sys, lwrite, 1);
	if ((bp->b_flags & B_DELWRI) == 0)
		bp->b_start = ddi_get_lbolt();
	/*
	 * B_DONE allows others to use the buffer, B_DELWRI causes the
	 * buffer to be written before being reused, and setting b_resid
	 * to zero says the buffer is complete.
	 */
	bp->b_flags |= B_DELWRI | B_DONE;
	bp->b_resid = 0;
	brelse(bp);
}
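
/*
 * Illustrative sketch (not part of the original source): the delayed-write
 * path is typically used when a caller has modified part of a cached block
 * and expects further writes to the same block soon.  A hypothetical caller
 * might look like this; fsflush later writes the B_DELWRI buffer out:
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno, bsize);
 *	bcopy(src, bp->b_un.b_addr + off, len);
 *	bdwrite(bp);
 */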

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
void
bawrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/* Use bfreelist.b_bcount as a weird-ass heuristic */
	if (bfreelist.b_bcount > 4)
		bp->b_flags |= B_ASYNC;
	BWRITE(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
void
brelse(struct buf *bp)
{
	struct buf	**backp;
	uint_t		index;
	kmutex_t	*hmp;
	struct buf	*dp;
	struct hbuf	*hp;


	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * Clear the retry write flag if the buffer was written without
	 * error.  The presence of B_DELWRI means the buffer has not yet
	 * been written and the presence of B_ERROR means that an error
	 * is still occurring.
	 */
	if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) {
		bp->b_flags &= ~B_RETRYWRI;
	}

	/* Check for anomalous conditions */
	if (bp->b_flags & (B_ERROR|B_NOCACHE)) {
		if (bp->b_flags & B_NOCACHE) {
			/* Don't add to the freelist.  Destroy it now */
			kmem_free(bp->b_un.b_addr, bp->b_bufsize);
			sema_destroy(&bp->b_sem);
			sema_destroy(&bp->b_io);
			kmem_free(bp, sizeof (struct buf));
			return;
		}
		/*
		 * If a write failed and we are supposed to retry write,
		 * don't toss the buffer.  Keep it around and mark it
		 * delayed write in the hopes that it will eventually
		 * get flushed (and still keep the system running.)
		 */
		if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) {
			bp->b_flags |= B_DELWRI;
			/* keep fsflush from trying continuously to flush */
			bp->b_start = ddi_get_lbolt();
		} else
			bp->b_flags |= B_AGE|B_STALE;
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
	}

	/*
	 * If delayed write is set then put it on the delayed
	 * write list instead of the free buffer list.
	 */
	index = bio_bhash(bp->b_edev, bp->b_blkno);
	hmp = &hbuf[index].b_lock;

	mutex_enter(hmp);
	hp = &hbuf[index];
	dp = (struct buf *)hp;

	/*
	 * Make sure the number of entries on this list stays sane:
	 * 0 <= count < total number of buffer headers.
	 */
	ASSERT(hp->b_length >= 0);
	ASSERT(hp->b_length < nbuf);

	hp->b_length++;		/* We are adding this buffer */

	if (bp->b_flags & B_DELWRI) {
		/*
		 * This buffer goes on the delayed write buffer list
		 */
		dp = (struct buf *)&dwbuf[index];
	}
	ASSERT(bp->b_bufsize > 0);
	ASSERT(bp->b_bcount > 0);
	ASSERT(bp->b_un.b_addr != NULL);

	if (bp->b_flags & B_AGE) {
		backp = &dp->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = dp;
	} else {
		backp = &dp->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = dp;
	}
	mutex_exit(hmp);

	if (bfreelist.b_flags & B_WANTED) {
		/*
		 * We should come here only very rarely.
		 */
		mutex_enter(&bfree_lock);
		if (bfreelist.b_flags & B_WANTED) {
			bfreelist.b_flags &= ~B_WANTED;
			cv_broadcast(&bio_mem_cv);
		}
		mutex_exit(&bfree_lock);
	}

	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC);
	/*
	 * Don't let anyone get the buffer off the freelist before we
	 * release our hold on it.
	 */
	sema_v(&bp->b_sem);
}

/*
 * Return a count of the number of B_BUSY buffers in the system.
 * Can only be used as a good estimate.  If 'cleanit' is set,
 * try to flush all bufs.
 */
int
bio_busy(int cleanit)
{
	struct buf *bp, *dp;
	int busy = 0;
	int i;
	kmutex_t *hmp;

	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_flags & B_BUSY)
				busy++;
		}
		mutex_exit(hmp);
	}

	if (cleanit && busy != 0) {
		bflush(NODEV);
	}

	return (busy);
}

/*
 * This interface is provided for binary compatibility.
 *
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev_t dev, daddr_t blkno, long bsize)
{
	return (getblk_common(/* ufsvfsp */ NULL, dev,
	    blkno, bsize, /* errflg */ 0));
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk_common(void *arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
{
	ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	struct buf *dp;
	struct buf *nbp = NULL;
	struct buf *errbp;
	uint_t		index;
	kmutex_t	*hmp;
	struct hbuf	*hp;

	if (getmajor(dev) >= devcnt)
		cmn_err(CE_PANIC, "blkdev");

	biostats.bio_lookup.value.ui32++;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	mutex_enter(hmp);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Avoid holding the hash lock in the event that
		 * the buffer is locked by someone.  Since the hash chain
		 * may change when we drop the hash lock
		 * we have to start at the beginning of the chain if the
		 * buffer identity/contents aren't valid.
		 */
		if (!sema_tryp(&bp->b_sem)) {
			biostats.bio_bufbusy.value.ui32++;
			mutex_exit(hmp);
			/*
			 * OK, we are dealing with a busy buffer.
			 * In the case that we are panicking and we
			 * got called from bread(), we have some chance
			 * for error recovery.  So better bail out from
			 * here since sema_p() won't block.  If we got
			 * called directly from ufs routines, there is
			 * no way to report an error yet.
			 */
			if (panicstr && errflg)
				goto errout;
			/*
			 * For the following line of code to work
			 * correctly never kmem_free the buffer "header".
			 */
			sema_p(&bp->b_sem);
			if (bp->b_blkno != blkno || bp->b_edev != dev ||
			    (bp->b_flags & B_STALE)) {
				sema_v(&bp->b_sem);
				mutex_enter(hmp);
				goto loop;	/* start over */
			}
			mutex_enter(hmp);
		}
		/* Found */
		biostats.bio_hit.value.ui32++;
		bp->b_flags &= ~B_AGE;

		/*
		 * Yank it off the free/delayed write lists
		 */
		hp->b_length--;
		notavail(bp);
		mutex_exit(hmp);

		ASSERT((bp->b_flags & B_NOCACHE) == 0);

		if (nbp == NULL) {
			/*
			 * Make the common path short.
			 */
			ASSERT(SEMA_HELD(&bp->b_sem));
			return (bp);
		}

		biostats.bio_bufdup.value.ui32++;

		/*
		 * The buffer must have entered during the lock upgrade
		 * so free the new buffer we allocated and return the
		 * found buffer.
		 */
		kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
		nbp->b_un.b_addr = NULL;

		/*
		 * Account for the memory
		 */
		mutex_enter(&bfree_lock);
		bfreelist.b_bufsize += nbp->b_bufsize;
		mutex_exit(&bfree_lock);

		/*
		 * Destroy buf identity, and place on avail list
		 */
		nbp->b_dev = (o_dev_t)NODEV;
		nbp->b_edev = NODEV;
		nbp->b_flags = 0;
		nbp->b_file = NULL;
		nbp->b_offset = -1;

		sema_v(&nbp->b_sem);
		bio_bhdr_free(nbp);

		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	}

	/*
	 * bio_getfreeblk may block so check the hash chain again.
	 */
	if (nbp == NULL) {
		mutex_exit(hmp);
		nbp = bio_getfreeblk(bsize);
		mutex_enter(hmp);
		goto loop;
	}

	/*
	 * New buffer.  Assign nbp and stick it on the hash.
	 */
	nbp->b_flags = B_BUSY;
	nbp->b_edev = dev;
	nbp->b_dev = (o_dev_t)cmpdev(dev);
	nbp->b_blkno = blkno;
	nbp->b_iodone = NULL;
	nbp->b_bcount = bsize;
	/*
	 * If we are given a ufsvfsp and the vfs_root field is NULL
	 * then this must be I/O for a superblock.  A superblock's
	 * buffer is set up in mountfs() and there is no root vnode
	 * at that point.
	 */
	if (ufsvfsp && ufsvfsp->vfs_root) {
		nbp->b_vp = ufsvfsp->vfs_root;
	} else {
		nbp->b_vp = NULL;
	}

	ASSERT((nbp->b_flags & B_NOCACHE) == 0);

	binshash(nbp, dp);
	mutex_exit(hmp);

	ASSERT(SEMA_HELD(&nbp->b_sem));

	return (nbp);


	/*
	 * Come here in case of an internal error.  At this point we couldn't
	 * get a buffer, but we have to return one.  Hence we allocate some
	 * kind of error reply buffer on the fly.  This buffer is marked as
	 * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following:
	 *	- B_ERROR will indicate error to the caller.
	 *	- B_DONE will prevent us from reading the buffer from
	 *	  the device.
	 *	- B_NOCACHE will cause this buffer to be freed in
	 *	  brelse().
	 */

errout:
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}

/*
 * Get an empty block, not assigned to any particular device.
 * Returns a locked buffer that is not on any hash or free list.
 */
struct buf *
ngeteblk(long bsize)
{
	struct buf *bp;

	bp = kmem_alloc(sizeof (struct buf), KM_SLEEP);
	bioinit(bp);
	bp->av_forw = bp->av_back = NULL;
	bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	bp->b_bufsize = bsize;
	bp->b_flags = B_BUSY | B_NOCACHE | B_AGE;
	bp->b_dev = (o_dev_t)NODEV;
	bp->b_edev = NODEV;
	bp->b_lblkno = 0;
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	return (bp);
}

/*
 * The interface of geteblk() is kept intact to maintain driver compatibility.
 * Use ngeteblk() to allocate a block size other than 1 KB.
 */
struct buf *
geteblk(void)
{
	return (ngeteblk((long)1024));
}
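
/*
 * Illustrative sketch (not part of the original source): since these buffers
 * are marked B_NOCACHE and returned locked, a hypothetical caller can use one
 * as a scratch buffer and simply brelse() it when finished; brelse() then
 * frees both the data and the header instead of caching them:
 *
 *	struct buf *bp;
 *
 *	bp = ngeteblk(bsize);
 *	bcopy(src, bp->b_un.b_addr, bsize);
 *	...
 *	brelse(bp);
 */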

/*
 * Return a buffer w/o sleeping
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				sema_v(&bp->b_sem);
				break;
			}
		}
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
int
iowait(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}

/*
 * Zero the core associated with a buffer.
 */
void
clrbuf(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	bzero(bp->b_un.b_addr, bp->b_bcount);
	bp->b_resid = 0;
}


/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffers for the device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffers via the delayed write list,
	 * a buffer may be busy and we would block trying to get its
	 * b_sem lock while holding the hash lock.  So transfer all the
	 * candidates onto the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		vfs_syncprogress();
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}

/*
 * Ensure that a specified block is up-to-date on disk.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set.  This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {				/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		sema_v(&sbp->b_sem);
	}
}

/*
 * Same as binval, except it can force-invalidate delayed-write buffers
 * (which may not already be flushed because of device errors).  Also
 * makes sure that the retry write flag is cleared.
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* Gather bp's */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doinginval--;
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}
11047c478bd9Sstevel@tonic-gate
11057c478bd9Sstevel@tonic-gate /*
11067c478bd9Sstevel@tonic-gate * If possible, invalidate blocks for a dev on demand
11077c478bd9Sstevel@tonic-gate */
11087c478bd9Sstevel@tonic-gate void
11097c478bd9Sstevel@tonic-gate binval(dev_t dev)
11107c478bd9Sstevel@tonic-gate {
11117c478bd9Sstevel@tonic-gate (void) bfinval(dev, 0);
11127c478bd9Sstevel@tonic-gate }
11137c478bd9Sstevel@tonic-gate
11147c478bd9Sstevel@tonic-gate /*
11157c478bd9Sstevel@tonic-gate * Initialize the buffer I/O system by freeing
11167c478bd9Sstevel@tonic-gate * all buffers and setting all device hash buffer lists to empty.
11177c478bd9Sstevel@tonic-gate */
11187c478bd9Sstevel@tonic-gate void
11197c478bd9Sstevel@tonic-gate binit(void)
11207c478bd9Sstevel@tonic-gate {
11217c478bd9Sstevel@tonic-gate struct buf *bp;
11227c478bd9Sstevel@tonic-gate unsigned int i, pct;
11237c478bd9Sstevel@tonic-gate ulong_t bio_max_hwm, bio_default_hwm;
11247c478bd9Sstevel@tonic-gate
11257c478bd9Sstevel@tonic-gate /*
11267c478bd9Sstevel@tonic-gate * Maximum/Default values for bufhwm are set to the smallest of:
11277c478bd9Sstevel@tonic-gate * - BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory
11287c478bd9Sstevel@tonic-gate * - 1/4 of kernel virtual memory
11297c478bd9Sstevel@tonic-gate * - INT32_MAX to prevent overflows of v.v_bufhwm (which is int).
11307c478bd9Sstevel@tonic-gate * Additionally, in order to allow simple tuning by percentage of
11317c478bd9Sstevel@tonic-gate * physical memory, bufhwm_pct is used to calculate the default if
11327c478bd9Sstevel@tonic-gate * the value of this tunable is between 0 and BIO_MAX_PERCENT.
11337c478bd9Sstevel@tonic-gate *
11347c478bd9Sstevel@tonic-gate * Since the unit for v.v_bufhwm is kilobytes, this allows for
11357c478bd9Sstevel@tonic-gate * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers.
11367c478bd9Sstevel@tonic-gate */
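/*
 * Worked example (hypothetical figures, for illustration only): with
 * 4K pages, physmem == 262144 pages (1GB) and a divisor (pct) of 50,
 * the physical-memory term of the default computed below would be
 * (262144 / 50) * (4096 / 1024) == 20968 KB, i.e. roughly 2% of
 * physical memory, subject to the kernel-VA and INT32_MAX clamps.
 */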
11377c478bd9Sstevel@tonic-gate bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
11387c478bd9Sstevel@tonic-gate btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
11397c478bd9Sstevel@tonic-gate bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);
11407c478bd9Sstevel@tonic-gate
11417c478bd9Sstevel@tonic-gate pct = BIO_BUF_PERCENT;
11427c478bd9Sstevel@tonic-gate if (bufhwm_pct != 0 &&
11437c478bd9Sstevel@tonic-gate ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
11447c478bd9Sstevel@tonic-gate pct = BIO_BUF_PERCENT;
11457c478bd9Sstevel@tonic-gate /*
11467c478bd9Sstevel@tonic-gate * Invalid user specified value, emit a warning.
11477c478bd9Sstevel@tonic-gate */
11487c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
11497c478bd9Sstevel@tonic-gate range(1..%d). Using %d as default.",
11507c478bd9Sstevel@tonic-gate bufhwm_pct,
11517c478bd9Sstevel@tonic-gate 100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
11527c478bd9Sstevel@tonic-gate }
11537c478bd9Sstevel@tonic-gate
11547c478bd9Sstevel@tonic-gate bio_default_hwm = MIN(physmem / pct,
11557c478bd9Sstevel@tonic-gate btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
11567c478bd9Sstevel@tonic-gate bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);
11577c478bd9Sstevel@tonic-gate
11587c478bd9Sstevel@tonic-gate if ((v.v_bufhwm = bufhwm) == 0)
11597c478bd9Sstevel@tonic-gate v.v_bufhwm = bio_default_hwm;
11607c478bd9Sstevel@tonic-gate
11617c478bd9Sstevel@tonic-gate if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
11627c478bd9Sstevel@tonic-gate v.v_bufhwm = (int)bio_max_hwm;
11637c478bd9Sstevel@tonic-gate /*
11647c478bd9Sstevel@tonic-gate * Invalid user specified value, emit a warning.
11657c478bd9Sstevel@tonic-gate */
11667c478bd9Sstevel@tonic-gate cmn_err(CE_WARN,
11677c478bd9Sstevel@tonic-gate "binit: bufhwm(%d) out \
11687c478bd9Sstevel@tonic-gate of range(%d..%lu). Using %lu as default",
11697c478bd9Sstevel@tonic-gate bufhwm,
11707c478bd9Sstevel@tonic-gate BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
11717c478bd9Sstevel@tonic-gate }
11727c478bd9Sstevel@tonic-gate
11737c478bd9Sstevel@tonic-gate /*
11747c478bd9Sstevel@tonic-gate * Determine the number of hash buckets. Default is to
11757c478bd9Sstevel@tonic-gate * create ~BIO_HASHLEN entries per chain based on MAXBSIZE buffers.
11767c478bd9Sstevel@tonic-gate * Round up number to the next power of 2.
11777c478bd9Sstevel@tonic-gate */
11787c478bd9Sstevel@tonic-gate v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
11797c478bd9Sstevel@tonic-gate BIO_HASHLEN);
11807c478bd9Sstevel@tonic-gate v.v_hmask = v.v_hbuf - 1;
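/*
 * Worked example (hypothetical figures): with v.v_bufhwm of 20968 KB,
 * MAXBSIZE of 8K and BIO_HASHLEN of 4, the expression above becomes
 * 1 << highbit((20968 * 1024 / 8192) / 4) == 1 << highbit(655) == 1024,
 * giving v.v_hbuf of 1024 buckets and v.v_hmask of 0x3ff.
 */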
11817c478bd9Sstevel@tonic-gate v.v_buf = BIO_BHDR_POOL;
11827c478bd9Sstevel@tonic-gate
11837c478bd9Sstevel@tonic-gate hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);
11847c478bd9Sstevel@tonic-gate
11857c478bd9Sstevel@tonic-gate dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);
11867c478bd9Sstevel@tonic-gate
11877c478bd9Sstevel@tonic-gate bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;
11887c478bd9Sstevel@tonic-gate bp = &bfreelist;
11897c478bd9Sstevel@tonic-gate bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;
11907c478bd9Sstevel@tonic-gate
11917c478bd9Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) {
11927c478bd9Sstevel@tonic-gate hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
11937c478bd9Sstevel@tonic-gate hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];
11947c478bd9Sstevel@tonic-gate
11957c478bd9Sstevel@tonic-gate /*
11967c478bd9Sstevel@tonic-gate * Initialize the delayed write buffer list.
11977c478bd9Sstevel@tonic-gate */
11987c478bd9Sstevel@tonic-gate dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
11997c478bd9Sstevel@tonic-gate dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
12007c478bd9Sstevel@tonic-gate }
12017c478bd9Sstevel@tonic-gate }
12027c478bd9Sstevel@tonic-gate
12037c478bd9Sstevel@tonic-gate /*
12047c478bd9Sstevel@tonic-gate * Wait for I/O completion on the buffer; return error code.
12057c478bd9Sstevel@tonic-gate * If bp was for synchronous I/O, bp is invalid and associated
12067c478bd9Sstevel@tonic-gate * resources are freed on return.
12077c478bd9Sstevel@tonic-gate */
12087c478bd9Sstevel@tonic-gate int
12097c478bd9Sstevel@tonic-gate biowait(struct buf *bp)
12107c478bd9Sstevel@tonic-gate {
12117c478bd9Sstevel@tonic-gate int error = 0;
12127c478bd9Sstevel@tonic-gate struct cpu *cpup;
12137c478bd9Sstevel@tonic-gate
12147c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
12157c478bd9Sstevel@tonic-gate
12167c478bd9Sstevel@tonic-gate cpup = CPU;
12171a5e258fSJosef 'Jeff' Sipek atomic_inc_64(&cpup->cpu_stats.sys.iowait);
12187c478bd9Sstevel@tonic-gate DTRACE_IO1(wait__start, struct buf *, bp);
12197c478bd9Sstevel@tonic-gate
12207c478bd9Sstevel@tonic-gate /*
12217c478bd9Sstevel@tonic-gate * In case of panic, busy wait for completion
12227c478bd9Sstevel@tonic-gate */
12237c478bd9Sstevel@tonic-gate if (panicstr) {
12247c478bd9Sstevel@tonic-gate while ((bp->b_flags & B_DONE) == 0)
12257c478bd9Sstevel@tonic-gate drv_usecwait(10);
12267c478bd9Sstevel@tonic-gate } else
12277c478bd9Sstevel@tonic-gate sema_p(&bp->b_io);
12287c478bd9Sstevel@tonic-gate
12297c478bd9Sstevel@tonic-gate DTRACE_IO1(wait__done, struct buf *, bp);
12301a5e258fSJosef 'Jeff' Sipek atomic_dec_64(&cpup->cpu_stats.sys.iowait);
12317c478bd9Sstevel@tonic-gate
12327c478bd9Sstevel@tonic-gate error = geterror(bp);
12337c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_ASYNC) == 0) {
12347c478bd9Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
12357c478bd9Sstevel@tonic-gate bp_mapout(bp);
12367c478bd9Sstevel@tonic-gate }
12377c478bd9Sstevel@tonic-gate return (error);
12387c478bd9Sstevel@tonic-gate }
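/*
 * Typical synchronous use (sketch only; the strategy routine named
 * here stands in for any block driver entry point and is not defined
 * in this file):
 *
 *	(void) bdev_strategy(bp);
 *	error = biowait(bp);
 *
 * For B_ASYNC requests nobody calls biowait(); completion is handled
 * entirely from biodone() below.
 */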
12397c478bd9Sstevel@tonic-gate
12407c478bd9Sstevel@tonic-gate static void
12417c478bd9Sstevel@tonic-gate biodone_tnf_probe(struct buf *bp)
12427c478bd9Sstevel@tonic-gate {
12437c478bd9Sstevel@tonic-gate /* Kernel probe */
12447c478bd9Sstevel@tonic-gate TNF_PROBE_3(biodone, "io blockio", /* CSTYLED */,
12457c478bd9Sstevel@tonic-gate tnf_device, device, bp->b_edev,
12467c478bd9Sstevel@tonic-gate tnf_diskaddr, block, bp->b_lblkno,
12477c478bd9Sstevel@tonic-gate tnf_opaque, buf, bp);
12487c478bd9Sstevel@tonic-gate }
12497c478bd9Sstevel@tonic-gate
12507c478bd9Sstevel@tonic-gate /*
12517c478bd9Sstevel@tonic-gate * Mark I/O complete on a buffer, release it if I/O is asynchronous,
12527c478bd9Sstevel@tonic-gate * and wake up anyone waiting for it.
12537c478bd9Sstevel@tonic-gate */
12547c478bd9Sstevel@tonic-gate void
12557c478bd9Sstevel@tonic-gate biodone(struct buf *bp)
12567c478bd9Sstevel@tonic-gate {
12577c478bd9Sstevel@tonic-gate if (bp->b_flags & B_STARTED) {
12587c478bd9Sstevel@tonic-gate DTRACE_IO1(done, struct buf *, bp);
12597c478bd9Sstevel@tonic-gate bp->b_flags &= ~B_STARTED;
12607c478bd9Sstevel@tonic-gate }
12617c478bd9Sstevel@tonic-gate
12627c478bd9Sstevel@tonic-gate /*
12637c478bd9Sstevel@tonic-gate * Call the TNF probe here instead of the inline code
12647c478bd9Sstevel@tonic-gate * to force our compiler to use the tail call optimization.
12657c478bd9Sstevel@tonic-gate */
12667c478bd9Sstevel@tonic-gate biodone_tnf_probe(bp);
12677c478bd9Sstevel@tonic-gate
12687c478bd9Sstevel@tonic-gate if (bp->b_iodone != NULL) {
12697c478bd9Sstevel@tonic-gate (*(bp->b_iodone))(bp);
12707c478bd9Sstevel@tonic-gate return;
12717c478bd9Sstevel@tonic-gate }
12727c478bd9Sstevel@tonic-gate ASSERT((bp->b_flags & B_DONE) == 0);
12737c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
12747c478bd9Sstevel@tonic-gate bp->b_flags |= B_DONE;
12757c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ASYNC) {
12767c478bd9Sstevel@tonic-gate if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
12777c478bd9Sstevel@tonic-gate bio_pageio_done(bp);
12787c478bd9Sstevel@tonic-gate else
12797c478bd9Sstevel@tonic-gate brelse(bp); /* release bp to freelist */
12807c478bd9Sstevel@tonic-gate } else {
12817c478bd9Sstevel@tonic-gate sema_v(&bp->b_io);
12827c478bd9Sstevel@tonic-gate }
12837c478bd9Sstevel@tonic-gate }
12847c478bd9Sstevel@tonic-gate
12857c478bd9Sstevel@tonic-gate /*
12867c478bd9Sstevel@tonic-gate * Pick up the device's error number and pass it to the user;
12877c478bd9Sstevel@tonic-gate * if there is an error but the number is 0 set a generalized code.
12887c478bd9Sstevel@tonic-gate */
12897c478bd9Sstevel@tonic-gate int
12907c478bd9Sstevel@tonic-gate geterror(struct buf *bp)
12917c478bd9Sstevel@tonic-gate {
12927c478bd9Sstevel@tonic-gate int error = 0;
12937c478bd9Sstevel@tonic-gate
12947c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
12957c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) {
12967c478bd9Sstevel@tonic-gate error = bp->b_error;
12977c478bd9Sstevel@tonic-gate if (!error)
12987c478bd9Sstevel@tonic-gate error = EIO;
12997c478bd9Sstevel@tonic-gate }
13007c478bd9Sstevel@tonic-gate return (error);
13017c478bd9Sstevel@tonic-gate }
13027c478bd9Sstevel@tonic-gate
13037c478bd9Sstevel@tonic-gate /*
13047c478bd9Sstevel@tonic-gate * Support for pageio buffers.
13057c478bd9Sstevel@tonic-gate *
13067c478bd9Sstevel@tonic-gate * This stuff should be generalized to provide a general-purpose bp
13077c478bd9Sstevel@tonic-gate * header facility that can be used for things other than pageio.
13087c478bd9Sstevel@tonic-gate */
13097c478bd9Sstevel@tonic-gate
13107c478bd9Sstevel@tonic-gate /*
13117c478bd9Sstevel@tonic-gate * Allocate and initialize a buf struct for use with pageio.
13127c478bd9Sstevel@tonic-gate */
13137c478bd9Sstevel@tonic-gate struct buf *
13147c478bd9Sstevel@tonic-gate pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
13157c478bd9Sstevel@tonic-gate {
13167c478bd9Sstevel@tonic-gate struct buf *bp;
13177c478bd9Sstevel@tonic-gate struct cpu *cpup;
13187c478bd9Sstevel@tonic-gate
13197c478bd9Sstevel@tonic-gate if (flags & B_READ) {
13207c478bd9Sstevel@tonic-gate CPU_STATS_ENTER_K();
13217c478bd9Sstevel@tonic-gate cpup = CPU; /* get pointer AFTER preemption is disabled */
13227c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgin, 1);
13237c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));
13247c478bd9Sstevel@tonic-gate if ((flags & B_ASYNC) == 0) {
13257c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread);
13267c478bd9Sstevel@tonic-gate if (lwp != NULL)
13277c478bd9Sstevel@tonic-gate lwp->lwp_ru.majflt++;
13287c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
13297c478bd9Sstevel@tonic-gate /* Kernel probe */
13307c478bd9Sstevel@tonic-gate TNF_PROBE_2(major_fault, "vm pagefault", /* CSTYLED */,
13317c478bd9Sstevel@tonic-gate tnf_opaque, vnode, pp->p_vnode,
13327c478bd9Sstevel@tonic-gate tnf_offset, offset, pp->p_offset);
13337c478bd9Sstevel@tonic-gate }
13347c478bd9Sstevel@tonic-gate /*
13357c478bd9Sstevel@tonic-gate * Update statistics for pages being paged in
13367c478bd9Sstevel@tonic-gate */
13377c478bd9Sstevel@tonic-gate if (pp != NULL && pp->p_vnode != NULL) {
13387c478bd9Sstevel@tonic-gate if (IS_SWAPFSVP(pp->p_vnode)) {
1339d3d50737SRafael Vanoni CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
13407c478bd9Sstevel@tonic-gate } else {
13417c478bd9Sstevel@tonic-gate if (pp->p_vnode->v_flag & VVMEXEC) {
13427c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, execpgin,
13437c478bd9Sstevel@tonic-gate btopr(len));
13447c478bd9Sstevel@tonic-gate } else {
13457c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, vm, fspgin,
13467c478bd9Sstevel@tonic-gate btopr(len));
13477c478bd9Sstevel@tonic-gate }
13487c478bd9Sstevel@tonic-gate }
13497c478bd9Sstevel@tonic-gate }
13507c478bd9Sstevel@tonic-gate CPU_STATS_EXIT_K();
13517c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
13527c478bd9Sstevel@tonic-gate "page_ws_in:pp %p", pp);
13537c478bd9Sstevel@tonic-gate /* Kernel probe */
13547c478bd9Sstevel@tonic-gate TNF_PROBE_3(pagein, "vm pageio io", /* CSTYLED */,
13557c478bd9Sstevel@tonic-gate tnf_opaque, vnode, pp->p_vnode,
13567c478bd9Sstevel@tonic-gate tnf_offset, offset, pp->p_offset,
13577c478bd9Sstevel@tonic-gate tnf_size, size, len);
13587c478bd9Sstevel@tonic-gate }
13597c478bd9Sstevel@tonic-gate
13607c478bd9Sstevel@tonic-gate bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
13617c478bd9Sstevel@tonic-gate bp->b_bcount = len;
13627c478bd9Sstevel@tonic-gate bp->b_bufsize = len;
13637c478bd9Sstevel@tonic-gate bp->b_pages = pp;
13647c478bd9Sstevel@tonic-gate bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
13657c478bd9Sstevel@tonic-gate bp->b_offset = -1;
13667c478bd9Sstevel@tonic-gate sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
13677c478bd9Sstevel@tonic-gate
13687c478bd9Sstevel@tonic-gate /* Initialize bp->b_sem in "locked" state */
13697c478bd9Sstevel@tonic-gate sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
13707c478bd9Sstevel@tonic-gate
13717c478bd9Sstevel@tonic-gate VN_HOLD(vp);
13727c478bd9Sstevel@tonic-gate bp->b_vp = vp;
13737c478bd9Sstevel@tonic-gate
13747c478bd9Sstevel@tonic-gate /*
13757c478bd9Sstevel@tonic-gate * Caller sets dev & blkno and can adjust
13767c478bd9Sstevel@tonic-gate * b_addr for page offset and can use bp_mapin
13777c478bd9Sstevel@tonic-gate * to make pages kernel addressable.
13787c478bd9Sstevel@tonic-gate */
13797c478bd9Sstevel@tonic-gate return (bp);
13807c478bd9Sstevel@tonic-gate }
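/*
 * Sketch of a typical synchronous pagein caller (illustrative only;
 * dev and bn would come from the file system's own block mapping):
 *
 *	bp = pageio_setup(pp, len, vp, B_READ);
 *	bp->b_edev = dev;
 *	bp->b_blkno = bn;
 *	(void) bdev_strategy(bp);
 *	err = biowait(bp);
 *	pageio_done(bp);
 */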
13817c478bd9Sstevel@tonic-gate
13827c478bd9Sstevel@tonic-gate void
13837c478bd9Sstevel@tonic-gate pageio_done(struct buf *bp)
13847c478bd9Sstevel@tonic-gate {
13857c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
13867c478bd9Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
13877c478bd9Sstevel@tonic-gate bp_mapout(bp);
13887c478bd9Sstevel@tonic-gate VN_RELE(bp->b_vp);
13897c478bd9Sstevel@tonic-gate bp->b_vp = NULL;
13907c478bd9Sstevel@tonic-gate ASSERT((bp->b_flags & B_NOCACHE) != 0);
13917c478bd9Sstevel@tonic-gate
13927c478bd9Sstevel@tonic-gate /* A sema_v(bp->b_sem) is implied if we are destroying it */
13937c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_sem);
13947c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_io);
13957c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (struct buf));
13967c478bd9Sstevel@tonic-gate }
13977c478bd9Sstevel@tonic-gate
13987c478bd9Sstevel@tonic-gate /*
13997c478bd9Sstevel@tonic-gate * Check to see whether any buffers associated with the device, other
14007c478bd9Sstevel@tonic-gate * than the one pointed to by sbp, are busy.
14017c478bd9Sstevel@tonic-gate * NOTE: This expensive operation shall be improved together with ufs_icheck().
14027c478bd9Sstevel@tonic-gate */
14037c478bd9Sstevel@tonic-gate int
14047c478bd9Sstevel@tonic-gate bcheck(dev_t dev, struct buf *sbp)
14057c478bd9Sstevel@tonic-gate {
14067c478bd9Sstevel@tonic-gate struct buf *bp;
14077c478bd9Sstevel@tonic-gate struct buf *dp;
14087c478bd9Sstevel@tonic-gate int i;
14097c478bd9Sstevel@tonic-gate kmutex_t *hmp;
14107c478bd9Sstevel@tonic-gate
14117c478bd9Sstevel@tonic-gate /*
14127c478bd9Sstevel@tonic-gate * check for busy bufs for this filesystem
14137c478bd9Sstevel@tonic-gate */
14147c478bd9Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) {
14157c478bd9Sstevel@tonic-gate dp = (struct buf *)&hbuf[i];
14167c478bd9Sstevel@tonic-gate hmp = &hbuf[i].b_lock;
14177c478bd9Sstevel@tonic-gate
14187c478bd9Sstevel@tonic-gate mutex_enter(hmp);
14197c478bd9Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
14207c478bd9Sstevel@tonic-gate /*
14217c478bd9Sstevel@tonic-gate * if buf is busy or dirty, then filesystem is busy
14227c478bd9Sstevel@tonic-gate */
14237c478bd9Sstevel@tonic-gate if ((bp->b_edev == dev) &&
14247c478bd9Sstevel@tonic-gate ((bp->b_flags & B_STALE) == 0) &&
14257c478bd9Sstevel@tonic-gate (bp->b_flags & (B_DELWRI|B_BUSY)) &&
14267c478bd9Sstevel@tonic-gate (bp != sbp)) {
14277c478bd9Sstevel@tonic-gate mutex_exit(hmp);
14287c478bd9Sstevel@tonic-gate return (1);
14297c478bd9Sstevel@tonic-gate }
14307c478bd9Sstevel@tonic-gate }
14317c478bd9Sstevel@tonic-gate mutex_exit(hmp);
14327c478bd9Sstevel@tonic-gate }
14337c478bd9Sstevel@tonic-gate return (0);
14347c478bd9Sstevel@tonic-gate }
14357c478bd9Sstevel@tonic-gate
14367c478bd9Sstevel@tonic-gate /*
14377c478bd9Sstevel@tonic-gate * Hash two 32 bit entities.
14387c478bd9Sstevel@tonic-gate */
14397c478bd9Sstevel@tonic-gate int
14407c478bd9Sstevel@tonic-gate hash2ints(int x, int y)
14417c478bd9Sstevel@tonic-gate {
14427c478bd9Sstevel@tonic-gate int hash = 0;
14437c478bd9Sstevel@tonic-gate
14447c478bd9Sstevel@tonic-gate hash = x - 1;
14457c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (x >> 8)) - 1;
14467c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (x >> 16)) - 1;
14477c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (x >> 24)) - 1;
14487c478bd9Sstevel@tonic-gate hash = ((hash * 7) + y) - 1;
14497c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (y >> 8)) - 1;
14507c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (y >> 16)) - 1;
14517c478bd9Sstevel@tonic-gate hash = ((hash * 7) + (y >> 24)) - 1;
14527c478bd9Sstevel@tonic-gate
14537c478bd9Sstevel@tonic-gate return (hash);
14547c478bd9Sstevel@tonic-gate }
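/*
 * In other words, hash2ints() folds successive 8-bit right shifts of
 * x and then y into a base-7 accumulator, subtracting one at each
 * step.  Callers such as bio_bhash() presumably mask the result down
 * to a table index with v.v_hmask, so only the low-order bits of the
 * returned value matter.
 */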
14557c478bd9Sstevel@tonic-gate
14567c478bd9Sstevel@tonic-gate
14577c478bd9Sstevel@tonic-gate /*
14587c478bd9Sstevel@tonic-gate * Return a new buffer struct.
14597c478bd9Sstevel@tonic-gate * Create a new buffer if we haven't gone over our high water
14607c478bd9Sstevel@tonic-gate * mark for memory; otherwise, try to get one off the freelist.
14617c478bd9Sstevel@tonic-gate *
14627c478bd9Sstevel@tonic-gate * Returns a locked buf that has no id and is not on any hash or free
14637c478bd9Sstevel@tonic-gate * list.
14647c478bd9Sstevel@tonic-gate */
14657c478bd9Sstevel@tonic-gate static struct buf *
14667c478bd9Sstevel@tonic-gate bio_getfreeblk(long bsize)
14677c478bd9Sstevel@tonic-gate {
14687c478bd9Sstevel@tonic-gate struct buf *bp, *dp;
14697c478bd9Sstevel@tonic-gate struct hbuf *hp;
14707c478bd9Sstevel@tonic-gate kmutex_t *hmp;
14717c478bd9Sstevel@tonic-gate uint_t start, end;
14727c478bd9Sstevel@tonic-gate
14737c478bd9Sstevel@tonic-gate /*
14747c478bd9Sstevel@tonic-gate * bfreelist.b_bufsize represents the amount of memory we are
14757c478bd9Sstevel@tonic-gate * allowed to allocate in the cache before we hit our hwm.
14767c478bd9Sstevel@tonic-gate * References to bfreelist must be made while holding
14777c478bd9Sstevel@tonic-gate * bfree_lock (mutex_enter/mutex_exit).
14787c478bd9Sstevel@tonic-gate */
14797c478bd9Sstevel@tonic-gate bio_mem_get(bsize); /* Account for our memory request */
14807c478bd9Sstevel@tonic-gate
14817c478bd9Sstevel@tonic-gate again:
14827c478bd9Sstevel@tonic-gate bp = bio_bhdr_alloc(); /* Get a buf hdr */
14837c478bd9Sstevel@tonic-gate sema_p(&bp->b_sem); /* Should never fail */
14847c478bd9Sstevel@tonic-gate
14857c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr == NULL);
14867c478bd9Sstevel@tonic-gate bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
14877c478bd9Sstevel@tonic-gate if (bp->b_un.b_addr != NULL) {
14887c478bd9Sstevel@tonic-gate /*
14897c478bd9Sstevel@tonic-gate * Make the common path short
14907c478bd9Sstevel@tonic-gate */
14917c478bd9Sstevel@tonic-gate bp->b_bufsize = bsize;
14927c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
14937c478bd9Sstevel@tonic-gate return (bp);
14947c478bd9Sstevel@tonic-gate } else {
14957c478bd9Sstevel@tonic-gate struct buf *save;
14967c478bd9Sstevel@tonic-gate
14977c478bd9Sstevel@tonic-gate save = bp; /* Save bp we allocated */
14987c478bd9Sstevel@tonic-gate start = end = lastindex;
14997c478bd9Sstevel@tonic-gate
15007c478bd9Sstevel@tonic-gate biostats.bio_bufwant.value.ui32++;
15017c478bd9Sstevel@tonic-gate
15027c478bd9Sstevel@tonic-gate /*
15037c478bd9Sstevel@tonic-gate * Memory isn't available from the system now. Scan
15047c478bd9Sstevel@tonic-gate * the hash buckets till enough space is found.
15057c478bd9Sstevel@tonic-gate */
15067c478bd9Sstevel@tonic-gate do {
15077c478bd9Sstevel@tonic-gate hp = &hbuf[start];
15087c478bd9Sstevel@tonic-gate hmp = &hp->b_lock;
15097c478bd9Sstevel@tonic-gate dp = (struct buf *)hp;
15107c478bd9Sstevel@tonic-gate
15117c478bd9Sstevel@tonic-gate mutex_enter(hmp);
15127c478bd9Sstevel@tonic-gate bp = dp->av_forw;
15137c478bd9Sstevel@tonic-gate
15147c478bd9Sstevel@tonic-gate while (bp != dp) {
15157c478bd9Sstevel@tonic-gate
15167c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
15177c478bd9Sstevel@tonic-gate
15187c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) {
15197c478bd9Sstevel@tonic-gate bp = bp->av_forw;
15207c478bd9Sstevel@tonic-gate continue;
15217c478bd9Sstevel@tonic-gate }
15227c478bd9Sstevel@tonic-gate
15237c478bd9Sstevel@tonic-gate /*
15247c478bd9Sstevel@tonic-gate * Since we are going down the freelist
15257c478bd9Sstevel@tonic-gate * associated with this hash bucket the
15267c478bd9Sstevel@tonic-gate * B_DELWRI flag should not be set.
15277c478bd9Sstevel@tonic-gate */
15287c478bd9Sstevel@tonic-gate ASSERT(!(bp->b_flags & B_DELWRI));
15297c478bd9Sstevel@tonic-gate
15307c478bd9Sstevel@tonic-gate if (bp->b_bufsize == bsize) {
15317c478bd9Sstevel@tonic-gate hp->b_length--;
15327c478bd9Sstevel@tonic-gate notavail(bp);
15337c478bd9Sstevel@tonic-gate bremhash(bp);
15347c478bd9Sstevel@tonic-gate mutex_exit(hmp);
15357c478bd9Sstevel@tonic-gate
15367c478bd9Sstevel@tonic-gate /*
15377c478bd9Sstevel@tonic-gate * Didn't kmem_alloc any more, so don't
15387c478bd9Sstevel@tonic-gate * count it twice.
15397c478bd9Sstevel@tonic-gate */
15407c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock);
15417c478bd9Sstevel@tonic-gate bfreelist.b_bufsize += bsize;
15427c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
15437c478bd9Sstevel@tonic-gate
15447c478bd9Sstevel@tonic-gate /*
15457c478bd9Sstevel@tonic-gate * Update the lastindex value.
15467c478bd9Sstevel@tonic-gate */
15477c478bd9Sstevel@tonic-gate lastindex = start;
15487c478bd9Sstevel@tonic-gate
15497c478bd9Sstevel@tonic-gate /*
15507c478bd9Sstevel@tonic-gate * Put our saved bp back on the list
15517c478bd9Sstevel@tonic-gate */
15527c478bd9Sstevel@tonic-gate sema_v(&save->b_sem);
15537c478bd9Sstevel@tonic-gate bio_bhdr_free(save);
15547c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
15557c478bd9Sstevel@tonic-gate return (bp);
15567c478bd9Sstevel@tonic-gate }
15577c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem);
15587c478bd9Sstevel@tonic-gate bp = bp->av_forw;
15597c478bd9Sstevel@tonic-gate }
15607c478bd9Sstevel@tonic-gate mutex_exit(hmp);
15617c478bd9Sstevel@tonic-gate start = ((start + 1) % v.v_hbuf);
15627c478bd9Sstevel@tonic-gate } while (start != end);
15637c478bd9Sstevel@tonic-gate
15647c478bd9Sstevel@tonic-gate biostats.bio_bufwait.value.ui32++;
15657c478bd9Sstevel@tonic-gate bp = save; /* Use original bp */
15667c478bd9Sstevel@tonic-gate bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
15677c478bd9Sstevel@tonic-gate }
15687c478bd9Sstevel@tonic-gate
15697c478bd9Sstevel@tonic-gate bp->b_bufsize = bsize;
15707c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
15717c478bd9Sstevel@tonic-gate return (bp);
15727c478bd9Sstevel@tonic-gate }
15737c478bd9Sstevel@tonic-gate
15747c478bd9Sstevel@tonic-gate /*
15757c478bd9Sstevel@tonic-gate * Allocate a buffer header. If none currently available, allocate
15767c478bd9Sstevel@tonic-gate * a new pool.
15777c478bd9Sstevel@tonic-gate */
15787c478bd9Sstevel@tonic-gate static struct buf *
15797c478bd9Sstevel@tonic-gate bio_bhdr_alloc(void)
15807c478bd9Sstevel@tonic-gate {
15817c478bd9Sstevel@tonic-gate struct buf *dp, *sdp;
15827c478bd9Sstevel@tonic-gate struct buf *bp;
15837c478bd9Sstevel@tonic-gate int i;
15847c478bd9Sstevel@tonic-gate
15857c478bd9Sstevel@tonic-gate for (;;) {
15867c478bd9Sstevel@tonic-gate mutex_enter(&bhdr_lock);
15877c478bd9Sstevel@tonic-gate if (bhdrlist != NULL) {
15887c478bd9Sstevel@tonic-gate bp = bhdrlist;
15897c478bd9Sstevel@tonic-gate bhdrlist = bp->av_forw;
15907c478bd9Sstevel@tonic-gate mutex_exit(&bhdr_lock);
15917c478bd9Sstevel@tonic-gate bp->av_forw = NULL;
15927c478bd9Sstevel@tonic-gate return (bp);
15937c478bd9Sstevel@tonic-gate }
15947c478bd9Sstevel@tonic-gate mutex_exit(&bhdr_lock);
15957c478bd9Sstevel@tonic-gate
15967c478bd9Sstevel@tonic-gate /*
15977c478bd9Sstevel@tonic-gate * Need to allocate a new pool. If the system is currently
15987c478bd9Sstevel@tonic-gate * out of memory, then try freeing things on the freelist.
15997c478bd9Sstevel@tonic-gate */
16007c478bd9Sstevel@tonic-gate dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
16017c478bd9Sstevel@tonic-gate if (dp == NULL) {
16027c478bd9Sstevel@tonic-gate /*
16037c478bd9Sstevel@tonic-gate * System can't give us a pool of headers, try
16047c478bd9Sstevel@tonic-gate * recycling from the free lists.
16057c478bd9Sstevel@tonic-gate */
16067c478bd9Sstevel@tonic-gate bio_recycle(BIO_HEADER, 0);
16077c478bd9Sstevel@tonic-gate } else {
16087c478bd9Sstevel@tonic-gate sdp = dp;
16097c478bd9Sstevel@tonic-gate for (i = 0; i < v.v_buf; i++, dp++) {
16107c478bd9Sstevel@tonic-gate /*
16117c478bd9Sstevel@tonic-gate * The next two lines are needed since NODEV
16127c478bd9Sstevel@tonic-gate * is -1 and not NULL
16137c478bd9Sstevel@tonic-gate */
16147c478bd9Sstevel@tonic-gate dp->b_dev = (o_dev_t)NODEV;
16157c478bd9Sstevel@tonic-gate dp->b_edev = NODEV;
16167c478bd9Sstevel@tonic-gate dp->av_forw = dp + 1;
16177c478bd9Sstevel@tonic-gate sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
16187c478bd9Sstevel@tonic-gate NULL);
16197c478bd9Sstevel@tonic-gate sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
16207c478bd9Sstevel@tonic-gate NULL);
16217c478bd9Sstevel@tonic-gate dp->b_offset = -1;
16227c478bd9Sstevel@tonic-gate }
16237c478bd9Sstevel@tonic-gate mutex_enter(&bhdr_lock);
16247c478bd9Sstevel@tonic-gate (--dp)->av_forw = bhdrlist; /* Fix last pointer */
16257c478bd9Sstevel@tonic-gate bhdrlist = sdp;
16267c478bd9Sstevel@tonic-gate nbuf += v.v_buf;
16277c478bd9Sstevel@tonic-gate bp = bhdrlist;
16287c478bd9Sstevel@tonic-gate bhdrlist = bp->av_forw;
16297c478bd9Sstevel@tonic-gate mutex_exit(&bhdr_lock);
16307c478bd9Sstevel@tonic-gate
16317c478bd9Sstevel@tonic-gate bp->av_forw = NULL;
16327c478bd9Sstevel@tonic-gate return (bp);
16337c478bd9Sstevel@tonic-gate }
16347c478bd9Sstevel@tonic-gate }
16357c478bd9Sstevel@tonic-gate }
16367c478bd9Sstevel@tonic-gate
16377c478bd9Sstevel@tonic-gate static void
16387c478bd9Sstevel@tonic-gate bio_bhdr_free(struct buf *bp)
16397c478bd9Sstevel@tonic-gate {
16407c478bd9Sstevel@tonic-gate ASSERT(bp->b_back == NULL);
16417c478bd9Sstevel@tonic-gate ASSERT(bp->b_forw == NULL);
16427c478bd9Sstevel@tonic-gate ASSERT(bp->av_back == NULL);
16437c478bd9Sstevel@tonic-gate ASSERT(bp->av_forw == NULL);
16447c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr == NULL);
16457c478bd9Sstevel@tonic-gate ASSERT(bp->b_dev == (o_dev_t)NODEV);
16467c478bd9Sstevel@tonic-gate ASSERT(bp->b_edev == NODEV);
16477c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags == 0);
16487c478bd9Sstevel@tonic-gate
16497c478bd9Sstevel@tonic-gate mutex_enter(&bhdr_lock);
16507c478bd9Sstevel@tonic-gate bp->av_forw = bhdrlist;
16517c478bd9Sstevel@tonic-gate bhdrlist = bp;
16527c478bd9Sstevel@tonic-gate mutex_exit(&bhdr_lock);
16537c478bd9Sstevel@tonic-gate }
16547c478bd9Sstevel@tonic-gate
16557c478bd9Sstevel@tonic-gate /*
16567c478bd9Sstevel@tonic-gate * If we haven't gone over the high water mark, it's o.k. to
16577c478bd9Sstevel@tonic-gate * allocate more buffer space; otherwise, recycle buffers
16587c478bd9Sstevel@tonic-gate * from the freelist until enough memory is free for a bsize request.
16597c478bd9Sstevel@tonic-gate *
16607c478bd9Sstevel@tonic-gate * We account for this memory, even though
16617c478bd9Sstevel@tonic-gate * we don't allocate it here.
16627c478bd9Sstevel@tonic-gate */
16637c478bd9Sstevel@tonic-gate static void
16647c478bd9Sstevel@tonic-gate bio_mem_get(long bsize)
16657c478bd9Sstevel@tonic-gate {
16667c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock);
16677c478bd9Sstevel@tonic-gate if (bfreelist.b_bufsize > bsize) {
16687c478bd9Sstevel@tonic-gate bfreelist.b_bufsize -= bsize;
16697c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
16707c478bd9Sstevel@tonic-gate return;
16717c478bd9Sstevel@tonic-gate }
16727c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
16737c478bd9Sstevel@tonic-gate bio_recycle(BIO_MEM, bsize);
16747c478bd9Sstevel@tonic-gate }
16757c478bd9Sstevel@tonic-gate
16767c478bd9Sstevel@tonic-gate /*
16777c478bd9Sstevel@tonic-gate * Flush a list of delayed-write buffers.
16787c478bd9Sstevel@tonic-gate * (Currently used only by bio_recycle() below.)
16797c478bd9Sstevel@tonic-gate */
16807c478bd9Sstevel@tonic-gate static void
16817c478bd9Sstevel@tonic-gate bio_flushlist(struct buf *delwri_list)
16827c478bd9Sstevel@tonic-gate {
16837c478bd9Sstevel@tonic-gate struct buf *bp;
16847c478bd9Sstevel@tonic-gate
16857c478bd9Sstevel@tonic-gate while (delwri_list != EMPTY_LIST) {
16867c478bd9Sstevel@tonic-gate bp = delwri_list;
16877c478bd9Sstevel@tonic-gate bp->b_flags |= B_AGE | B_ASYNC;
16887c478bd9Sstevel@tonic-gate if (bp->b_vp == NULL) { /* !ufs */
16897c478bd9Sstevel@tonic-gate BWRITE(bp);
16907c478bd9Sstevel@tonic-gate } else { /* ufs */
16917c478bd9Sstevel@tonic-gate UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
16927c478bd9Sstevel@tonic-gate }
16937c478bd9Sstevel@tonic-gate delwri_list = bp->b_list;
16947c478bd9Sstevel@tonic-gate bp->b_list = NULL;
16957c478bd9Sstevel@tonic-gate }
16967c478bd9Sstevel@tonic-gate }
16977c478bd9Sstevel@tonic-gate
16987c478bd9Sstevel@tonic-gate /*
16997c478bd9Sstevel@tonic-gate * Start recycling buffers on the freelist for one of 2 reasons:
17007c478bd9Sstevel@tonic-gate * - we need a buffer header
17017c478bd9Sstevel@tonic-gate * - we need to free up memory
17027c478bd9Sstevel@tonic-gate * Once started we continue to recycle buffers until the B_AGE
17037c478bd9Sstevel@tonic-gate * buffers are gone.
17047c478bd9Sstevel@tonic-gate */
17057c478bd9Sstevel@tonic-gate static void
17067c478bd9Sstevel@tonic-gate bio_recycle(int want, long bsize)
17077c478bd9Sstevel@tonic-gate {
17087c478bd9Sstevel@tonic-gate struct buf *bp, *dp, *dwp, *nbp;
17097c478bd9Sstevel@tonic-gate struct hbuf *hp;
17107c478bd9Sstevel@tonic-gate int found = 0;
17117c478bd9Sstevel@tonic-gate kmutex_t *hmp;
17127c478bd9Sstevel@tonic-gate int start, end;
17137c478bd9Sstevel@tonic-gate struct buf *delwri_list = EMPTY_LIST;
17147c478bd9Sstevel@tonic-gate
17157c478bd9Sstevel@tonic-gate /*
17167c478bd9Sstevel@tonic-gate * Recycle buffers.
17177c478bd9Sstevel@tonic-gate */
17187c478bd9Sstevel@tonic-gate top:
17197c478bd9Sstevel@tonic-gate start = end = lastindex;
17207c478bd9Sstevel@tonic-gate do {
17217c478bd9Sstevel@tonic-gate hp = &hbuf[start];
17227c478bd9Sstevel@tonic-gate hmp = &hp->b_lock;
17237c478bd9Sstevel@tonic-gate dp = (struct buf *)hp;
17247c478bd9Sstevel@tonic-gate
17257c478bd9Sstevel@tonic-gate mutex_enter(hmp);
17267c478bd9Sstevel@tonic-gate bp = dp->av_forw;
17277c478bd9Sstevel@tonic-gate
17287c478bd9Sstevel@tonic-gate while (bp != dp) {
17297c478bd9Sstevel@tonic-gate
17307c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
17317c478bd9Sstevel@tonic-gate
17327c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) {
17337c478bd9Sstevel@tonic-gate bp = bp->av_forw;
17347c478bd9Sstevel@tonic-gate continue;
17357c478bd9Sstevel@tonic-gate }
17367c478bd9Sstevel@tonic-gate /*
17377c478bd9Sstevel@tonic-gate * Do we really want to nuke all of the B_AGE stuff??
17387c478bd9Sstevel@tonic-gate */
17397c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_AGE) == 0 && found) {
17407c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem);
17417c478bd9Sstevel@tonic-gate mutex_exit(hmp);
17427c478bd9Sstevel@tonic-gate lastindex = start;
17437c478bd9Sstevel@tonic-gate return; /* All done */
17447c478bd9Sstevel@tonic-gate }
17457c478bd9Sstevel@tonic-gate
17467c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&hp->b_lock));
17477c478bd9Sstevel@tonic-gate ASSERT(!(bp->b_flags & B_DELWRI));
17487c478bd9Sstevel@tonic-gate hp->b_length--;
17497c478bd9Sstevel@tonic-gate notavail(bp);
17507c478bd9Sstevel@tonic-gate
17517c478bd9Sstevel@tonic-gate /*
17527c478bd9Sstevel@tonic-gate * Remove bhdr from cache, free up memory,
17537c478bd9Sstevel@tonic-gate * and add the hdr to the freelist.
17547c478bd9Sstevel@tonic-gate */
17557c478bd9Sstevel@tonic-gate bremhash(bp);
17567c478bd9Sstevel@tonic-gate mutex_exit(hmp);
17577c478bd9Sstevel@tonic-gate
17587c478bd9Sstevel@tonic-gate if (bp->b_bufsize) {
17597c478bd9Sstevel@tonic-gate kmem_free(bp->b_un.b_addr, bp->b_bufsize);
17607c478bd9Sstevel@tonic-gate bp->b_un.b_addr = NULL;
17617c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock);
17627c478bd9Sstevel@tonic-gate bfreelist.b_bufsize += bp->b_bufsize;
17637c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
17647c478bd9Sstevel@tonic-gate }
17657c478bd9Sstevel@tonic-gate
17667c478bd9Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV;
17677c478bd9Sstevel@tonic-gate bp->b_edev = NODEV;
17687c478bd9Sstevel@tonic-gate bp->b_flags = 0;
17697c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem);
17707c478bd9Sstevel@tonic-gate bio_bhdr_free(bp);
17717c478bd9Sstevel@tonic-gate if (want == BIO_HEADER) {
17727c478bd9Sstevel@tonic-gate found = 1;
17737c478bd9Sstevel@tonic-gate } else {
17747c478bd9Sstevel@tonic-gate ASSERT(want == BIO_MEM);
17757c478bd9Sstevel@tonic-gate if (!found && bfreelist.b_bufsize >= bsize) {
17767c478bd9Sstevel@tonic-gate /* Account for the memory we want */
17777c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock);
17787c478bd9Sstevel@tonic-gate if (bfreelist.b_bufsize >= bsize) {
17797c478bd9Sstevel@tonic-gate bfreelist.b_bufsize -= bsize;
17807c478bd9Sstevel@tonic-gate found = 1;
17817c478bd9Sstevel@tonic-gate }
17827c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
17837c478bd9Sstevel@tonic-gate }
17847c478bd9Sstevel@tonic-gate }
17857c478bd9Sstevel@tonic-gate
17867c478bd9Sstevel@tonic-gate /*
17877c478bd9Sstevel@tonic-gate * Since we dropped hmp, start from the
17887c478bd9Sstevel@tonic-gate * beginning.
17897c478bd9Sstevel@tonic-gate */
17907c478bd9Sstevel@tonic-gate mutex_enter(hmp);
17917c478bd9Sstevel@tonic-gate bp = dp->av_forw;
17927c478bd9Sstevel@tonic-gate }
17937c478bd9Sstevel@tonic-gate mutex_exit(hmp);
17947c478bd9Sstevel@tonic-gate
17957c478bd9Sstevel@tonic-gate /*
17967c478bd9Sstevel@tonic-gate * Look at the delayed write list.
17977c478bd9Sstevel@tonic-gate * First gather into a private list, then write them.
17987c478bd9Sstevel@tonic-gate */
17997c478bd9Sstevel@tonic-gate dwp = (struct buf *)&dwbuf[start];
18007c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock);
18017c478bd9Sstevel@tonic-gate bio_doingflush++;
18027c478bd9Sstevel@tonic-gate mutex_enter(hmp);
18037c478bd9Sstevel@tonic-gate for (bp = dwp->av_forw; bp != dwp; bp = nbp) {
18047c478bd9Sstevel@tonic-gate
18057c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
18067c478bd9Sstevel@tonic-gate nbp = bp->av_forw;
18077c478bd9Sstevel@tonic-gate
18087c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem))
18097c478bd9Sstevel@tonic-gate continue;
18107c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags & B_DELWRI);
18117c478bd9Sstevel@tonic-gate /*
18127c478bd9Sstevel@tonic-gate * Do we really want to nuke all of the B_AGE stuff??
18137c478bd9Sstevel@tonic-gate */
18147c478bd9Sstevel@tonic-gate
18157c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_AGE) == 0 && found) {
18167c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem);
18177c478bd9Sstevel@tonic-gate mutex_exit(hmp);
18187c478bd9Sstevel@tonic-gate lastindex = start;
18197c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock);
18207c478bd9Sstevel@tonic-gate bio_flushlist(delwri_list);
18217c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock);
18227c478bd9Sstevel@tonic-gate bio_doingflush--;
18237c478bd9Sstevel@tonic-gate if (bio_flinv_cv_wanted) {
18247c478bd9Sstevel@tonic-gate bio_flinv_cv_wanted = 0;
18257c478bd9Sstevel@tonic-gate cv_broadcast(&bio_flushinval_cv);
18267c478bd9Sstevel@tonic-gate }
18277c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock);
18287c478bd9Sstevel@tonic-gate return; /* All done */
18297c478bd9Sstevel@tonic-gate }
18307c478bd9Sstevel@tonic-gate
18317c478bd9Sstevel@tonic-gate /*
18327c478bd9Sstevel@tonic-gate * If the buffer is already on a flush or
18337c478bd9Sstevel@tonic-gate * invalidate list then just skip it.
18347c478bd9Sstevel@tonic-gate */
18357c478bd9Sstevel@tonic-gate if (bp->b_list != NULL) {
18367c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem);
18377c478bd9Sstevel@tonic-gate continue;
18387c478bd9Sstevel@tonic-gate }
18397c478bd9Sstevel@tonic-gate /*
18407c478bd9Sstevel@tonic-gate * We are still on the same bucket.
18417c478bd9Sstevel@tonic-gate */
18427c478bd9Sstevel@tonic-gate hp->b_length--;
18437c478bd9Sstevel@tonic-gate notavail(bp);
18447c478bd9Sstevel@tonic-gate bp->b_list = delwri_list;
18457c478bd9Sstevel@tonic-gate delwri_list = bp;
18467c478bd9Sstevel@tonic-gate }
18477c478bd9Sstevel@tonic-gate mutex_exit(hmp);
18487c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock);
18497c478bd9Sstevel@tonic-gate bio_flushlist(delwri_list);
18507c478bd9Sstevel@tonic-gate delwri_list = EMPTY_LIST;
18517c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock);
18527c478bd9Sstevel@tonic-gate bio_doingflush--;
18537c478bd9Sstevel@tonic-gate if (bio_flinv_cv_wanted) {
18547c478bd9Sstevel@tonic-gate bio_flinv_cv_wanted = 0;
18557c478bd9Sstevel@tonic-gate cv_broadcast(&bio_flushinval_cv);
18567c478bd9Sstevel@tonic-gate }
18577c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock);
18587c478bd9Sstevel@tonic-gate start = (start + 1) % v.v_hbuf;
18597c478bd9Sstevel@tonic-gate
18607c478bd9Sstevel@tonic-gate } while (start != end);
18617c478bd9Sstevel@tonic-gate
18627c478bd9Sstevel@tonic-gate if (found)
18637c478bd9Sstevel@tonic-gate return;
18647c478bd9Sstevel@tonic-gate
18657c478bd9Sstevel@tonic-gate /*
18667c478bd9Sstevel@tonic-gate * Free lists exhausted and we haven't satisfied the request.
18677c478bd9Sstevel@tonic-gate * Wait here for more entries to be added to freelist.
18687c478bd9Sstevel@tonic-gate * Because this might have just happened, make it timed.
18697c478bd9Sstevel@tonic-gate */
18707c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock);
18717c478bd9Sstevel@tonic-gate bfreelist.b_flags |= B_WANTED;
1872d3d50737SRafael Vanoni (void) cv_reltimedwait(&bio_mem_cv, &bfree_lock, hz, TR_CLOCK_TICK);
18737c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock);
18747c478bd9Sstevel@tonic-gate goto top;
18757c478bd9Sstevel@tonic-gate }
18767c478bd9Sstevel@tonic-gate
18777c478bd9Sstevel@tonic-gate /*
18787c478bd9Sstevel@tonic-gate * See if the block is associated with some buffer
18797c478bd9Sstevel@tonic-gate * (mainly to avoid getting hung up on a wait in breada).
18807c478bd9Sstevel@tonic-gate */
18817c478bd9Sstevel@tonic-gate static int
18827c478bd9Sstevel@tonic-gate bio_incore(dev_t dev, daddr_t blkno)
18837c478bd9Sstevel@tonic-gate {
18847c478bd9Sstevel@tonic-gate struct buf *bp;
18857c478bd9Sstevel@tonic-gate struct buf *dp;
18867c478bd9Sstevel@tonic-gate uint_t index;
18877c478bd9Sstevel@tonic-gate kmutex_t *hmp;
18887c478bd9Sstevel@tonic-gate
18897c478bd9Sstevel@tonic-gate index = bio_bhash(dev, blkno);
18907c478bd9Sstevel@tonic-gate dp = (struct buf *)&hbuf[index];
18917c478bd9Sstevel@tonic-gate hmp = &hbuf[index].b_lock;
18927c478bd9Sstevel@tonic-gate
18937c478bd9Sstevel@tonic-gate mutex_enter(hmp);
18947c478bd9Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
18957c478bd9Sstevel@tonic-gate if (bp->b_blkno == blkno && bp->b_edev == dev &&
18967c478bd9Sstevel@tonic-gate (bp->b_flags & B_STALE) == 0) {
18977c478bd9Sstevel@tonic-gate mutex_exit(hmp);
18987c478bd9Sstevel@tonic-gate return (1);
18997c478bd9Sstevel@tonic-gate }
19007c478bd9Sstevel@tonic-gate }
19017c478bd9Sstevel@tonic-gate mutex_exit(hmp);
19027c478bd9Sstevel@tonic-gate return (0);
19037c478bd9Sstevel@tonic-gate }
19047c478bd9Sstevel@tonic-gate
19057c478bd9Sstevel@tonic-gate static void
19067c478bd9Sstevel@tonic-gate bio_pageio_done(struct buf *bp)
19077c478bd9Sstevel@tonic-gate {
19087c478bd9Sstevel@tonic-gate if (bp->b_flags & B_PAGEIO) {
19097c478bd9Sstevel@tonic-gate
19107c478bd9Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
19117c478bd9Sstevel@tonic-gate bp_mapout(bp);
19127c478bd9Sstevel@tonic-gate
19137c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ)
19147c478bd9Sstevel@tonic-gate pvn_read_done(bp->b_pages, bp->b_flags);
19157c478bd9Sstevel@tonic-gate else
19167c478bd9Sstevel@tonic-gate pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags);
19177c478bd9Sstevel@tonic-gate pageio_done(bp);
19187c478bd9Sstevel@tonic-gate } else {
19197c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags & B_REMAPPED);
19207c478bd9Sstevel@tonic-gate bp_mapout(bp);
19217c478bd9Sstevel@tonic-gate brelse(bp);
19227c478bd9Sstevel@tonic-gate }
19237c478bd9Sstevel@tonic-gate }
19247c478bd9Sstevel@tonic-gate
19257c478bd9Sstevel@tonic-gate /*
19267c478bd9Sstevel@tonic-gate * bioerror(9F) - indicate error in buffer header
19277c478bd9Sstevel@tonic-gate * If 'error' is zero, remove the error indication.
19287c478bd9Sstevel@tonic-gate */
19297c478bd9Sstevel@tonic-gate void
19307c478bd9Sstevel@tonic-gate bioerror(struct buf *bp, int error)
19317c478bd9Sstevel@tonic-gate {
19327c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
19337c478bd9Sstevel@tonic-gate ASSERT(error >= 0);
19347c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem));
19357c478bd9Sstevel@tonic-gate
19367c478bd9Sstevel@tonic-gate if (error != 0) {
19377c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR;
19387c478bd9Sstevel@tonic-gate } else {
19397c478bd9Sstevel@tonic-gate bp->b_flags &= ~B_ERROR;
19407c478bd9Sstevel@tonic-gate }
19417c478bd9Sstevel@tonic-gate bp->b_error = error;
19427c478bd9Sstevel@tonic-gate }
19437c478bd9Sstevel@tonic-gate
19447c478bd9Sstevel@tonic-gate /*
19457c478bd9Sstevel@tonic-gate * bioreset(9F) - reuse a private buffer header after I/O is complete
19467c478bd9Sstevel@tonic-gate */
19477c478bd9Sstevel@tonic-gate void
19487c478bd9Sstevel@tonic-gate bioreset(struct buf *bp)
19497c478bd9Sstevel@tonic-gate {
19507c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
19517c478bd9Sstevel@tonic-gate
19527c478bd9Sstevel@tonic-gate biofini(bp);
19537c478bd9Sstevel@tonic-gate bioinit(bp);
19547c478bd9Sstevel@tonic-gate }
19557c478bd9Sstevel@tonic-gate
19567c478bd9Sstevel@tonic-gate /*
19577c478bd9Sstevel@tonic-gate * biosize(9F) - return size of a buffer header
19587c478bd9Sstevel@tonic-gate */
19597c478bd9Sstevel@tonic-gate size_t
19607c478bd9Sstevel@tonic-gate biosize(void)
19617c478bd9Sstevel@tonic-gate {
19627c478bd9Sstevel@tonic-gate return (sizeof (struct buf));
19637c478bd9Sstevel@tonic-gate }
19647c478bd9Sstevel@tonic-gate
19657c478bd9Sstevel@tonic-gate /*
19667c478bd9Sstevel@tonic-gate * biomodified(9F) - check if buffer is modified
19677c478bd9Sstevel@tonic-gate */
19687c478bd9Sstevel@tonic-gate int
19697c478bd9Sstevel@tonic-gate biomodified(struct buf *bp)
19707c478bd9Sstevel@tonic-gate {
19717c478bd9Sstevel@tonic-gate int npf;
19727c478bd9Sstevel@tonic-gate int ppattr;
19737c478bd9Sstevel@tonic-gate struct page *pp;
19747c478bd9Sstevel@tonic-gate
19757c478bd9Sstevel@tonic-gate ASSERT(bp != NULL);
19767c478bd9Sstevel@tonic-gate
19777c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_PAGEIO) == 0) {
19787c478bd9Sstevel@tonic-gate return (-1);
19797c478bd9Sstevel@tonic-gate }
19807c478bd9Sstevel@tonic-gate pp = bp->b_pages;
19817c478bd9Sstevel@tonic-gate npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));
19827c478bd9Sstevel@tonic-gate
19837c478bd9Sstevel@tonic-gate while (npf > 0) {
19847c478bd9Sstevel@tonic-gate ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
19857c478bd9Sstevel@tonic-gate HAT_SYNC_STOPON_MOD);
19867c478bd9Sstevel@tonic-gate if (ppattr & P_MOD)
19877c478bd9Sstevel@tonic-gate return (1);
19887c478bd9Sstevel@tonic-gate pp = pp->p_next;
19897c478bd9Sstevel@tonic-gate npf--;
19907c478bd9Sstevel@tonic-gate }
19917c478bd9Sstevel@tonic-gate
19927c478bd9Sstevel@tonic-gate return (0);
19937c478bd9Sstevel@tonic-gate }
19947c478bd9Sstevel@tonic-gate
19957c478bd9Sstevel@tonic-gate /*
19967c478bd9Sstevel@tonic-gate * bioinit(9F) - initialize a buffer structure
19977c478bd9Sstevel@tonic-gate */
19987c478bd9Sstevel@tonic-gate void
19997c478bd9Sstevel@tonic-gate bioinit(struct buf *bp)
20007c478bd9Sstevel@tonic-gate {
20017c478bd9Sstevel@tonic-gate bzero(bp, sizeof (struct buf));
20027c478bd9Sstevel@tonic-gate sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
20037c478bd9Sstevel@tonic-gate sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
20047c478bd9Sstevel@tonic-gate bp->b_offset = -1;
20057c478bd9Sstevel@tonic-gate }
20067c478bd9Sstevel@tonic-gate
20077c478bd9Sstevel@tonic-gate /*
20087c478bd9Sstevel@tonic-gate * biofini(9F) - uninitialize a buffer structure
20097c478bd9Sstevel@tonic-gate */
20107c478bd9Sstevel@tonic-gate void
20117c478bd9Sstevel@tonic-gate biofini(struct buf *bp)
20127c478bd9Sstevel@tonic-gate {
20137c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_io);
20147c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_sem);
20157c478bd9Sstevel@tonic-gate }
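/*
 * Sketch of the private-buf lifecycle these routines support (see
 * bioinit(9F)/biofini(9F); the setup details are illustrative):
 *
 *	bp = kmem_alloc(biosize(), KM_SLEEP);
 *	bioinit(bp);
 *	... fill in b_edev, b_blkno, b_bcount, b_un.b_addr, b_flags ...
 *	(void) bdev_strategy(bp);
 *	(void) biowait(bp);
 *	biofini(bp);
 *	kmem_free(bp, biosize());
 */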
20167c478bd9Sstevel@tonic-gate
20177c478bd9Sstevel@tonic-gate /*
20187c478bd9Sstevel@tonic-gate * bioclone(9F) - clone a buffer
20197c478bd9Sstevel@tonic-gate */
20207c478bd9Sstevel@tonic-gate struct buf *
20217c478bd9Sstevel@tonic-gate bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
20227c478bd9Sstevel@tonic-gate int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
20237c478bd9Sstevel@tonic-gate {
20247c478bd9Sstevel@tonic-gate struct buf *bufp;
20257c478bd9Sstevel@tonic-gate
20267c478bd9Sstevel@tonic-gate ASSERT(bp);
20277c478bd9Sstevel@tonic-gate if (bp_mem == NULL) {
20287c478bd9Sstevel@tonic-gate bufp = kmem_alloc(sizeof (struct buf), sleep);
20297c478bd9Sstevel@tonic-gate if (bufp == NULL) {
20307c478bd9Sstevel@tonic-gate return (NULL);
20317c478bd9Sstevel@tonic-gate }
20327c478bd9Sstevel@tonic-gate bioinit(bufp);
20337c478bd9Sstevel@tonic-gate } else {
20347c478bd9Sstevel@tonic-gate bufp = bp_mem;
20357c478bd9Sstevel@tonic-gate bioreset(bufp);
20367c478bd9Sstevel@tonic-gate }
20377c478bd9Sstevel@tonic-gate
20387c478bd9Sstevel@tonic-gate #define BUF_CLONE_FLAGS (B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
20397c478bd9Sstevel@tonic-gate B_ABRWRITE)
20407c478bd9Sstevel@tonic-gate
20417c478bd9Sstevel@tonic-gate /*
20426f84fed5Scth * The cloned buffer does not inherit the B_REMAPPED flag.
20437c478bd9Sstevel@tonic-gate */
20447c478bd9Sstevel@tonic-gate bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
20457c478bd9Sstevel@tonic-gate bufp->b_bcount = len;
20467c478bd9Sstevel@tonic-gate bufp->b_blkno = blkno;
20477c478bd9Sstevel@tonic-gate bufp->b_iodone = iodone;
20487c478bd9Sstevel@tonic-gate bufp->b_proc = bp->b_proc;
20497c478bd9Sstevel@tonic-gate bufp->b_edev = dev;
20507c478bd9Sstevel@tonic-gate bufp->b_file = bp->b_file;
20517c478bd9Sstevel@tonic-gate bufp->b_offset = bp->b_offset;
20527c478bd9Sstevel@tonic-gate
20537c478bd9Sstevel@tonic-gate if (bp->b_flags & B_SHADOW) {
20547c478bd9Sstevel@tonic-gate ASSERT(bp->b_shadow);
20557c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags & B_PHYS);
20567c478bd9Sstevel@tonic-gate
20577c478bd9Sstevel@tonic-gate bufp->b_shadow = bp->b_shadow +
20587c478bd9Sstevel@tonic-gate btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
20597c478bd9Sstevel@tonic-gate bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
20606f84fed5Scth if (bp->b_flags & B_REMAPPED)
20616f84fed5Scth bufp->b_proc = NULL;
20627c478bd9Sstevel@tonic-gate } else {
20637c478bd9Sstevel@tonic-gate if (bp->b_flags & B_PAGEIO) {
20647c478bd9Sstevel@tonic-gate struct page *pp;
20657c478bd9Sstevel@tonic-gate off_t o;
20667c478bd9Sstevel@tonic-gate int i;
20677c478bd9Sstevel@tonic-gate
20687c478bd9Sstevel@tonic-gate pp = bp->b_pages;
20697c478bd9Sstevel@tonic-gate o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
20707c478bd9Sstevel@tonic-gate for (i = btop(o); i > 0; i--) {
20717c478bd9Sstevel@tonic-gate pp = pp->p_next;
20727c478bd9Sstevel@tonic-gate }
20737c478bd9Sstevel@tonic-gate bufp->b_pages = pp;
20747c478bd9Sstevel@tonic-gate bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
20757c478bd9Sstevel@tonic-gate } else {
20767c478bd9Sstevel@tonic-gate bufp->b_un.b_addr =
20777c478bd9Sstevel@tonic-gate (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
20787c478bd9Sstevel@tonic-gate if (bp->b_flags & B_REMAPPED)
20797c478bd9Sstevel@tonic-gate bufp->b_proc = NULL;
20807c478bd9Sstevel@tonic-gate }
20817c478bd9Sstevel@tonic-gate }
20827c478bd9Sstevel@tonic-gate return (bufp);
20837c478bd9Sstevel@tonic-gate }
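/*
 * Sketch of a typical bioclone() caller (illustrative names only): a
 * layered driver splitting a parent request pbp into a child covering
 * bytes [off, off + len) of the parent might do
 *
 *	cbp = bioclone(pbp, off, len, child_dev, child_blkno,
 *	    child_iodone, NULL, KM_SLEEP);
 *	(void) bdev_strategy(cbp);
 *
 * with child_iodone() tracking outstanding children and calling
 * biodone() on the parent once all of them have completed.
 */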
2084