xref: /freebsd/sys/ufs/ffs/ffs_rawread.c (revision d37286b9bf92ec923ab6823bbedef9e39e7e1ebb)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2000-2003 Tor Egge
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
285bbb8060STor Egge 
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>
565bbb8060STor Egge 
575bbb8060STor Egge static int ffs_rawread_readahead(struct vnode *vp,
585bbb8060STor Egge 				 caddr_t udata,
595bbb8060STor Egge 				 off_t offset,
605bbb8060STor Egge 				 size_t len,
615bbb8060STor Egge 				 struct thread *td,
62fade8dd7SJeff Roberson 				 struct buf *bp);
635bbb8060STor Egge static int ffs_rawread_main(struct vnode *vp,
645bbb8060STor Egge 			    struct uio *uio);
655bbb8060STor Egge 
6681c794f9SAttilio Rao static int ffs_rawread_sync(struct vnode *vp);
675bbb8060STor Egge 
685bbb8060STor Egge int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
695bbb8060STor Egge 
705bbb8060STor Egge SYSCTL_DECL(_vfs_ffs);
715bbb8060STor Egge 
72756a5412SGleb Smirnoff static uma_zone_t ffsraw_pbuf_zone;
735bbb8060STor Egge 
745bbb8060STor Egge static int allowrawread = 1;
755bbb8060STor Egge SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
765bbb8060STor Egge 	   "Flag to enable raw reads");
775bbb8060STor Egge 
785bbb8060STor Egge static int rawreadahead = 1;
795bbb8060STor Egge SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
805bbb8060STor Egge 	   "Flag to enable readahead for long raw reads");
815bbb8060STor Egge 
8223f6698fSKonstantin Belousov static void
8323f6698fSKonstantin Belousov ffs_rawread_setup(void *arg __unused)
845bbb8060STor Egge {
8523f6698fSKonstantin Belousov 
86756a5412SGleb Smirnoff 	ffsraw_pbuf_zone = pbuf_zsecond_create("ffsrawpbuf",
87756a5412SGleb Smirnoff 	    (nswbuf > 100 ) ?  (nswbuf - (nswbuf >> 4)) : nswbuf - 8);
885bbb8060STor Egge }
8923f6698fSKonstantin Belousov SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
905bbb8060STor Egge 
915bbb8060STor Egge static int
9281c794f9SAttilio Rao ffs_rawread_sync(struct vnode *vp)
935bbb8060STor Egge {
945bbb8060STor Egge 	int error;
955bbb8060STor Egge 	int upgraded;
96156cb265SPoul-Henning Kamp 	struct bufobj *bo;
973b582b4eSTor Egge 	struct mount *mp;
9849e3050eSKonstantin Belousov 	vm_object_t obj;
995bbb8060STor Egge 
1005bbb8060STor Egge 	/* Check for dirty mmap, pending writes and dirty buffers */
101156cb265SPoul-Henning Kamp 	bo = &vp->v_bufobj;
102698b1a66SJeff Roberson 	BO_LOCK(bo);
103698b1a66SJeff Roberson 	VI_LOCK(vp);
104156cb265SPoul-Henning Kamp 	if (bo->bo_numoutput > 0 ||
10558883a1fSPoul-Henning Kamp 	    bo->bo_dirty.bv_cnt > 0 ||
10649e3050eSKonstantin Belousov 	    ((obj = vp->v_object) != NULL &&
10767d0e293SJeff Roberson 	     vm_object_mightbedirty(obj))) {
1085bbb8060STor Egge 		VI_UNLOCK(vp);
109698b1a66SJeff Roberson 		BO_UNLOCK(bo);
1105bbb8060STor Egge 
1113b582b4eSTor Egge 		if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
11281c794f9SAttilio Rao 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
1133b582b4eSTor Egge 				upgraded = 1;
1143b582b4eSTor Egge 			else
1153b582b4eSTor Egge 				upgraded = 0;
116b249ce48SMateusz Guzik 			VOP_UNLOCK(vp);
1173b582b4eSTor Egge 			(void) vn_start_write(vp, &mp, V_WAIT);
11822db15c0SAttilio Rao 			VOP_LOCK(vp, LK_EXCLUSIVE);
11981c794f9SAttilio Rao 		} else if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
1205bbb8060STor Egge 			upgraded = 1;
1215bbb8060STor Egge 			/* Upgrade to exclusive lock, this might block */
12222db15c0SAttilio Rao 			VOP_LOCK(vp, LK_UPGRADE);
1235bbb8060STor Egge 		} else
1245bbb8060STor Egge 			upgraded = 0;
1255bbb8060STor Egge 
1265bbb8060STor Egge 
1275bbb8060STor Egge 		VI_LOCK(vp);
128b673e7b7STor Egge 		/* Check if vnode was reclaimed while unlocked. */
129abd80ddbSMateusz Guzik 		if (VN_IS_DOOMED(vp)) {
130b673e7b7STor Egge 			VI_UNLOCK(vp);
131b673e7b7STor Egge 			if (upgraded != 0)
13222db15c0SAttilio Rao 				VOP_LOCK(vp, LK_DOWNGRADE);
133b673e7b7STor Egge 			vn_finished_write(mp);
134b673e7b7STor Egge 			return (EIO);
135b673e7b7STor Egge 		}
136*b068bb09SKonstantin Belousov 		VI_UNLOCK(vp);
137*b068bb09SKonstantin Belousov 
138b673e7b7STor Egge 		/* Attempt to msync mmap() regions to clean dirty mmap */
139*b068bb09SKonstantin Belousov 		vnode_pager_clean_sync(vp);
1405bbb8060STor Egge 
1415bbb8060STor Egge 		/* Wait for pending writes to complete */
142698b1a66SJeff Roberson 		BO_LOCK(bo);
143a76d8f4eSPoul-Henning Kamp 		error = bufobj_wwait(&vp->v_bufobj, 0, 0);
1445bbb8060STor Egge 		if (error != 0) {
145a76d8f4eSPoul-Henning Kamp 			/* XXX: can't happen with a zero timeout ??? */
146698b1a66SJeff Roberson 			BO_UNLOCK(bo);
1475bbb8060STor Egge 			if (upgraded != 0)
14822db15c0SAttilio Rao 				VOP_LOCK(vp, LK_DOWNGRADE);
149b673e7b7STor Egge 			vn_finished_write(mp);
1505bbb8060STor Egge 			return (error);
1515bbb8060STor Egge 		}
1525bbb8060STor Egge 		/* Flush dirty buffers */
15358883a1fSPoul-Henning Kamp 		if (bo->bo_dirty.bv_cnt > 0) {
154698b1a66SJeff Roberson 			BO_UNLOCK(bo);
15575a58389SKirk McKusick 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) {
1565bbb8060STor Egge 				if (upgraded != 0)
15722db15c0SAttilio Rao 					VOP_LOCK(vp, LK_DOWNGRADE);
158b673e7b7STor Egge 				vn_finished_write(mp);
1595bbb8060STor Egge 				return (error);
1605bbb8060STor Egge 			}
161698b1a66SJeff Roberson 			BO_LOCK(bo);
16258883a1fSPoul-Henning Kamp 			if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
1635bbb8060STor Egge 				panic("ffs_rawread_sync: dirty bufs");
1645bbb8060STor Egge 		}
165698b1a66SJeff Roberson 		BO_UNLOCK(bo);
1665bbb8060STor Egge 		if (upgraded != 0)
16722db15c0SAttilio Rao 			VOP_LOCK(vp, LK_DOWNGRADE);
1683b582b4eSTor Egge 		vn_finished_write(mp);
1695bbb8060STor Egge 	} else {
1705bbb8060STor Egge 		VI_UNLOCK(vp);
171698b1a66SJeff Roberson 		BO_UNLOCK(bo);
1725bbb8060STor Egge 	}
1735bbb8060STor Egge 	return 0;
1745bbb8060STor Egge }
1755bbb8060STor Egge 
1765bbb8060STor Egge static int
1775bbb8060STor Egge ffs_rawread_readahead(struct vnode *vp,
1785bbb8060STor Egge 		      caddr_t udata,
1795bbb8060STor Egge 		      off_t offset,
1805bbb8060STor Egge 		      size_t len,
1815bbb8060STor Egge 		      struct thread *td,
182fade8dd7SJeff Roberson 		      struct buf *bp)
1835bbb8060STor Egge {
1845bbb8060STor Egge 	int error;
185831b1ff7SKirk McKusick 	uint64_t iolen;
1865bbb8060STor Egge 	off_t blockno;
1875bbb8060STor Egge 	int blockoff;
1885bbb8060STor Egge 	int bsize;
1895bbb8060STor Egge 	struct vnode *dp;
1905bbb8060STor Egge 	int bforwards;
191b1038548SPoul-Henning Kamp 	struct inode *ip;
192b1038548SPoul-Henning Kamp 	ufs2_daddr_t blkno;
1935bbb8060STor Egge 
1945bbb8060STor Egge 	bsize = vp->v_mount->mnt_stat.f_iosize;
1955bbb8060STor Egge 
196b1038548SPoul-Henning Kamp 	ip = VTOI(vp);
197e1db6897SKonstantin Belousov 	dp = ITODEVVP(ip);
198b1038548SPoul-Henning Kamp 
1995bbb8060STor Egge 	iolen = ((vm_offset_t) udata) & PAGE_MASK;
2005bbb8060STor Egge 	bp->b_bcount = len;
2015bbb8060STor Egge 	if (bp->b_bcount + iolen > bp->b_kvasize) {
2025bbb8060STor Egge 		bp->b_bcount = bp->b_kvasize;
2035bbb8060STor Egge 		if (iolen != 0)
2045bbb8060STor Egge 			bp->b_bcount -= PAGE_SIZE;
2055bbb8060STor Egge 	}
20600cbe31bSPoul-Henning Kamp 	bp->b_flags = 0;	/* XXX necessary ? */
2075bbb8060STor Egge 	bp->b_iocmd = BIO_READ;
2082524c26dSJeff Roberson 	bp->b_iodone = bdone;
209f0da6ec9STor Egge 	blockno = offset / bsize;
210f0da6ec9STor Egge 	blockoff = (offset % bsize) / DEV_BSIZE;
2115bbb8060STor Egge 	if ((daddr_t) blockno != blockno) {
2125bbb8060STor Egge 		return EINVAL; /* blockno overflow */
2135bbb8060STor Egge 	}
2145bbb8060STor Egge 
2155bbb8060STor Egge 	bp->b_lblkno = bp->b_blkno = blockno;
2165bbb8060STor Egge 
217b1038548SPoul-Henning Kamp 	error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL);
218b1038548SPoul-Henning Kamp 	if (error != 0)
2195bbb8060STor Egge 		return error;
220f0da6ec9STor Egge 	if (blkno == -1) {
2215bbb8060STor Egge 		/* Fill holes with NULs to preserve semantics */
2225bbb8060STor Egge 
2235bbb8060STor Egge 		if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
2245bbb8060STor Egge 			bp->b_bcount = bsize - blockoff * DEV_BSIZE;
2255bbb8060STor Egge 
22644ca4575SBrooks Davis 		if (vmapbuf(bp, udata, bp->b_bcount, 1) < 0)
2275bbb8060STor Egge 			return EFAULT;
2285bbb8060STor Egge 
22908b163faSMatthew D Fleming 		maybe_yield();
2305bbb8060STor Egge 		bzero(bp->b_data, bp->b_bufsize);
2315bbb8060STor Egge 
2325bbb8060STor Egge 		/* Mark operation completed (similar to bufdone()) */
2335bbb8060STor Egge 
2345bbb8060STor Egge 		bp->b_resid = 0;
2355bbb8060STor Egge 		bp->b_flags |= B_DONE;
2365bbb8060STor Egge 		return 0;
2375bbb8060STor Egge 	}
238f0da6ec9STor Egge 	bp->b_blkno = blkno + blockoff;
239f0da6ec9STor Egge 	bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE;
2405bbb8060STor Egge 
2415bbb8060STor Egge 	if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
2425bbb8060STor Egge 		bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
2435bbb8060STor Egge 
24444ca4575SBrooks Davis 	if (vmapbuf(bp, udata, bp->b_bcount, 1) < 0)
2455bbb8060STor Egge 		return EFAULT;
2465bbb8060STor Egge 
2470391e5a1SPoul-Henning Kamp 	BO_STRATEGY(&dp->v_bufobj, bp);
2485bbb8060STor Egge 	return 0;
2495bbb8060STor Egge }
2505bbb8060STor Egge 
2515bbb8060STor Egge static int
2525bbb8060STor Egge ffs_rawread_main(struct vnode *vp,
2535bbb8060STor Egge 		 struct uio *uio)
2545bbb8060STor Egge {
2555bbb8060STor Egge 	int error, nerror;
2565bbb8060STor Egge 	struct buf *bp, *nbp, *tbp;
257831b1ff7SKirk McKusick 	uint64_t iolen;
2585bbb8060STor Egge 	caddr_t udata;
2595bbb8060STor Egge 	long resid;
2605bbb8060STor Egge 	off_t offset;
2615bbb8060STor Egge 	struct thread *td;
2625bbb8060STor Egge 
2635bbb8060STor Egge 	td = uio->uio_td ? uio->uio_td : curthread;
2645bbb8060STor Egge 	udata = uio->uio_iov->iov_base;
2655bbb8060STor Egge 	resid = uio->uio_resid;
2665bbb8060STor Egge 	offset = uio->uio_offset;
2675bbb8060STor Egge 
2685bbb8060STor Egge 	error = 0;
2695bbb8060STor Egge 	nerror = 0;
2705bbb8060STor Egge 
2715bbb8060STor Egge 	bp = NULL;
2725bbb8060STor Egge 	nbp = NULL;
2735bbb8060STor Egge 
2745bbb8060STor Egge 	while (resid > 0) {
2755bbb8060STor Egge 
2765bbb8060STor Egge 		if (bp == NULL) { /* Setup first read */
277756a5412SGleb Smirnoff 			bp = uma_zalloc(ffsraw_pbuf_zone, M_WAITOK);
2780d86a7f7STor Egge 			pbgetvp(vp, bp);
2795bbb8060STor Egge 			error = ffs_rawread_readahead(vp, udata, offset,
280fade8dd7SJeff Roberson 						     resid, td, bp);
2815bbb8060STor Egge 			if (error != 0)
2825bbb8060STor Egge 				break;
2835bbb8060STor Egge 
2845bbb8060STor Egge 			if (resid > bp->b_bufsize) { /* Setup fist readahead */
2855bbb8060STor Egge 				if (rawreadahead != 0)
286756a5412SGleb Smirnoff 					nbp = uma_zalloc(ffsraw_pbuf_zone,
287756a5412SGleb Smirnoff 					    M_NOWAIT);
2885bbb8060STor Egge 				else
2895bbb8060STor Egge 					nbp = NULL;
2905bbb8060STor Egge 				if (nbp != NULL) {
2910d86a7f7STor Egge 					pbgetvp(vp, nbp);
2925bbb8060STor Egge 
2935bbb8060STor Egge 					nerror = ffs_rawread_readahead(vp,
2945bbb8060STor Egge 								       udata +
2955bbb8060STor Egge 								       bp->b_bufsize,
2965bbb8060STor Egge 								       offset +
2975bbb8060STor Egge 								       bp->b_bufsize,
2985bbb8060STor Egge 								       resid -
2995bbb8060STor Egge 								       bp->b_bufsize,
3005bbb8060STor Egge 								       td,
301fade8dd7SJeff Roberson 								       nbp);
3025bbb8060STor Egge 					if (nerror) {
3030d86a7f7STor Egge 						pbrelvp(nbp);
304756a5412SGleb Smirnoff 						uma_zfree(ffsraw_pbuf_zone,
305756a5412SGleb Smirnoff 						    nbp);
3065bbb8060STor Egge 						nbp = NULL;
3075bbb8060STor Egge 					}
3085bbb8060STor Egge 				}
3095bbb8060STor Egge 			}
3105bbb8060STor Egge 		}
3115bbb8060STor Egge 
31210dccf8fSTor Egge 		bwait(bp, PRIBIO, "rawrd");
3135bbb8060STor Egge 		vunmapbuf(bp);
3145bbb8060STor Egge 
3155bbb8060STor Egge 		iolen = bp->b_bcount - bp->b_resid;
3165bbb8060STor Egge 		if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
3175bbb8060STor Egge 			nerror = 0;	/* Ignore possible beyond EOF error */
3185bbb8060STor Egge 			break; /* EOF */
3195bbb8060STor Egge 		}
3205bbb8060STor Egge 
3215bbb8060STor Egge 		if ((bp->b_ioflags & BIO_ERROR) != 0) {
3225bbb8060STor Egge 			error = bp->b_error;
3235bbb8060STor Egge 			break;
3245bbb8060STor Egge 		}
3255bbb8060STor Egge 		resid -= iolen;
3265bbb8060STor Egge 		udata += iolen;
3275bbb8060STor Egge 		offset += iolen;
3285bbb8060STor Egge 		if (iolen < bp->b_bufsize) {
3295bbb8060STor Egge 			/* Incomplete read.  Try to read remaining part */
3305bbb8060STor Egge 			error = ffs_rawread_readahead(vp,
3315bbb8060STor Egge 						      udata,
3325bbb8060STor Egge 						      offset,
3335bbb8060STor Egge 						      bp->b_bufsize - iolen,
3345bbb8060STor Egge 						      td,
335fade8dd7SJeff Roberson 						      bp);
3365bbb8060STor Egge 			if (error != 0)
3375bbb8060STor Egge 				break;
3385bbb8060STor Egge 		} else if (nbp != NULL) { /* Complete read with readahead */
3395bbb8060STor Egge 
3405bbb8060STor Egge 			tbp = bp;
3415bbb8060STor Egge 			bp = nbp;
3425bbb8060STor Egge 			nbp = tbp;
3435bbb8060STor Egge 
3445bbb8060STor Egge 			if (resid <= bp->b_bufsize) { /* No more readaheads */
3450d86a7f7STor Egge 				pbrelvp(nbp);
346756a5412SGleb Smirnoff 				uma_zfree(ffsraw_pbuf_zone, nbp);
3475bbb8060STor Egge 				nbp = NULL;
3485bbb8060STor Egge 			} else { /* Setup next readahead */
3495bbb8060STor Egge 				nerror = ffs_rawread_readahead(vp,
3505bbb8060STor Egge 							       udata +
3515bbb8060STor Egge 							       bp->b_bufsize,
3525bbb8060STor Egge 							       offset +
3535bbb8060STor Egge 							       bp->b_bufsize,
3545bbb8060STor Egge 							       resid -
3555bbb8060STor Egge 							       bp->b_bufsize,
3565bbb8060STor Egge 							       td,
357fade8dd7SJeff Roberson 							       nbp);
3585bbb8060STor Egge 				if (nerror != 0) {
3590d86a7f7STor Egge 					pbrelvp(nbp);
360756a5412SGleb Smirnoff 					uma_zfree(ffsraw_pbuf_zone, nbp);
3615bbb8060STor Egge 					nbp = NULL;
3625bbb8060STor Egge 				}
3635bbb8060STor Egge 			}
3645bbb8060STor Egge 		} else if (nerror != 0) {/* Deferred Readahead error */
3655bbb8060STor Egge 			break;
3665bbb8060STor Egge 		}  else if (resid > 0) { /* More to read, no readahead */
3675bbb8060STor Egge 			error = ffs_rawread_readahead(vp, udata, offset,
368fade8dd7SJeff Roberson 						      resid, td, bp);
3695bbb8060STor Egge 			if (error != 0)
3705bbb8060STor Egge 				break;
3715bbb8060STor Egge 		}
3725bbb8060STor Egge 	}
3735bbb8060STor Egge 
3740d86a7f7STor Egge 	if (bp != NULL) {
3750d86a7f7STor Egge 		pbrelvp(bp);
376756a5412SGleb Smirnoff 		uma_zfree(ffsraw_pbuf_zone, bp);
3770d86a7f7STor Egge 	}
3785bbb8060STor Egge 	if (nbp != NULL) {			/* Run down readahead buffer */
37910dccf8fSTor Egge 		bwait(nbp, PRIBIO, "rawrd");
3805bbb8060STor Egge 		vunmapbuf(nbp);
3810d86a7f7STor Egge 		pbrelvp(nbp);
382756a5412SGleb Smirnoff 		uma_zfree(ffsraw_pbuf_zone, nbp);
3835bbb8060STor Egge 	}
3845bbb8060STor Egge 
3855bbb8060STor Egge 	if (error == 0)
3865bbb8060STor Egge 		error = nerror;
3875bbb8060STor Egge 	uio->uio_iov->iov_base = udata;
3885bbb8060STor Egge 	uio->uio_resid = resid;
3895bbb8060STor Egge 	uio->uio_offset = offset;
3905bbb8060STor Egge 	return error;
3915bbb8060STor Egge }
3925bbb8060STor Egge 
3935bbb8060STor Egge int
3945bbb8060STor Egge ffs_rawread(struct vnode *vp,
3955bbb8060STor Egge 	    struct uio *uio,
3965bbb8060STor Egge 	    int *workdone)
3975bbb8060STor Egge {
3985bbb8060STor Egge 	if (allowrawread != 0 &&
3995bbb8060STor Egge 	    uio->uio_iovcnt == 1 &&
4005bbb8060STor Egge 	    uio->uio_segflg == UIO_USERSPACE &&
4015bbb8060STor Egge 	    uio->uio_resid == uio->uio_iov->iov_len &&
402fa2a4d05STim J. Robbins 	    (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags &
403fa2a4d05STim J. Robbins 	     TDP_DEADLKTREAT) == 0) {
4045bbb8060STor Egge 		int secsize;		/* Media sector size */
4055bbb8060STor Egge 		off_t filebytes;	/* Bytes left of file */
4065bbb8060STor Egge 		int blockbytes;		/* Bytes left of file in full blocks */
4075bbb8060STor Egge 		int partialbytes;	/* Bytes in last partial block */
4085bbb8060STor Egge 		int skipbytes;		/* Bytes not to read in ffs_rawread */
4095bbb8060STor Egge 		struct inode *ip;
4105bbb8060STor Egge 		int error;
4115bbb8060STor Egge 
4125bbb8060STor Egge 
4135bbb8060STor Egge 		/* Only handle sector aligned reads */
4145bbb8060STor Egge 		ip = VTOI(vp);
415e1db6897SKonstantin Belousov 		secsize = ITODEVVP(ip)->v_bufobj.bo_bsize;
4165bbb8060STor Egge 		if ((uio->uio_offset & (secsize - 1)) == 0 &&
4175bbb8060STor Egge 		    (uio->uio_resid & (secsize - 1)) == 0) {
4185bbb8060STor Egge 
4195bbb8060STor Egge 			/* Sync dirty pages and buffers if needed */
42081c794f9SAttilio Rao 			error = ffs_rawread_sync(vp);
4215bbb8060STor Egge 			if (error != 0)
4225bbb8060STor Egge 				return error;
4235bbb8060STor Egge 
4245bbb8060STor Egge 			/* Check for end of file */
4255bbb8060STor Egge 			if (ip->i_size > uio->uio_offset) {
4265bbb8060STor Egge 				filebytes = ip->i_size - uio->uio_offset;
4275bbb8060STor Egge 
4285bbb8060STor Egge 				/* No special eof handling needed ? */
4295bbb8060STor Egge 				if (uio->uio_resid <= filebytes) {
4305bbb8060STor Egge 					*workdone = 1;
4315bbb8060STor Egge 					return ffs_rawread_main(vp, uio);
4325bbb8060STor Egge 				}
4335bbb8060STor Egge 
4345bbb8060STor Egge 				partialbytes = ((unsigned int) ip->i_size) %
435e1db6897SKonstantin Belousov 				    ITOFS(ip)->fs_bsize;
4365bbb8060STor Egge 				blockbytes = (int) filebytes - partialbytes;
4375bbb8060STor Egge 				if (blockbytes > 0) {
4385bbb8060STor Egge 					skipbytes = uio->uio_resid -
4395bbb8060STor Egge 						blockbytes;
4405bbb8060STor Egge 					uio->uio_resid = blockbytes;
4415bbb8060STor Egge 					error = ffs_rawread_main(vp, uio);
4425bbb8060STor Egge 					uio->uio_resid += skipbytes;
4435bbb8060STor Egge 					if (error != 0)
4445bbb8060STor Egge 						return error;
4455bbb8060STor Egge 					/* Read remaining part using buffer */
4465bbb8060STor Egge 				}
4475bbb8060STor Egge 			}
4485bbb8060STor Egge 		}
4495bbb8060STor Egge 	}
4505bbb8060STor Egge 	*workdone = 0;
4515bbb8060STor Egge 	return 0;
4525bbb8060STor Egge }
453