/*-
 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
 *
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

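/*
 * Evaluates to true when "ptr" is aligned strictly enough to be used
 * as a pointer to an object of type "s", i.e. its address is a
 * multiple of _Alignof(s).
 */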
#define	ALIGNED_TO(ptr, s)	\
	(((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fdatasync_t	ffs_fdatasync;
static vop_fsync_t	ffs_fsync;
static vop_getpages_t	ffs_getpages;
static vop_getpages_async_t	ffs_getpages_async;
static vop_lock1_t	ffs_lock;
#ifdef INVARIANTS
static vop_unlock_t	ffs_unlock_debug;
#endif
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;
static vop_vptofh_t	ffs_vptofh;

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_getpages =		ffs_getpages,
	.vop_getpages_async =	ffs_getpages_async,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1);

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_fifoops1);

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_getpages =		ffs_getpages,
	.vop_getpages_async =	ffs_getpages_async,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2);

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_fifoops2);

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	int error;

	vp = ap->a_vp;
	bo = &vp->v_bufobj;
retry:
	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
		error = softdep_fsync(vp);
		if (error)
			return (error);

		/*
		 * The softdep_fsync() function may drop vp lock,
		 * allowing for dirty buffers to reappear on the
		 * bo_dirty list. Recheck and resync as needed.
		 */
		BO_LOCK(bo);
		if ((vp->v_type == VREG || vp->v_type == VDIR) &&
		    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
			BO_UNLOCK(bo);
			goto retry;
		}
		BO_UNLOCK(bo);
	}
	if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
		return (ENXIO);
	return (0);
}

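/*
 * Write out all dirty buffers and metadata for a vnode.  "waitfor" is
 * MNT_WAIT or MNT_NOWAIT; "flags" may include NO_INO_UPDT to skip the
 * final inode update and DATA_ONLY to flush only the file data.
 */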
int
ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
{
	struct inode *ip;
	struct bufobj *bo;
	struct ufsmount *ump;
	struct buf *bp, *nbp;
	ufs_lbn_t lbn;
	int error, passes;
	bool still_dirty, wait;

	ip = VTOI(vp);
	ip->i_flag &= ~IN_NEEDSYNC;
	bo = &vp->v_bufobj;
	ump = VFSTOUFS(vp->v_mount);

	/*
	 * When doing MNT_WAIT we must first flush all dependencies
	 * on the inode.
	 */
	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
	    (error = softdep_sync_metadata(vp)) != 0) {
		if (ffs_fsfail_cleanup(ump, error))
			error = 0;
		return (error);
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	error = 0;
	passes = 0;
	wait = false;	/* Always do an async pass first. */
	lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
	BO_LOCK(bo);
loop:
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		/*
		 * Flush indirects in order, if requested.
		 *
		 * Note that if only datasync is requested, we can
		 * skip indirect blocks when softupdates are not
		 * active.  Otherwise we must flush them with data,
		 * since dependencies prevent data block writes.
		 */
		if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
		    (lbn_level(bp->b_lblkno) >= passes ||
		    ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
			continue;
		if (bp->b_lblkno > lbn)
			panic("ffs_syncvnode: syncing truncated data.");
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
			BO_UNLOCK(bo);
		} else if (wait) {
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo)) != 0) {
				bp->b_vflags &= ~BV_SCANNED;
				goto next;
			}
		} else
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * Check for dependencies and potentially complete them.
		 */
		if (!LIST_EMPTY(&bp->b_dep) &&
		    (error = softdep_sync_buf(vp, bp,
		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
			/* I/O error. */
			if (error != EBUSY) {
				BUF_UNLOCK(bp);
				return (error);
			}
			/* If we deferred once, don't defer again. */
			if ((bp->b_flags & B_DEFERRED) == 0) {
				bp->b_flags |= B_DEFERRED;
				BUF_UNLOCK(bp);
				goto next;
			}
		}
		if (wait) {
			bremfree(bp);
			error = bwrite(bp);
			if (ffs_fsfail_cleanup(ump, error))
				error = 0;
			if (error != 0)
				return (error);
		} else if ((bp->b_flags & B_CLUSTEROK) != 0) {
			(void) vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			(void) bawrite(bp);
		}
next:
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		BO_LOCK(bo);
		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
	}
	if (waitfor != MNT_WAIT) {
		BO_UNLOCK(bo);
		if ((flags & NO_INO_UPDT) != 0)
			return (0);
		else
			return (ffs_update(vp, 0));
	}
	/* Drain IO to see if we're done. */
	bufobj_wwait(bo, 0, 0);
	/*
	 * Block devices associated with filesystems may have new I/O
	 * requests posted for them even if the vnode is locked, so no
	 * amount of trying will get them clean.  We make several passes
	 * as a best effort.
	 *
	 * Regular files may need multiple passes to flush all dependency
	 * work as it is possible that we must write once per indirect
	 * level, once for the leaf, and once for the inode and each of
	 * these will be done with one sync and one async pass.
	 */
	if (bo->bo_dirty.bv_cnt > 0) {
		if ((flags & DATA_ONLY) == 0) {
			still_dirty = true;
		} else {
			/*
			 * For data-only sync, dirty indirect buffers
			 * are ignored.
			 */
			still_dirty = false;
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
				if (bp->b_lblkno > -UFS_NDADDR) {
					still_dirty = true;
					break;
				}
			}
		}

		if (still_dirty) {
			/* Write the inode after sync passes to flush deps. */
			if (wait && DOINGSOFTDEP(vp) &&
			    (flags & NO_INO_UPDT) == 0) {
				BO_UNLOCK(bo);
				ffs_update(vp, 1);
				BO_LOCK(bo);
			}
			/* switch between sync/async. */
			wait = !wait;
			if (wait || ++passes < UFS_NIADDR + 2)
				goto loop;
		}
	}
	BO_UNLOCK(bo);
	error = 0;
	if ((flags & DATA_ONLY) == 0) {
		if ((flags & NO_INO_UPDT) == 0)
			error = ffs_update(vp, 1);
		if (DOINGSUJ(vp))
			softdep_journal_fsync(VTOI(vp));
	} else if ((ip->i_flag & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
		error = ffs_update(vp, 1);
	}
	return (error);
}

static int
ffs_fdatasync(struct vop_fdatasync_args *ap)
{

	return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
}

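/*
 * Acquire the requested vnode lock.  Loop in case the vnode mutated
 * between a snapshot file vnode and a regular file vnode while this
 * thread slept, in which case the lock now held is the wrong one and
 * must be retaken.
 */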
static int
ffs_lock(struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap)
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	ap->a_flags |= LK_ADAPTIVE;
	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
#ifdef DEBUG_VFS_LOCKS
			VNPASS(vp->v_holdcnt != 0, vp);
#endif
			lkp = vp->v_vnlock;
			result = lockmgr_lock_flags(lkp, flags,
			    &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line);
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept.  The lock currently held is not the
			 * right lock.  Release it, and try to get the
			 * new lock.
			 */
			lockmgr_unlock(lkp);
			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
			    (LK_INTERLOCK | LK_NOWAIT))
				return (EBUSY);
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	ap->a_flags |= LK_ADAPTIVE;
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}

#ifdef INVARIANTS
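/*
 * Assert that a vnode whose inode carries any of the lazily-synced
 * flags is actually present on the mount's lazy vnode list before the
 * vnode lock is dropped.
 */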
static int
ffs_unlock_debug(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
		if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
			VI_LOCK(vp);
			VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp,
			    ("%s: modified vnode (%x) not on lazy list",
			    __func__, ip->i_flag));
			VI_UNLOCK(vp);
		}
	}
	return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
}
#endif

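/*
 * Zero-fill up to "xfersize" bytes of the request from the kernel
 * zero_region rather than reading from disk; used by ffs_read() when
 * it encounters an unallocated hole in the file.  "*size" is reduced
 * by the number of bytes copied.
 */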
static int
ffs_read_hole(struct uio *uio, long xfersize, long *size)
{
	ssize_t saved_resid, tlen;
	int error;

	while (xfersize > 0) {
		tlen = min(xfersize, ZERO_REGION_SIZE);
		saved_resid = uio->uio_resid;
		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
		    tlen, uio);
		if (error != 0)
			return (error);
		tlen = saved_resid - uio->uio_resid;
		xfersize -= tlen;
		*size -= tlen;
	}
	return (0);
}

/*
 * Vnode op for reading.
 */
static int
ffs_read(struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int bflag, error, ioflag, seqcount;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return (error);
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, bflag, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = blksize(fs, ip, nextlbn);
			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		}
		if (error == EJUSTRETURN) {
			error = ffs_read_hole(uio, xfersize, &size);
			if (error == 0)
				continue;
		}
		if (error != 0) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
			    (int)xfersize, uio);
		}
		if (error)
			break;

		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
	return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int seqcount;
	int blkoffset, error, flags, ioflag, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset, (int)uio->uio_resid);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
		return (EFBIG);

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;
	flags |= BA_UNMAPPED;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0) {
			vnode_pager_setsize(vp, ip->i_size);
			break;
		}
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
			    (int)xfersize, uio);
		}
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we need only clear buffers with a transfer size
		 * equal to the block size because buffers with a shorter
		 * transfer size were cleared above by the call to UFS_BALLOC()
		 * with the BA_CLRBUF flag set.
		 *
		 * If the source region for uiomove identically mmaps the
		 * buffer, uiomove() performed the NOP copy, and the buffer
		 * content remains valid because the page fault handler
		 * validated the pages.
		 */
		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
		    fs->fs_bsize == xfersize)
			vfs_bio_clrbuf(bp);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount,
				    GB_UNMAPPED);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
	}
	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			DIP_SET(ip, i_mode, ip->i_mode);
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		error = ffs_update(vp, 1);
		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
			error = ENXIO;
	}
	return (error);
}

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int error;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;
		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int blkoffset, error, flags, size, xfersize;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid >
	    UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);

		if (uio->uio_offset + xfersize > dp->di_extsize) {
			dp->di_extsize = uio->uio_offset + xfersize;
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	}
	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			dp->di_mode = ip->i_mode;
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
/*
 * Look up a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
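/*
 * Each record in the EA area has the layout defined by struct extattr
 * in <ufs/ufs/extattr.h>: a 32-bit total record length, single bytes
 * for the namespace, the content pad length, and the name length, the
 * name itself padded out to an 8-byte boundary, and then the content
 * followed by ea_contentpadlen bytes of padding.
 */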
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    struct extattr **eapp, u_char **eac)
{
	struct extattr *eap, *eaend;
	size_t nlen;

	nlen = strlen(name);
	KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
	eap = (struct extattr *)ptr;
	eaend = (struct extattr *)(ptr + length);
	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
		/* make sure this entry is complete */
		if (EXTATTR_NEXT(eap) > eaend)
			break;
		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen ||
		    memcmp(eap->ea_name, name, nlen) != 0)
			continue;
		if (eapp != NULL)
			*eapp = eap;
		if (eac != NULL)
			*eac = EXTATTR_CONTENT(eap);
		return (EXTATTR_CONTENT_SIZE(eap));
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	u_int easize;
	int error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize + extra > UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

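/*
 * Serialize access to the inode's in-memory extended attribute area.
 * IN_EA_LOCKED marks the area busy; waiters set IN_EA_LOCKWAIT and
 * sleep on i_ea_refs until the holder drops the lock.
 */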
static void
ffs_lock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	while (ip->i_flag & IN_EA_LOCKED) {
		UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
		    0);
	}
	UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
	VI_UNLOCK(vp);
}

static void
ffs_unlock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	if (ip->i_flag & IN_EA_LOCKWAIT)
		wakeup(&ip->i_ea_refs);
	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
	VI_UNLOCK(vp);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area != NULL) {
		ip->i_ea_refs++;
		ffs_unlock_ea(vp);
		return (0);
	}
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error) {
		ffs_unlock_ea(vp);
		return (error);
	}
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	ip->i_ea_refs++;
	ffs_unlock_ea(vp);
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area == NULL) {
		ffs_unlock_ea(vp);
		return (EINVAL);
	}
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	if (--ip->i_ea_refs == 0) {
		free(ip->i_ea_area, M_TEMP);
		ip->i_ea_area = NULL;
		ip->i_ea_len = 0;
		ip->i_ea_error = 0;
	}
	ffs_unlock_ea(vp);
	return (error);
}

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction start.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct extattr *eap;
	uint32_t ul;
	int olen, error, i, easize;
	u_char *eae;
	void *tmp;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed here, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/* CEM: delete could be done in-place instead */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	ul = eap->ea_length;
	i = (u_char *)EXTATTR_NEXT(eap) - eae;
	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
	easize -= ul;

	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;

	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct extattr *eap, *eaend;
	int error, ealen;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;

	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
	eap = (struct extattr *)ip->i_ea_area;
	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
		/* make sure this entry is complete */
		if (EXTATTR_NEXT(eap) > eaend)
			break;
		if (eap->ea_namespace != ap->a_attrnamespace)
			continue;

		ealen = eap->ea_namelength;
		if (ap->a_size != NULL)
			*ap->a_size += ealen + 1;
		else if (ap->a_uio != NULL)
			error = uiomove(&eap->ea_namelength, ealen + 1,
			    ap->a_uio);
	}

	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	struct extattr *eap;
	uint32_t ealength, ul;
	ssize_t ealen;
	int olen, eapad1, eapad2, error, i, easize;
	u_char *eae;
	void *tmp;

	ip = VTOI(ap->a_vp);
	fs = ITOFS(ip);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	ealen = ap->a_uio->uio_resid;
	if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed here, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = roundup2(ealength, 8) - ealength;
	eapad2 = roundup2(ealen, 8) - ealen;
	ealength += eapad1 + ealen + eapad2;

	/*
	 * CEM: rewrites of the same size or smaller could be done in-place
	 * instead.  (We don't acquire any fine-grained locks in here either,
	 * so we could also do bigger writes in-place.)
	 */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* new, append at end */
		KASSERT(ALIGNED_TO(eae + easize, struct extattr),
		    ("unaligned"));
		eap = (struct extattr *)(eae + easize);
		easize += ealength;
	} else {
		ul = eap->ea_length;
		i = (u_char *)EXTATTR_NEXT(eap) - eae;
		if (ul != ealength) {
			bcopy(EXTATTR_NEXT(eap), (u_char *)eap + ealength,
			    easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > lblktosize(fs, UFS_NXADDR)) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	eap->ea_length = ealength;
	eap->ea_namespace = ap->a_attrnamespace;
	eap->ea_contentpadlen = eapad2;
	eap->ea_namelength = strlen(ap->a_name);
	memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
	bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
	error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}
	bzero((u_char *)EXTATTR_CONTENT(eap) + ealen, eapad2);

	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode pointer to File handle
 */
static int
ffs_vptofh(struct vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(ap->a_vp);
	ufhp = (struct ufid *)ap->a_fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}

SYSCTL_DECL(_vfs_ffs);
static int use_buf_pager = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
    "Always use buffer pager instead of bmap");

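/*
 * Buffer-pager callbacks: translate a file offset into its logical
 * block number, and report the size of the block at a given logical
 * block number.
 */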
static daddr_t
ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
{

	return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
}

static int
ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
{

	return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
}

static int
ffs_getpages(struct vop_getpages_args *ap)
{
	struct vnode *vp;
	struct ufsmount *um;

	vp = ap->a_vp;
	um = VFSTOUFS(vp->v_mount);

	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
		return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
}

static int
ffs_getpages_async(struct vop_getpages_async_args *ap)
{
	struct vnode *vp;
	struct ufsmount *um;
	bool do_iodone;
	int error;

	vp = ap->a_vp;
	um = VFSTOUFS(vp->v_mount);
	do_iodone = true;

	if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
		error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
		if (error == 0)
			do_iodone = false;
	} else {
		error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
		    ffs_gbp_getblksz);
	}
	if (do_iodone && ap->a_iodone != NULL)
		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);

	return (error);
}
1830