xref: /freebsd/sys/ufs/ffs/ffs_vnops.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
/*-
 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
 *
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

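/*
 * ALIGNED_TO(ptr, s) tests whether ptr is aligned strictly enough to
 * point at an object of type s, e.g. ALIGNED_TO(ip->i_ea_area,
 * struct extattr) in the extended attribute code below.  It assumes
 * that _Alignof(s) is a power of two.
 */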
#define	ALIGNED_TO(ptr, s)	\
	(((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fdatasync_t	ffs_fdatasync;
static vop_fsync_t	ffs_fsync;
static vop_getpages_t	ffs_getpages;
static vop_getpages_async_t	ffs_getpages_async;
static vop_lock1_t	ffs_lock;
#ifdef INVARIANTS
static vop_unlock_t	ffs_unlock_debug;
#endif
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;
static vop_vptofh_t	ffs_vptofh;

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_getpages =		ffs_getpages,
	.vop_getpages_async =	ffs_getpages_async,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1);

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_fifoops1);

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_getpages =		ffs_getpages,
	.vop_getpages_async =	ffs_getpages_async,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2);

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_fdatasync =	ffs_fdatasync,
	.vop_lock1 =		ffs_lock,
#ifdef INVARIANTS
	.vop_unlock =		ffs_unlock_debug,
#endif
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(ffs_fifoops2);

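/*
 * The "1"/"2" suffixes select per-format vectors: UFS1 vnodes get
 * ffs_vnodeops1/ffs_fifoops1 while UFS2 vnodes get the "2" variants
 * (the choice is believed to be made when the vnode is instantiated
 * in ffs_vfsops.c).  Only the "2" vectors carry the extended attribute
 * operations, since the EA area exists only in the UFS2 dinode.
 */
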
/*
 * Sync an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	int error;

	vp = ap->a_vp;
	bo = &vp->v_bufobj;
retry:
	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
		error = softdep_fsync(vp);
		if (error)
			return (error);

		/*
		 * The softdep_fsync() function may drop the vp lock,
		 * allowing dirty buffers to reappear on the bo_dirty
		 * list.  Recheck and resync as needed.
		 */
		BO_LOCK(bo);
		if ((vp->v_type == VREG || vp->v_type == VDIR) &&
		    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
			BO_UNLOCK(bo);
			goto retry;
		}
		BO_UNLOCK(bo);
	}
	if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
		return (ENXIO);
	return (0);
}

int
ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
{
	struct inode *ip;
	struct bufobj *bo;
	struct ufsmount *ump;
	struct buf *bp, *nbp;
	ufs_lbn_t lbn;
	int error, passes;
	bool still_dirty, wait;

	ip = VTOI(vp);
	ip->i_flag &= ~IN_NEEDSYNC;
	bo = &vp->v_bufobj;
	ump = VFSTOUFS(vp->v_mount);

	/*
	 * When doing MNT_WAIT we must first flush all dependencies
	 * on the inode.
	 */
	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
	    (error = softdep_sync_metadata(vp)) != 0) {
		if (ffs_fsfail_cleanup(ump, error))
			error = 0;
		return (error);
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	error = 0;
	passes = 0;
	wait = false;	/* Always do an async pass first. */
	lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
	BO_LOCK(bo);
loop:
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Skip this buffer if: it has already been considered
		 * on this pass; it has dependencies that will cause it
		 * to be redirtied and it has not yet been deferred; or
		 * it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		/*
		 * Flush indirects in order, if requested.
		 *
		 * Note that if only datasync is requested, we can
		 * skip indirect blocks when softupdates are not
		 * active.  Otherwise we must flush them with data,
		 * since dependencies prevent data block writes.
		 */
		if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
		    (lbn_level(bp->b_lblkno) >= passes ||
		    ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
			continue;
		if (bp->b_lblkno > lbn)
			panic("ffs_syncvnode: syncing truncated data.");
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
			BO_UNLOCK(bo);
		} else if (wait) {
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo)) != 0) {
				bp->b_vflags &= ~BV_SCANNED;
				goto next;
			}
		} else
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * Check for dependencies and potentially complete them.
		 */
		if (!LIST_EMPTY(&bp->b_dep) &&
		    (error = softdep_sync_buf(vp, bp,
		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
			/* I/O error. */
			if (error != EBUSY) {
				BUF_UNLOCK(bp);
				return (error);
			}
			/* If we deferred once, don't defer again. */
			if ((bp->b_flags & B_DEFERRED) == 0) {
				bp->b_flags |= B_DEFERRED;
				BUF_UNLOCK(bp);
				goto next;
			}
		}
		if (wait) {
			bremfree(bp);
			error = bwrite(bp);
			if (ffs_fsfail_cleanup(ump, error))
				error = 0;
			if (error != 0)
				return (error);
		} else if ((bp->b_flags & B_CLUSTEROK)) {
			(void) vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			(void) bawrite(bp);
		}
next:
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		BO_LOCK(bo);
		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
	}
	if (waitfor != MNT_WAIT) {
		BO_UNLOCK(bo);
		if ((flags & NO_INO_UPDT) != 0)
			return (0);
		else
			return (ffs_update(vp, 0));
	}
	/* Drain I/O to see if we're done. */
	bufobj_wwait(bo, 0, 0);
	/*
	 * Block devices associated with filesystems may have new I/O
	 * requests posted for them even if the vnode is locked, so no
	 * amount of trying will get them clean.  We make several passes
	 * as a best effort.
	 *
	 * Regular files may need multiple passes to flush all dependency
	 * work as it is possible that we must write once per indirect
	 * level, once for the leaf, and once for the inode and each of
	 * these will be done with one sync and one async pass.
	 */
	if (bo->bo_dirty.bv_cnt > 0) {
		if ((flags & DATA_ONLY) == 0) {
			still_dirty = true;
		} else {
			/*
			 * For data-only sync, dirty indirect buffers
			 * are ignored.
			 */
			still_dirty = false;
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
				if (bp->b_lblkno > -UFS_NDADDR) {
					still_dirty = true;
					break;
				}
			}
		}

		if (still_dirty) {
			/* Write the inode after sync passes to flush deps. */
			if (wait && DOINGSOFTDEP(vp) &&
			    (flags & NO_INO_UPDT) == 0) {
				BO_UNLOCK(bo);
				ffs_update(vp, 1);
				BO_LOCK(bo);
			}
			/* Switch between sync and async passes. */
			wait = !wait;
			if (wait || ++passes < UFS_NIADDR + 2)
				goto loop;
		}
	}
	BO_UNLOCK(bo);
	error = 0;
	if ((flags & DATA_ONLY) == 0) {
		if ((flags & NO_INO_UPDT) == 0)
			error = ffs_update(vp, 1);
		if (DOINGSUJ(vp))
			softdep_journal_fsync(VTOI(vp));
	} else if ((ip->i_flag & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
		error = ffs_update(vp, 1);
	}
	return (error);
}

static int
ffs_fdatasync(struct vop_fdatasync_args *ap)
{

	return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
}

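/*
 * How the sync entry points compose (a sketch based on the code above):
 * fsync(2) reaches ffs_fsync(), which retries ffs_syncvnode(vp,
 * waitfor, 0) plus softdep_fsync() until the dirty lists stay clean,
 * while fdatasync(2) reaches ffs_fdatasync() and passes DATA_ONLY so
 * that the inode update and, without soft updates, the indirect blocks
 * can be skipped:
 *
 *	error = ffs_syncvnode(vp, MNT_WAIT, DATA_ONLY);
 */
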
static int
ffs_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	/*
	 * Adaptive spinning mixed with SU leads to trouble.  Use the giant
	 * hammer and enable adaptive spinning only when LK_NODDLKTREAT is
	 * set, which currently means only during path lookup.
	 */
	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
		ap->a_flags |= LK_ADAPTIVE;
	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
#ifdef DEBUG_VFS_LOCKS
			VNPASS(vp->v_holdcnt != 0, vp);
#endif
			lkp = vp->v_vnlock;
			result = lockmgr_lock_flags(lkp, flags,
			    &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line);
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept.  The lock currently held is not the
			 * right lock.  Release it, and try to get the
			 * new lock.
			 */
			lockmgr_unlock(lkp);
			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
			    (LK_INTERLOCK | LK_NOWAIT))
				return (EBUSY);
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	/*
	 * See above for an explanation.
	 */
	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
		ap->a_flags |= LK_ADAPTIVE;
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}

#ifdef INVARIANTS
static int
ffs_unlock_debug(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
		if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
			VI_LOCK(vp);
			VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp,
			    ("%s: modified vnode (%x) not on lazy list",
			    __func__, ip->i_flag));
			VI_UNLOCK(vp);
		}
	}
	return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
}
#endif

static int
ffs_read_hole(struct uio *uio, long xfersize, long *size)
{
	ssize_t saved_resid, tlen;
	int error;

	while (xfersize > 0) {
		tlen = min(xfersize, ZERO_REGION_SIZE);
		saved_resid = uio->uio_resid;
		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
		    tlen, uio);
		if (error != 0)
			return (error);
		tlen = saved_resid - uio->uio_resid;
		xfersize -= tlen;
		*size -= tlen;
	}
	return (0);
}

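/*
 * ffs_read_hole() pairs with the GB_NOSPARSE flag used in ffs_read()
 * below: bread_gb() on an unallocated (sparse) block fails with
 * EJUSTRETURN rather than allocating backing store, and the read loop
 * then substitutes zeroes from the kernel's shared zero_region:
 *
 *	if (error == EJUSTRETURN)
 *		error = ffs_read_hole(uio, xfersize, &size);
 */
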
/*
 * Vnode op for reading.
 */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int bflag, error, ioflag, seqcount;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return (error);
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block, as
		 * appropriate).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of data that precedes
		 * our start point.
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, bflag, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = blksize(fs, ip, nextlbn);
			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		}
		if (error == EJUSTRETURN) {
			error = ffs_read_hole(uio, xfersize, &size);
			if (error == 0)
				continue;
		}
		if (error != 0) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
			    (int)xfersize, uio);
		}
		if (error)
			break;

		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error, because the
	 * loop above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
	return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int seqcount;
	int blkoffset, error, flags, ioflag, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset, (int)uio->uio_resid);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
		return (EFBIG);

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;
	flags |= BA_UNMAPPED;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0) {
			vnode_pager_setsize(vp, ip->i_size);
			break;
		}
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
			    (int)xfersize, uio);
		}
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we need only clear buffers with a transfer size
		 * equal to the block size because buffers with a shorter
		 * transfer size were cleared above by the call to UFS_BALLOC()
		 * with the BA_CLRBUF flag set.
		 *
		 * If the source region for uiomove identically mmaps the
		 * buffer, uiomove() performed the NOP copy, and the buffer
		 * content remains valid because the page fault handler
		 * validated the pages.
		 */
		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
		    fs->fs_bsize == xfersize)
			vfs_bio_clrbuf(bp);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount,
				    GB_UNMAPPED);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
	}
	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			DIP_SET(ip, i_mode, ip->i_mode);
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		error = ffs_update(vp, 1);
		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
			error = ENXIO;
	}
	return (error);
}

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int error;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block, as
		 * appropriate).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of data that precedes
		 * our start point.
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the
			 * extattr area.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;
		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error, because the
	 * loop above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int blkoffset, error, flags, size, xfersize;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid >
	    UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors in a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);

		if (uio->uio_offset + xfersize > dp->di_extsize) {
			dp->di_extsize = uio->uio_offset + xfersize;
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	}
	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			dp->di_mode = ip->i_mode;
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * Locate a named extended attribute.
 *
 * Find a particular EA (nspace:name) in the area (ptr:length).  Return
 * the size of the EA's content, or -1 if it is not found, and optionally
 * pointers to the entry and to its content.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    struct extattr **eapp, u_char **eac)
{
	struct extattr *eap, *eaend;
	size_t nlen;

	nlen = strlen(name);
	KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
	eap = (struct extattr *)ptr;
	eaend = (struct extattr *)(ptr + length);
	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
		/* Make sure this entry is complete. */
		if (EXTATTR_NEXT(eap) > eaend)
			break;
		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
		    || memcmp(eap->ea_name, name, nlen) != 0)
			continue;
		if (eapp != NULL)
			*eapp = eap;
		if (eac != NULL)
			*eac = EXTATTR_CONTENT(eap);
		return (EXTATTR_CONTENT_SIZE(eap));
	}
	return (-1);
}

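/*
 * A sketch of one EA record as implied by the reader above and the
 * writer in ffs_setextattr() (see <sys/extattr.h> for the real layout):
 *
 *	uint32_t ea_length;         length of this whole record
 *	uint8_t  ea_namespace;      attribute namespace
 *	uint8_t  ea_contentpadlen;  pad bytes after the content
 *	uint8_t  ea_namelength;     length of the name
 *	char     ea_name[];         name, padded to an 8-byte boundary
 *	content bytes, followed by ea_contentpadlen bytes of padding
 *
 * which matches the "sizeof(uint32_t) + 3 + strlen(name)" header size
 * computed in ffs_setextattr().
 */
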
static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	u_int easize;
	int error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize + extra > UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

static void
ffs_lock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	while (ip->i_flag & IN_EA_LOCKED) {
		UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
		    0);
	}
	UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
	VI_UNLOCK(vp);
}

static void
ffs_unlock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	if (ip->i_flag & IN_EA_LOCKWAIT)
		wakeup(&ip->i_ea_refs);
	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
	VI_UNLOCK(vp);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area != NULL) {
		ip->i_ea_refs++;
		ffs_unlock_ea(vp);
		return (0);
	}
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error) {
		ffs_unlock_ea(vp);
		return (error);
	}
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	ip->i_ea_refs++;
	ffs_unlock_ea(vp);
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area == NULL) {
		ffs_unlock_ea(vp);
		return (EINVAL);
	}
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	if (--ip->i_ea_refs == 0) {
		free(ip->i_ea_area, M_TEMP);
		ip->i_ea_area = NULL;
		ip->i_ea_len = 0;
		ip->i_ea_error = 0;
	}
	ffs_unlock_ea(vp);
	return (error);
}

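/*
 * ffs_open_ea() and ffs_close_ea() bracket a reference-counted
 * transaction on an in-memory copy of the EA area.  The typical
 * pattern, as used by the delete/set operations below, is roughly:
 *
 *	error = ffs_open_ea(vp, cred, td);	   snapshot the EA area
 *	... build a modified copy, swap it into ip->i_ea_area ...
 *	error = ffs_close_ea(vp, 1, cred, td);	   commit via ffs_extwrite()
 *
 * A commit that finds ip->i_ea_error already set behaves as an abort
 * and returns the stored error.
 */
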
/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction start.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct extattr *eap;
	uint32_t ul;
	int olen, error, i, easize;
	u_char *eae;
	void *tmp;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed here, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/* CEM: delete could be done in-place instead */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	ul = eap->ea_length;
	i = (u_char *)EXTATTR_NEXT(eap) - eae;
	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
	easize -= ul;

	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;

	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct extattr *eap, *eaend;
	int error, ealen;

	ip = VTOI(ap->a_vp);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;

	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
	eap = (struct extattr *)ip->i_ea_area;
	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
		/* Make sure this entry is complete. */
		if (EXTATTR_NEXT(eap) > eaend)
			break;
		if (eap->ea_namespace != ap->a_attrnamespace)
			continue;

		ealen = eap->ea_namelength;
		if (ap->a_size != NULL)
			*ap->a_size += ealen + 1;
		else if (ap->a_uio != NULL)
			error = uiomove(&eap->ea_namelength, ealen + 1,
			    ap->a_uio);
	}

	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	struct extattr *eap;
	uint32_t ealength, ul;
	ssize_t ealen;
	int olen, eapad1, eapad2, error, i, easize;
	u_char *eae;
	void *tmp;

	ip = VTOI(ap->a_vp);
	fs = ITOFS(ip);

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Deleting EAs via a NULL uio is no longer supported. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	ealen = ap->a_uio->uio_resid;
	if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed here, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = roundup2(ealength, 8) - ealength;
	eapad2 = roundup2(ealen, 8) - ealen;
	ealength += eapad1 + ealen + eapad2;

	/*
	 * CEM: rewrites of the same size or smaller could be done in-place
	 * instead.  (We don't acquire any fine-grained locks in here either,
	 * so we could also do bigger writes in-place.)
	 */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* new, append at end */
		KASSERT(ALIGNED_TO(eae + easize, struct extattr),
		    ("unaligned"));
		eap = (struct extattr *)(eae + easize);
		easize += ealength;
	} else {
		ul = eap->ea_length;
		i = (u_char *)EXTATTR_NEXT(eap) - eae;
		if (ul != ealength) {
			bcopy(EXTATTR_NEXT(eap), (u_char *)eap + ealength,
			    easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > lblktosize(fs, UFS_NXADDR)) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	eap->ea_length = ealength;
	eap->ea_namespace = ap->a_attrnamespace;
	eap->ea_contentpadlen = eapad2;
	eap->ea_namelength = strlen(ap->a_name);
	memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
	bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
	error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}
	bzero((u_char *)EXTATTR_CONTENT(eap) + ealen, eapad2);

	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode pointer to File handle
 */
static int
ffs_vptofh(struct vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(ap->a_vp);
	ufhp = (struct ufid *)ap->a_fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}

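/*
 * The handle built above (inode number plus generation) is what an NFS
 * server hands out; the reverse translation in ffs_vfsops.c is believed
 * to check ufid_gen against the inode's current generation so that
 * handles naming a since-recycled inode are rejected.
 */
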
SYSCTL_DECL(_vfs_ffs);
static int use_buf_pager = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
    "Always use buffer pager instead of bmap");

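/*
 * The two helpers below supply the logical-block-number and block-size
 * callbacks that vfs_bio_getpages() needs to drive the generic buffer
 * pager for this filesystem; see their use in ffs_getpages() and
 * ffs_getpages_async() below.
 */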
static daddr_t
ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
{

	return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
}

static int
ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
{

	return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
}

static int
ffs_getpages(struct vop_getpages_args *ap)
{
	struct vnode *vp;
	struct ufsmount *um;

	vp = ap->a_vp;
	um = VFSTOUFS(vp->v_mount);

	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
		return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
}

static int
ffs_getpages_async(struct vop_getpages_async_args *ap)
{
	struct vnode *vp;
	struct ufsmount *um;
	bool do_iodone;
	int error;

	vp = ap->a_vp;
	um = VFSTOUFS(vp->v_mount);
	do_iodone = true;

	if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
		error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
		if (error == 0)
			do_iodone = false;
	} else {
		error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
		    ffs_gbp_getblksz);
	}
	if (do_iodone && ap->a_iodone != NULL)
		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);

	return (error);
}
1840