xref: /freebsd/sys/ufs/ffs/ffs_vnops.c (revision ae83180158c4c937f170e31eff311b18c0286a93)
1 /*
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Copyright (c) 1982, 1986, 1989, 1993
12  *	The Regents of the University of California.  All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
43  * $FreeBSD$
44  */
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/stat.h>
52 #include <sys/bio.h>
53 #include <sys/buf.h>
54 #include <sys/proc.h>
55 #include <sys/mount.h>
56 #include <sys/vnode.h>
57 #include <sys/conf.h>
58 
59 #include <machine/limits.h>
60 
61 #include <vm/vm.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_object.h>
64 #include <vm/vm_extern.h>
65 #include <vm/vm_pager.h>
66 #include <vm/vnode_pager.h>
67 
68 #include <ufs/ufs/extattr.h>
69 #include <ufs/ufs/quota.h>
70 #include <ufs/ufs/inode.h>
71 #include <ufs/ufs/ufsmount.h>
72 #include <ufs/ufs/ufs_extern.h>
73 
74 #include <ufs/ffs/fs.h>
75 #include <ufs/ffs/ffs_extern.h>
76 
77 int	ffs_fsync(struct vop_fsync_args *);
78 static int	ffs_getpages(struct vop_getpages_args *);
79 static int	ffs_read(struct vop_read_args *);
80 static int	ffs_write(struct vop_write_args *);
81 static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred);
82 static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred);
83 static int	ffs_getextattr(struct vop_getextattr_args *);
84 static int	ffs_setextattr(struct vop_setextattr_args *);
85 
86 
87 /* Global vfs data structures for ufs. */
88 vop_t **ffs_vnodeop_p;
89 static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
90 	{ &vop_default_desc,		(vop_t *) ufs_vnoperate },
91 	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
92 	{ &vop_getpages_desc,		(vop_t *) ffs_getpages },
93 	{ &vop_read_desc,		(vop_t *) ffs_read },
94 	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
95 	{ &vop_write_desc,		(vop_t *) ffs_write },
96 	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
97 	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
98 	{ NULL, NULL }
99 };
100 static struct vnodeopv_desc ffs_vnodeop_opv_desc =
101 	{ &ffs_vnodeop_p, ffs_vnodeop_entries };
102 
103 vop_t **ffs_specop_p;
104 static struct vnodeopv_entry_desc ffs_specop_entries[] = {
105 	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
106 	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
107 	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
108 	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
109 	{ NULL, NULL }
110 };
111 static struct vnodeopv_desc ffs_specop_opv_desc =
112 	{ &ffs_specop_p, ffs_specop_entries };
113 
114 vop_t **ffs_fifoop_p;
115 static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
116 	{ &vop_default_desc,		(vop_t *) ufs_vnoperatefifo },
117 	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
118 	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
119 	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
120 	{ NULL, NULL }
121 };
122 static struct vnodeopv_desc ffs_fifoop_opv_desc =
123 	{ &ffs_fifoop_p, ffs_fifoop_entries };
124 
125 VNODEOP_SET(ffs_vnodeop_opv_desc);
126 VNODEOP_SET(ffs_specop_opv_desc);
127 VNODEOP_SET(ffs_fifoop_opv_desc);
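/*
 * Each VNODEOP_SET() registers its vnodeopv_desc so that the operation
 * vector is filled in when the VFS initializes; operations not listed
 * above fall through to the ufs_vnoperate*() defaults.
 */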
128 
129 /*
130  * Synch an open file.
131  */
132 /* ARGSUSED */
133 int
134 ffs_fsync(ap)
135 	struct vop_fsync_args /* {
136 		struct vnode *a_vp;
137 		struct ucred *a_cred;
138 		int a_waitfor;
139 		struct thread *a_td;
140 	} */ *ap;
141 {
142 	struct vnode *vp = ap->a_vp;
143 	struct inode *ip = VTOI(vp);
144 	struct buf *bp;
145 	struct buf *nbp;
146 	int s, error, wait, passes, skipmeta;
147 	ufs_lbn_t lbn;
148 
149 	wait = (ap->a_waitfor == MNT_WAIT);
150 	if (vn_isdisk(vp, NULL)) {
151 		lbn = INT_MAX;
152 		if (vp->v_rdev->si_mountpoint != NULL &&
153 		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
154 			softdep_fsync_mountdev(vp);
155 	} else {
156 		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
157 	}
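	/*
	 * lbn is the first logical block past the end of the file; dirty
	 * buffers at or beyond it cover truncated data and are discarded
	 * in the loop below.  A device vnode has no file size, so INT_MAX
	 * keeps that check from ever firing.
	 */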
158 
159 	/*
160 	 * Flush all dirty buffers associated with a vnode.
161 	 */
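	/*
	 * A synchronous request is handled in two phases: data buffers
	 * first (skipmeta makes us pass over buffers with negative
	 * lblkno, i.e. indirect and other metadata blocks), then the
	 * metadata on a second pass.  passes (NIADDR + 1) bounds how
	 * many times we loop back before giving up.
	 */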
162 	passes = NIADDR + 1;
163 	skipmeta = 0;
164 	if (wait)
165 		skipmeta = 1;
166 	s = splbio();
167 loop:
168 	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
169 		bp->b_flags &= ~B_SCANNED;
170 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
171 		nbp = TAILQ_NEXT(bp, b_vnbufs);
172 		/*
173 		 * Reasons to skip this buffer: it has already been considered
174 		 * on this pass, this pass is the first time through on a
175 		 * synchronous flush request and the buffer being considered
176 		 * is metadata, the buffer has dependencies that will cause
177 		 * it to be redirtied and it has not already been deferred,
178 		 * or it is already being written.
179 		 */
180 		if ((bp->b_flags & B_SCANNED) != 0)
181 			continue;
182 		bp->b_flags |= B_SCANNED;
183 		if ((skipmeta == 1 && bp->b_lblkno < 0))
184 			continue;
185 		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
186 		    (bp->b_flags & B_DEFERRED) == 0 &&
187 		    buf_countdeps(bp, 0)) {
188 			bp->b_flags |= B_DEFERRED;
189 			continue;
190 		}
191 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
192 			continue;
193 		if ((bp->b_flags & B_DELWRI) == 0)
194 			panic("ffs_fsync: not dirty");
195 		if (vp != bp->b_vp)
196 			panic("ffs_fsync: vp != bp->b_vp");
197 		/*
198 		 * If this is a synchronous flush request, or it is not a
199 		 * file or device, start the write on this buffer immediately.
200 		 */
201 		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
202 
203 			/*
204 			 * On our final pass through, do all I/O synchronously
205 			 * so that we can find out if our flush is failing
206 			 * because of write errors.
207 			 */
208 			if (passes > 0 || !wait) {
209 				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
210 					BUF_UNLOCK(bp);
211 					(void) vfs_bio_awrite(bp);
212 				} else {
213 					bremfree(bp);
214 					splx(s);
215 					(void) bawrite(bp);
216 					s = splbio();
217 				}
218 			} else {
219 				bremfree(bp);
220 				splx(s);
221 				if ((error = bwrite(bp)) != 0)
222 					return (error);
223 				s = splbio();
224 			}
225 		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
226 			/*
227 			 * If the buffer is for data that has been truncated
228 			 * off the file, then throw it away.
229 			 */
230 			bremfree(bp);
231 			bp->b_flags |= B_INVAL | B_NOCACHE;
232 			splx(s);
233 			brelse(bp);
234 			s = splbio();
235 		} else {
236 			BUF_UNLOCK(bp);
237 			vfs_bio_awrite(bp);
238 		}
239 		/*
240 		 * Since we may have slept during the I/O, we need
241 		 * to start from a known point.
242 		 */
243 		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
244 	}
245 	/*
246 	 * If we were asked to do this synchronously, then go back for
247 	 * another pass, this time doing the metadata.
248 	 */
249 	if (skipmeta) {
250 		skipmeta = 0;
251 		goto loop;
252 	}
253 
254 	if (wait) {
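		/*
		 * Wait for the writes started above to drain before
		 * syncing dependencies and checking for buffers that
		 * are still dirty.
		 */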
255 		VI_LOCK(vp);
256 		while (vp->v_numoutput) {
257 			vp->v_iflag |= VI_BWAIT;
258 			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
259 			    PRIBIO + 4, "ffsfsn", 0);
260   		}
261 		VI_UNLOCK(vp);
262 
263 		/*
264 		 * Ensure that any filesystem metadata associated
265 		 * with the vnode has been written.
266 		 */
267 		splx(s);
268 		if ((error = softdep_sync_metadata(ap)) != 0)
269 			return (error);
270 		s = splbio();
271 
272 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
273 			/*
274 			 * Block devices associated with filesystems may
275 			 * have new I/O requests posted for them even if
276 			 * the vnode is locked, so no amount of trying will
277 			 * get them clean. Thus we give block devices a
278 			 * good effort, then just give up. For all other file
279 			 * types, go around and try again until it is clean.
280 			 */
281 			if (passes > 0) {
282 				passes -= 1;
283 				goto loop;
284 			}
285 #ifdef DIAGNOSTIC
286 			if (!vn_isdisk(vp, NULL))
287 				vprint("ffs_fsync: dirty", vp);
288 #endif
289 		}
290 	}
291 	splx(s);
292 	return (UFS_UPDATE(vp, wait));
293 }
294 
295 
296 /*
297  * Vnode op for reading.
298  */
299 /* ARGSUSED */
300 int
301 ffs_read(ap)
302 	struct vop_read_args /* {
303 		struct vnode *a_vp;
304 		struct uio *a_uio;
305 		int a_ioflag;
306 		struct ucred *a_cred;
307 	} */ *ap;
308 {
309 	struct vnode *vp;
310 	struct inode *ip;
311 	struct uio *uio;
312 	struct fs *fs;
313 	struct buf *bp;
314 	ufs_lbn_t lbn, nextlbn;
315 	off_t bytesinfile;
316 	long size, xfersize, blkoffset;
317 	int error, orig_resid;
318 	mode_t mode;
319 	int seqcount;
320 	int ioflag;
321 	vm_object_t object;
322 
323 	vp = ap->a_vp;
324 	uio = ap->a_uio;
325 	ioflag = ap->a_ioflag;
326 	if (ap->a_ioflag & IO_EXT)
327 		return (ffs_extread(vp, uio, ioflag, ap->a_cred));
328 
329 	GIANT_REQUIRED;
330 
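	/*
	 * The upper bits of a_ioflag carry the sequential access hint
	 * computed by the caller; it is used below to size read-ahead
	 * and clustering.
	 */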
331 	seqcount = ap->a_ioflag >> 16;
332 	ip = VTOI(vp);
333 	mode = ip->i_mode;
334 
335 #ifdef DIAGNOSTIC
336 	if (uio->uio_rw != UIO_READ)
337 		panic("ffs_read: mode");
338 
339 	if (vp->v_type == VLNK) {
340 		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
341 			panic("ffs_read: short symlink");
342 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
343 		panic("ffs_read: type %d",  vp->v_type);
344 #endif
345 	fs = ip->i_fs;
346 	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
347 		return (EFBIG);
348 
349 	orig_resid = uio->uio_resid;
350 	if (orig_resid <= 0)
351 		return (0);
352 
353 	object = vp->v_object;
354 
355 	bytesinfile = ip->i_size - uio->uio_offset;
356 	if (bytesinfile <= 0) {
357 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
358 			ip->i_flag |= IN_ACCESS;
359 		return 0;
360 	}
361 
362 	if (object) {
363 		vm_object_reference(object);
364 	}
365 
366 #ifdef ENABLE_VFS_IOOPT
367 	/*
368 	 * If IO optimisation is turned on,
369 	 * and we are NOT a VM based IO request
370 	 * (i.e. not headed for the buffer cache),
371 	 * but there IS a vm object associated with it.
372 	 */
373 	if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
374 		int nread, toread;
375 
376 		toread = uio->uio_resid;
377 		if (toread > bytesinfile)
378 			toread = bytesinfile;
379 		if (toread >= PAGE_SIZE) {
380 			/*
381 			 * Then if it's at least a page in size, try
382 			 * Then if it's at least a page in size, try to
383 			 * get the data from the object using vm tricks.
384 			error = uioread(toread, uio, object, &nread);
385 			if ((uio->uio_resid == 0) || (error != 0)) {
386 				/*
387 				 * If we finished or there was an error
388 				 * then finish up (the reference previously
389 				 * obtained on object must be released).
390 				 */
391 				if ((error == 0 ||
392 				    uio->uio_resid != orig_resid) &&
393 				    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
394 					ip->i_flag |= IN_ACCESS;
395 
396 				if (object) {
397 					vm_object_vndeallocate(object);
398 				}
399 				return error;
400 			}
401 		}
402 	}
403 #endif
404 
405 	/*
406 	 * Ok so we couldn't do it all in one vm trick...
407 	 * so cycle around trying smaller bites.
408 	 */
409 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
410 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
411 			break;
412 #ifdef ENABLE_VFS_IOOPT
413 		if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
414 			/*
415 			 * Obviously we didn't finish above, but we
416 			 * didn't get an error either. Try the same trick again,
417 			 * but this time we are looping.
418 			 */
419 			int nread, toread;
420 			toread = uio->uio_resid;
421 			if (toread > bytesinfile)
422 				toread = bytesinfile;
423 
424 			/*
425 			 * Once again, if there isn't enough for a
426 			 * whole page, don't try optimising.
427 			 */
428 			if (toread >= PAGE_SIZE) {
429 				error = uioread(toread, uio, object, &nread);
430 				if ((uio->uio_resid == 0) || (error != 0)) {
431 					/*
432 					 * If we finished or there was an
433 					 * error then finish up (the reference
434 					 * previously obtained on object must
435 					 * be released).
436 					 */
437 					if ((error == 0 ||
438 					    uio->uio_resid != orig_resid) &&
439 					    (vp->v_mount->mnt_flag &
440 					    MNT_NOATIME) == 0)
441 						ip->i_flag |= IN_ACCESS;
442 					if (object) {
443 						vm_object_vndeallocate(object);
444 					}
445 					return error;
446 				}
447 				/*
448 				 * To get here we didn't finish or err.
449 				 * If we did get some data,
450 				 * loop to try another bite.
451 				 */
452 				if (nread > 0) {
453 					continue;
454 				}
455 			}
456 		}
457 #endif
458 
459 		lbn = lblkno(fs, uio->uio_offset);
460 		nextlbn = lbn + 1;
461 
462 		/*
463 		 * Size of buffer.  The buffer representing the
464 		 * end of the file is rounded up to the size of
465 		 * the block type (fragment or full block,
466 		 * depending).
467 		 */
468 		size = blksize(fs, ip, lbn);
469 		blkoffset = blkoff(fs, uio->uio_offset);
470 
471 		/*
472 		 * The amount we want to transfer in this iteration is
473 		 * one FS block less the amount of the data before
474 		 * our startpoint (duh!)
475 		 */
476 		xfersize = fs->fs_bsize - blkoffset;
477 
478 		/*
479 		 * But if we actually want less than the block,
480 		 * or the file doesn't have a whole block more of data,
481 		 * then use the lesser number.
482 		 */
483 		if (uio->uio_resid < xfersize)
484 			xfersize = uio->uio_resid;
485 		if (bytesinfile < xfersize)
486 			xfersize = bytesinfile;
487 
488 		if (lblktosize(fs, nextlbn) >= ip->i_size) {
489 			/*
490 			 * Don't do readahead if this is the end of the file.
491 			 */
492 			error = bread(vp, lbn, size, NOCRED, &bp);
493 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
494 			/*
495 			 * Otherwise if we are allowed to cluster,
496 			 * grab as much as we can.
497 			 *
498 			 * XXX  This may not be a win if we are not
499 			 * doing sequential access.
500 			 */
501 			error = cluster_read(vp, ip->i_size, lbn,
502 				size, NOCRED, uio->uio_resid, seqcount, &bp);
503 		} else if (seqcount > 1) {
504 			/*
505 			 * If we are NOT allowed to cluster, then
506 			 * if we appear to be acting sequentially,
507 			 * fire off a request for a readahead
508 			 * as well as a read. Note that the 4th and 5th
509 			 * arguments point to arrays of the size specified in
510 			 * the 6th argument.
511 			 */
512 			int nextsize = blksize(fs, ip, nextlbn);
513 			error = breadn(vp, lbn,
514 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
515 		} else {
516 			/*
517 			 * Failing all of the above, just read what the
518 			 * user asked for. Interestingly, the same as
519 			 * the first option above.
520 			 */
521 			error = bread(vp, lbn, size, NOCRED, &bp);
522 		}
523 		if (error) {
524 			brelse(bp);
525 			bp = NULL;
526 			break;
527 		}
528 
529 		/*
530 		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
531 		 * will cause us to attempt to release the buffer later on
532 		 * and will cause the buffer cache to attempt to free the
533 		 * underlying pages.
534 		 */
535 		if (ioflag & IO_DIRECT)
536 			bp->b_flags |= B_DIRECT;
537 
538 		/*
539 		 * We should only get non-zero b_resid when an I/O error
540 		 * has occurred, which should cause us to break above.
541 		 * However, if the short read did not cause an error,
542 		 * then we want to ensure that we do not uiomove bad
543 		 * or uninitialized data.
544 		 */
545 		size -= bp->b_resid;
546 		if (size < xfersize) {
547 			if (size == 0)
548 				break;
549 			xfersize = size;
550 		}
551 
552 #ifdef ENABLE_VFS_IOOPT
553 		if (vfs_ioopt && object &&
554 		    (bp->b_flags & B_VMIO) &&
555 		    ((blkoffset & PAGE_MASK) == 0) &&
556 		    ((xfersize & PAGE_MASK) == 0)) {
557 			/*
558 			 * If VFS IO optimisation is turned on,
559 			 * and it's an exact page multiple
560 			 * and a normal VM based op,
561 			 * then use uiomoveco().
562 			 */
563 			error =
564 				uiomoveco((char *)bp->b_data + blkoffset,
565 					(int)xfersize, uio, object, 0);
566 		} else
567 #endif
568 		{
569 			/*
570 			 * otherwise use the general form
571 			 */
572 			error =
573 				uiomove((char *)bp->b_data + blkoffset,
574 					(int)xfersize, uio);
575 		}
576 
577 		if (error)
578 			break;
579 
580 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
581 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
582 			/*
583 			 * If there are no dependencies, and it's VMIO,
584 			 * then we don't need the buf, mark it available
585 			 * for freeing. The VM has the data.
586 			 */
587 			bp->b_flags |= B_RELBUF;
588 			brelse(bp);
589 		} else {
590 			/*
591 			 * Otherwise let whoever
592 			 * made the request take care of
593 			 * freeing it. We just queue
594 			 * it onto another list.
595 			 */
596 			bqrelse(bp);
597 		}
598 	}
599 
600 	/*
601 	 * This can only happen in the case of an error
602 	 * because the loop above resets bp to NULL on each iteration
603 	 * and on normal completion has not set a new value into it,
604 	 * so it must have come from a 'break' statement.
605 	 */
606 	if (bp != NULL) {
607 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
608 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
609 			bp->b_flags |= B_RELBUF;
610 			brelse(bp);
611 		} else {
612 			bqrelse(bp);
613 		}
614 	}
615 
616 	if (object) {
617 		vm_object_vndeallocate(object);
618 	}
619 	if ((error == 0 || uio->uio_resid != orig_resid) &&
620 	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
621 		ip->i_flag |= IN_ACCESS;
622 	return (error);
623 }
624 
625 /*
626  * Vnode op for writing.
627  */
628 int
629 ffs_write(ap)
630 	struct vop_write_args /* {
631 		struct vnode *a_vp;
632 		struct uio *a_uio;
633 		int a_ioflag;
634 		struct ucred *a_cred;
635 	} */ *ap;
636 {
637 	struct vnode *vp;
638 	struct uio *uio;
639 	struct inode *ip;
640 	struct fs *fs;
641 	struct buf *bp;
642 	struct thread *td;
643 	ufs_lbn_t lbn;
644 	off_t osize;
645 	int seqcount;
646 	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
647 	vm_object_t object;
648 
649 	vp = ap->a_vp;
650 	uio = ap->a_uio;
651 	ioflag = ap->a_ioflag;
652 	if (ap->a_ioflag & IO_EXT)
653 		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
654 
655 	GIANT_REQUIRED;
656 
657 	extended = 0;
658 	seqcount = ap->a_ioflag >> 16;
659 	ip = VTOI(vp);
660 
661 	object = vp->v_object;
662 	if (object) {
663 		vm_object_reference(object);
664 	}
665 
666 #ifdef DIAGNOSTIC
667 	if (uio->uio_rw != UIO_WRITE)
668 		panic("ffswrite: mode");
669 #endif
670 
671 	switch (vp->v_type) {
672 	case VREG:
673 		if (ioflag & IO_APPEND)
674 			uio->uio_offset = ip->i_size;
675 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
676 			if (object) {
677 				vm_object_vndeallocate(object);
678 			}
679 			return (EPERM);
680 		}
681 		/* FALLTHROUGH */
682 	case VLNK:
683 		break;
684 	case VDIR:
685 		panic("ffswrite: dir write");
686 		break;
687 	default:
688 		panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
689 			(int)uio->uio_offset,
690 			(int)uio->uio_resid
691 		);
692 	}
693 
694 	fs = ip->i_fs;
695 	if (uio->uio_offset < 0 ||
696 	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
697 		if (object) {
698 			vm_object_vndeallocate(object);
699 		}
700 		return (EFBIG);
701 	}
702 	/*
703 	 * Maybe this should be above the vnode op call, but so long as
704 	 * file servers have no limits, I don't think it matters.
705 	 */
706 	td = uio->uio_td;
707 	if (vp->v_type == VREG && td &&
708 	    uio->uio_offset + uio->uio_resid >
709 	    td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
710 		PROC_LOCK(td->td_proc);
711 		psignal(td->td_proc, SIGXFSZ);
712 		PROC_UNLOCK(td->td_proc);
713 		if (object) {
714 			vm_object_vndeallocate(object);
715 		}
716 		return (EFBIG);
717 	}
718 
719 	resid = uio->uio_resid;
720 	osize = ip->i_size;
721 	flags = 0;
722 	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
723 		flags = IO_SYNC;
724 
725 #ifdef ENABLE_VFS_IOOPT
726 	if (object && (object->flags & OBJ_OPT)) {
727 		vm_freeze_copyopts(object,
728 			OFF_TO_IDX(uio->uio_offset),
729 			OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
730 	}
731 #endif
732 	for (error = 0; uio->uio_resid > 0;) {
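		/*
		 * Work on at most one filesystem block per iteration: lbn
		 * is the logical block for the current offset, blkoffset
		 * the offset within that block, and xfersize is clipped to
		 * what remains of both the block and the request.
		 */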
733 		lbn = lblkno(fs, uio->uio_offset);
734 		blkoffset = blkoff(fs, uio->uio_offset);
735 		xfersize = fs->fs_bsize - blkoffset;
736 		if (uio->uio_resid < xfersize)
737 			xfersize = uio->uio_resid;
738 
739 		if (uio->uio_offset + xfersize > ip->i_size)
740 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
741 
742                 /*
743 		 * We must perform a read-before-write if the transfer size
744 		 * does not cover the entire buffer.
745                  */
746 		if (fs->fs_bsize > xfersize)
747 			flags |= BA_CLRBUF;
748 		else
749 			flags &= ~BA_CLRBUF;
750 /* XXX is uio->uio_offset the right thing here? */
751 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
752 		    ap->a_cred, flags, &bp);
753 		if (error != 0)
754 			break;
755 		/*
756 		 * If the buffer is not valid we have to clear out any
757 		 * garbage data from the pages instantiated for the buffer.
758 		 * If we do not, a failed uiomove() during a write can leave
759 		 * the prior contents of the pages exposed to a userland
760 		 * mmap().  XXX deal with uiomove() errors a better way.
761 		 */
762 		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
763 			vfs_bio_clrbuf(bp);
764 		if (ioflag & IO_DIRECT)
765 			bp->b_flags |= B_DIRECT;
766 		if (ioflag & IO_NOWDRAIN)
767 			bp->b_flags |= B_NOWDRAIN;
768 
769 		if (uio->uio_offset + xfersize > ip->i_size) {
770 			ip->i_size = uio->uio_offset + xfersize;
771 			DIP(ip, i_size) = ip->i_size;
772 			extended = 1;
773 		}
774 
775 		size = blksize(fs, ip, lbn) - bp->b_resid;
776 		if (size < xfersize)
777 			xfersize = size;
778 
779 		error =
780 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
781 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
782 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
783 			bp->b_flags |= B_RELBUF;
784 		}
785 
786 		/*
787 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
788 		 * if we have a severe page deficiency write the buffer
789 		 * asynchronously.  Otherwise try to cluster, and if that
790 		 * doesn't do it then either do an async write (if O_DIRECT),
791 		 * or a delayed write (if not).
792 		 */
793 		if (ioflag & IO_SYNC) {
794 			(void)bwrite(bp);
795 		} else if (vm_page_count_severe() ||
796 			    buf_dirty_count_severe() ||
797 			    (ioflag & IO_ASYNC)) {
798 			bp->b_flags |= B_CLUSTEROK;
799 			bawrite(bp);
800 		} else if (xfersize + blkoffset == fs->fs_bsize) {
801 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
802 				bp->b_flags |= B_CLUSTEROK;
803 				cluster_write(bp, ip->i_size, seqcount);
804 			} else {
805 				bawrite(bp);
806 			}
807 		} else if (ioflag & IO_DIRECT) {
808 			bp->b_flags |= B_CLUSTEROK;
809 			bawrite(bp);
810 		} else {
811 			bp->b_flags |= B_CLUSTEROK;
812 			bdwrite(bp);
813 		}
814 		if (error || xfersize == 0)
815 			break;
816 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
817 	}
818 	/*
819 	 * If we successfully wrote any data, and we are not the superuser,
820 	 * we clear the setuid and setgid bits as a precaution against
821 	 * tampering.
822 	 */
823 	if (resid > uio->uio_resid && ap->a_cred &&
824 	    suser_cred(ap->a_cred, PRISON_ROOT)) {
825 		ip->i_mode &= ~(ISUID | ISGID);
826 		DIP(ip, i_mode) = ip->i_mode;
827 	}
828 	if (resid > uio->uio_resid)
829 		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
830 	if (error) {
831 		if (ioflag & IO_UNIT) {
832 			(void)UFS_TRUNCATE(vp, osize,
833 			    IO_NORMAL | (ioflag & IO_SYNC),
834 			    ap->a_cred, uio->uio_td);
835 			uio->uio_offset -= resid - uio->uio_resid;
836 			uio->uio_resid = resid;
837 		}
838 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
839 		error = UFS_UPDATE(vp, 1);
840 
841 	if (object) {
842 		vm_object_vndeallocate(object);
843 	}
844 
845 	return (error);
846 }
847 
848 /*
849  * get page routine
850  */
851 int
852 ffs_getpages(ap)
853 	struct vop_getpages_args *ap;
854 {
855 	off_t foff, physoffset;
856 	int i, size, bsize;
857 	struct vnode *dp, *vp;
858 	vm_object_t obj;
859 	vm_pindex_t pindex, firstindex;
860 	vm_page_t mreq;
861 	int bbackwards, bforwards;
862 	int pbackwards, pforwards;
863 	int firstpage;
864 	ufs2_daddr_t reqblkno, reqlblkno;
865 	int poff;
866 	int pcount;
867 	int rtval;
868 	int pagesperblock;
869 
870 	GIANT_REQUIRED;
871 
872 	pcount = round_page(ap->a_count) / PAGE_SIZE;
873 	mreq = ap->a_m[ap->a_reqpage];
874 	firstindex = ap->a_m[0]->pindex;
875 
876 	/*
877 	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
878 	 * then the entire page is valid.  Since the page may be mapped,
879 	 * user programs might reference data beyond the actual end of file
880 	 * occurring within the page.  We have to zero that data.
881 	 */
882 	if (mreq->valid) {
883 		if (mreq->valid != VM_PAGE_BITS_ALL)
884 			vm_page_zero_invalid(mreq, TRUE);
885 		vm_page_lock_queues();
886 		for (i = 0; i < pcount; i++) {
887 			if (i != ap->a_reqpage) {
888 				vm_page_free(ap->a_m[i]);
889 			}
890 		}
891 		vm_page_unlock_queues();
892 		return VM_PAGER_OK;
893 	}
894 
895 	vp = ap->a_vp;
896 	obj = vp->v_object;
897 	bsize = vp->v_mount->mnt_stat.f_iosize;
898 	pindex = mreq->pindex;
899 	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
900 
901 	if (bsize < PAGE_SIZE)
902 		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
903 						    ap->a_count,
904 						    ap->a_reqpage);
905 
906 	/*
907 	 * foff is the file offset of the required page
908 	 * reqlblkno is the logical block that contains the page
909 	 * poff is the index of the page into the logical block
910 	 */
911 	reqlblkno = foff / bsize;
912 	poff = (foff % bsize) / PAGE_SIZE;
913 
914 	dp = VTOI(vp)->i_devvp;
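	/*
	 * ufs_bmaparray() translates the logical block into a device block
	 * (reqblkno) and reports how many blocks on either side of it are
	 * physically contiguous.  A reqblkno of -1 means the block is a
	 * hole, which is satisfied below with a zero-filled, valid page.
	 */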
915 	if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
916 	    || (reqblkno == -1)) {
917 		vm_page_lock_queues();
918 		for(i = 0; i < pcount; i++) {
919 			if (i != ap->a_reqpage)
920 				vm_page_free(ap->a_m[i]);
921 		}
922 		vm_page_unlock_queues();
923 		if (reqblkno == -1) {
924 			if ((mreq->flags & PG_ZERO) == 0)
925 				vm_page_zero_fill(mreq);
926 			vm_page_undirty(mreq);
927 			mreq->valid = VM_PAGE_BITS_ALL;
928 			return VM_PAGER_OK;
929 		} else {
930 			return VM_PAGER_ERROR;
931 		}
932 	}
933 
934 	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
935 	pagesperblock = bsize / PAGE_SIZE;
936 	/*
937 	 * find the first page that is contiguous...
938 	 * note that pbackwards is the number of pages that are contiguous
939 	 * backwards.
940 	 */
941 	firstpage = 0;
942 	if (ap->a_count) {
943 		pbackwards = poff + bbackwards * pagesperblock;
944 		if (ap->a_reqpage > pbackwards) {
945 			firstpage = ap->a_reqpage - pbackwards;
946 			vm_page_lock_queues();
947 			for(i=0;i<firstpage;i++)
948 				vm_page_free(ap->a_m[i]);
949 			vm_page_unlock_queues();
950 		}
951 
952 	/*
953 	 * pforwards is the number of pages that are contiguous
954 	 * after the current page.
955 	 */
956 		pforwards = (pagesperblock - (poff + 1)) +
957 			bforwards * pagesperblock;
958 		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
959 			vm_page_lock_queues();
960 			for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
961 				vm_page_free(ap->a_m[i]);
962 			vm_page_unlock_queues();
963 			pcount = ap->a_reqpage + pforwards + 1;
964 		}
965 
966 	/*
967 	 * number of pages for I/O corrected for the non-contig pages at
968 	 * the beginning of the array.
969 	 */
970 		pcount -= firstpage;
971 	}
972 
973 	/*
974 	 * calculate the size of the transfer
975 	 */
976 
977 	size = pcount * PAGE_SIZE;
978 
979 	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
980 		obj->un_pager.vnp.vnp_size)
981 		size = obj->un_pager.vnp.vnp_size -
982 			IDX_TO_OFF(ap->a_m[firstpage]->pindex);
983 
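	/*
	 * Convert physoffset into the difference between file offsets and
	 * device offsets for this contiguous run, then hand the run to the
	 * device vnode's getpages method to do the actual I/O.
	 */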
984 	physoffset -= foff;
985 	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
986 		(ap->a_reqpage - firstpage), physoffset);
987 
988 	return (rtval);
989 }
990 
991 /*
992  * Vnode op for extended attribute reading.
993  */
994 static int
995 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
996 {
997 	struct inode *ip;
998 	struct ufs2_dinode *dp;
999 	struct fs *fs;
1000 	struct buf *bp;
1001 	ufs_lbn_t lbn, nextlbn;
1002 	off_t bytesinfile;
1003 	long size, xfersize, blkoffset;
1004 	int error, orig_resid;
1005 	mode_t mode;
1006 
1007 	GIANT_REQUIRED;
1008 
1009 	ip = VTOI(vp);
1010 	fs = ip->i_fs;
1011 	dp = ip->i_din2;
1012 	mode = ip->i_mode;
1013 
1014 #ifdef DIAGNOSTIC
1015 	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
1016 		panic("ffs_extread: mode");
1017 
1018 #endif
1019 	orig_resid = uio->uio_resid;
1020 	if (orig_resid <= 0)
1021 		return (0);
1022 
1023 	bytesinfile = dp->di_extsize - uio->uio_offset;
1024 	if (bytesinfile <= 0) {
1025 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1026 			ip->i_flag |= IN_ACCESS;
1027 		return 0;
1028 	}
1029 
1030 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
1031 		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
1032 			break;
1033 
1034 		lbn = lblkno(fs, uio->uio_offset);
1035 		nextlbn = lbn + 1;
1036 
1037 		/*
1038 		 * Size of buffer.  The buffer representing the
1039 		 * end of the file is rounded up to the size of
1040 		 * the block type (fragment or full block,
1041 		 * depending).
1042 		 */
1043 		size = sblksize(fs, dp->di_extsize, lbn);
1044 		blkoffset = blkoff(fs, uio->uio_offset);
1045 
1046 		/*
1047 		 * The amount we want to transfer in this iteration is
1048 		 * one FS block less the amount of the data before
1049 		 * our startpoint (duh!)
1050 		 */
1051 		xfersize = fs->fs_bsize - blkoffset;
1052 
1053 		/*
1054 		 * But if we actually want less than the block,
1055 		 * or the file doesn't have a whole block more of data,
1056 		 * then use the lesser number.
1057 		 */
1058 		if (uio->uio_resid < xfersize)
1059 			xfersize = uio->uio_resid;
1060 		if (bytesinfile < xfersize)
1061 			xfersize = bytesinfile;
1062 
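		/*
		 * External attribute data is addressed with negative
		 * logical block numbers (-1 - lbn), so its buffers never
		 * collide with the regular data blocks of the same vnode.
		 */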
1063 		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
1064 			/*
1065 			 * Don't do readahead if this is the end of the info.
1066 			 */
1067 			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
1068 		} else {
1069 			/*
1070 			 * If we have a second block, then
1071 			 * fire off a request for a readahead
1072 			 * as well as a read. Note that the 4th and 5th
1073 			 * arguments point to arrays of the size specified in
1074 			 * the 6th argument.
1075 			 */
1076 			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
1077 
1078 			nextlbn = -1 - nextlbn;
1079 			error = breadn(vp, -1 - lbn,
1080 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1081 		}
1082 		if (error) {
1083 			brelse(bp);
1084 			bp = NULL;
1085 			break;
1086 		}
1087 
1088 		/*
1089 		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
1090 		 * will cause us to attempt to release the buffer later on
1091 		 * and will cause the buffer cache to attempt to free the
1092 		 * underlying pages.
1093 		 */
1094 		if (ioflag & IO_DIRECT)
1095 			bp->b_flags |= B_DIRECT;
1096 
1097 		/*
1098 		 * We should only get non-zero b_resid when an I/O error
1099 		 * has occurred, which should cause us to break above.
1100 		 * However, if the short read did not cause an error,
1101 		 * then we want to ensure that we do not uiomove bad
1102 		 * or uninitialized data.
1103 		 */
1104 		size -= bp->b_resid;
1105 		if (size < xfersize) {
1106 			if (size == 0)
1107 				break;
1108 			xfersize = size;
1109 		}
1110 
1111 		error = uiomove((char *)bp->b_data + blkoffset,
1112 					(int)xfersize, uio);
1113 		if (error)
1114 			break;
1115 
1116 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1117 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
1118 			/*
1119 			 * If there are no dependencies, and it's VMIO,
1120 			 * then we don't need the buf, mark it available
1121 			 * for freeing. The VM has the data.
1122 			 */
1123 			bp->b_flags |= B_RELBUF;
1124 			brelse(bp);
1125 		} else {
1126 			/*
1127 			 * Otherwise let whoever
1128 			 * made the request take care of
1129 			 * freeing it. We just queue
1130 			 * it onto another list.
1131 			 */
1132 			bqrelse(bp);
1133 		}
1134 	}
1135 
1136 	/*
1137 	 * This can only happen in the case of an error
1138 	 * because the loop above resets bp to NULL on each iteration
1139 	 * and on normal completion has not set a new value into it,
1140 	 * so it must have come from a 'break' statement.
1141 	 */
1142 	if (bp != NULL) {
1143 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1144 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
1145 			bp->b_flags |= B_RELBUF;
1146 			brelse(bp);
1147 		} else {
1148 			bqrelse(bp);
1149 		}
1150 	}
1151 
1152 	if ((error == 0 || uio->uio_resid != orig_resid) &&
1153 	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1154 		ip->i_flag |= IN_ACCESS;
1155 	return (error);
1156 }
1157 
1158 /*
1159  * Vnode op for extended attribute writing.
1160  */
1161 static int
1162 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1163 {
1164 	struct inode *ip;
1165 	struct ufs2_dinode *dp;
1166 	struct fs *fs;
1167 	struct buf *bp;
1168 	ufs_lbn_t lbn;
1169 	off_t osize;
1170 	int blkoffset, error, flags, resid, size, xfersize;
1171 
1172 	GIANT_REQUIRED;
1173 
1174 	ip = VTOI(vp);
1175 	fs = ip->i_fs;
1176 	dp = ip->i_din2;
1177 
1178 #ifdef DIAGNOSTIC
1179 	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1180 		panic("ffs_extwrite: mode");
1181 #endif
1182 
1183 	if (ioflag & IO_APPEND)
1184 		uio->uio_offset = dp->di_extsize;
1185 
1186 	if (uio->uio_offset < 0 ||
1187 	    (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1188 		return (EFBIG);
1189 
1190 	resid = uio->uio_resid;
1191 	osize = dp->di_extsize;
1192 	flags = IO_EXT;
1193 	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1194 		flags |= IO_SYNC;
1195 
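	/*
	 * flags carries IO_EXT so that UFS_BALLOC() allocates storage from
	 * the inode's external attribute block area rather than from the
	 * regular data blocks.
	 */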
1196 	for (error = 0; uio->uio_resid > 0;) {
1197 		lbn = lblkno(fs, uio->uio_offset);
1198 		blkoffset = blkoff(fs, uio->uio_offset);
1199 		xfersize = fs->fs_bsize - blkoffset;
1200 		if (uio->uio_resid < xfersize)
1201 			xfersize = uio->uio_resid;
1202 
1203                 /*
1204 		 * We must perform a read-before-write if the transfer size
1205 		 * does not cover the entire buffer.
1206                  */
1207 		if (fs->fs_bsize > xfersize)
1208 			flags |= BA_CLRBUF;
1209 		else
1210 			flags &= ~BA_CLRBUF;
1211 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1212 		    ucred, flags, &bp);
1213 		if (error != 0)
1214 			break;
1215 		/*
1216 		 * If the buffer is not valid we have to clear out any
1217 		 * garbage data from the pages instantiated for the buffer.
1218 		 * If we do not, a failed uiomove() during a write can leave
1219 		 * the prior contents of the pages exposed to a userland
1220 		 * mmap().  XXX deal with uiomove() errors a better way.
1221 		 */
1222 		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1223 			vfs_bio_clrbuf(bp);
1224 		if (ioflag & IO_DIRECT)
1225 			bp->b_flags |= B_DIRECT;
1226 		if (ioflag & IO_NOWDRAIN)
1227 			bp->b_flags |= B_NOWDRAIN;
1228 
1229 		if (uio->uio_offset + xfersize > dp->di_extsize)
1230 			dp->di_extsize = uio->uio_offset + xfersize;
1231 
1232 		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1233 		if (size < xfersize)
1234 			xfersize = size;
1235 
1236 		error =
1237 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1238 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1239 		   (LIST_FIRST(&bp->b_dep) == NULL)) {
1240 			bp->b_flags |= B_RELBUF;
1241 		}
1242 
1243 		/*
1244 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
1245 		 * if we have a severe page deficiency write the buffer
1246 		 * asynchronously.  Otherwise try to cluster, and if that
1247 		 * doesn't do it then either do an async write (if O_DIRECT),
1248 		 * or a delayed write (if not).
1249 		 */
1250 		if (ioflag & IO_SYNC) {
1251 			(void)bwrite(bp);
1252 		} else if (vm_page_count_severe() ||
1253 			    buf_dirty_count_severe() ||
1254 			    xfersize + blkoffset == fs->fs_bsize ||
1255 			    (ioflag & (IO_ASYNC | IO_DIRECT)))
1256 			bawrite(bp);
1257 		else
1258 			bdwrite(bp);
1259 		if (error || xfersize == 0)
1260 			break;
1261 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1262 	}
1263 	/*
1264 	 * If we successfully wrote any data, and we are not the superuser,
1265 	 * we clear the setuid and setgid bits as a precaution against
1266 	 * tampering.
1267 	 */
1268 	if (resid > uio->uio_resid && ucred &&
1269 	    suser_cred(ucred, PRISON_ROOT)) {
1270 		ip->i_mode &= ~(ISUID | ISGID);
1271 		dp->di_mode = ip->i_mode;
1272 	}
1273 	if (error) {
1274 		if (ioflag & IO_UNIT) {
1275 			(void)UFS_TRUNCATE(vp, osize,
1276 			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1277 			uio->uio_offset -= resid - uio->uio_resid;
1278 			uio->uio_resid = resid;
1279 		}
1280 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1281 		error = UFS_UPDATE(vp, 1);
1282 	return (error);
1283 }
1284 
1285 /*
1286  * Vnode operation to retrieve a named extended attribute.
1287  */
1288 int
1289 ffs_getextattr(struct vop_getextattr_args *ap)
1290 /*
1291 vop_getextattr {
1292 	IN struct vnode *a_vp;
1293 	IN int a_attrnamespace;
1294 	IN const char *a_name;
1295 	INOUT struct uio *a_uio;
1296 	OUT size_t *a_size;
1297 	IN struct ucred *a_cred;
1298 	IN struct thread *a_td;
1299 };
1300 */
1301 {
1302 
1303 	return (ufs_vnoperate((struct vop_generic_args *)ap));
1304 }
1305 
1306 /*
1307  * Vnode operation to set a named attribute.
1308  */
1309 int
1310 ffs_setextattr(struct vop_setextattr_args *ap)
1311 /*
1312 vop_setextattr {
1313 	IN struct vnode *a_vp;
1314 	IN int a_attrnamespace;
1315 	IN const char *a_name;
1316 	INOUT struct uio *a_uio;
1317 	IN struct ucred *a_cred;
1318 	IN struct thread *a_td;
1319 };
1320 */
1321 {
1322 
1323 	return (ufs_vnoperate((struct vop_generic_args *)ap));
1324 }
1325