xref: /freebsd/sys/kern/vfs_default.c (revision 7e1d3eefd410ca0fbae5a217422821244c3eeee4)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed
8  * to Berkeley by John Heidemann of the UCLA Ficus project.
9  *
10  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/bio.h>
43 #include <sys/buf.h>
44 #include <sys/conf.h>
45 #include <sys/event.h>
46 #include <sys/filio.h>
47 #include <sys/kernel.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/lockf.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/namei.h>
54 #include <sys/rwlock.h>
55 #include <sys/fcntl.h>
56 #include <sys/unistd.h>
57 #include <sys/vnode.h>
58 #include <sys/dirent.h>
59 #include <sys/poll.h>
60 #include <sys/stat.h>
61 #include <security/audit/audit.h>
62 #include <sys/priv.h>
63 
64 #include <security/mac/mac_framework.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_extern.h>
69 #include <vm/pmap.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_page.h>
72 #include <vm/vm_pager.h>
73 #include <vm/vnode_pager.h>
74 
/*
 * Forward declarations of file-local VOP fallbacks that are installed in
 * default_vnodeops below.
 */
static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
/* Directory scanning helpers: fetch the next entry / test for a name. */
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

/*
 * Smallest d_reclen that get_next_dirent() accepts: sizeof(struct dirent)
 * minus (MAXNAMLEN + 1) bytes, plus 4.
 * NOTE(review): presumably the fixed dirent header plus a minimal padded
 * name -- confirm against the struct dirent layout.
 */
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/* Further file-local defaults that are installed in default_vnodeops. */
static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
static int vop_stdstat(struct vop_stat_args *ap);
static int vop_stdvput_pair(struct vop_vput_pair_args *ap);
96 
97 /*
98  * This vnode table stores what we want to do if the filesystem doesn't
99  * implement a particular VOP.
100  *
101  * If there is no specific entry here, we will return EOPNOTSUPP.
102  *
103  * Note that every filesystem has to implement either vop_access
104  * or vop_accessx; failing to do so will result in immediate crash
105  * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
106  * which calls vop_stdaccess() etc.
107  */
108 
109 struct vop_vector default_vnodeops = {
110 	.vop_default =		NULL,
111 	.vop_bypass =		VOP_EOPNOTSUPP,
112 
113 	.vop_access =		vop_stdaccess,
114 	.vop_accessx =		vop_stdaccessx,
115 	.vop_advise =		vop_stdadvise,
116 	.vop_advlock =		vop_stdadvlock,
117 	.vop_advlockasync =	vop_stdadvlockasync,
118 	.vop_advlockpurge =	vop_stdadvlockpurge,
119 	.vop_allocate =		vop_stdallocate,
120 	.vop_deallocate =	vop_stddeallocate,
121 	.vop_bmap =		vop_stdbmap,
122 	.vop_close =		VOP_NULL,
123 	.vop_fsync =		VOP_NULL,
124 	.vop_stat =		vop_stdstat,
125 	.vop_fdatasync =	vop_stdfdatasync,
126 	.vop_getpages =		vop_stdgetpages,
127 	.vop_getpages_async =	vop_stdgetpages_async,
128 	.vop_getwritemount = 	vop_stdgetwritemount,
129 	.vop_inactive =		VOP_NULL,
130 	.vop_need_inactive =	vop_stdneed_inactive,
131 	.vop_ioctl =		vop_stdioctl,
132 	.vop_kqfilter =		vop_stdkqfilter,
133 	.vop_islocked =		vop_stdislocked,
134 	.vop_lock1 =		vop_stdlock,
135 	.vop_lookup =		vop_nolookup,
136 	.vop_open =		VOP_NULL,
137 	.vop_pathconf =		VOP_EINVAL,
138 	.vop_poll =		vop_nopoll,
139 	.vop_putpages =		vop_stdputpages,
140 	.vop_readlink =		VOP_EINVAL,
141 	.vop_read_pgcache =	vop_stdread_pgcache,
142 	.vop_rename =		vop_norename,
143 	.vop_revoke =		VOP_PANIC,
144 	.vop_strategy =		vop_nostrategy,
145 	.vop_unlock =		vop_stdunlock,
146 	.vop_vptocnp =		vop_stdvptocnp,
147 	.vop_vptofh =		vop_stdvptofh,
148 	.vop_unp_bind =		vop_stdunp_bind,
149 	.vop_unp_connect =	vop_stdunp_connect,
150 	.vop_unp_detach =	vop_stdunp_detach,
151 	.vop_is_text =		vop_stdis_text,
152 	.vop_set_text =		vop_stdset_text,
153 	.vop_unset_text =	vop_stdunset_text,
154 	.vop_add_writecount =	vop_stdadd_writecount,
155 	.vop_copy_file_range =	vop_stdcopy_file_range,
156 	.vop_vput_pair =	vop_stdvput_pair,
157 };
158 VFS_VOP_VECTOR_REGISTER(default_vnodeops);
159 
160 /*
161  * Series of placeholder functions for various error returns for
162  * VOPs.
163  */
164 
/*
 * Placeholder VOP: ignores its argument and reports that the operation
 * is not supported.
 */
int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (EOPNOTSUPP);
}
174 
/* Placeholder VOP: ignores its argument and always fails with EBADF. */
int
vop_ebadf(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (EBADF);
}
181 
/* Placeholder VOP: ignores its argument and always fails with ENOTTY. */
int
vop_enotty(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (ENOTTY);
}
188 
/* Placeholder VOP: ignores its argument and always fails with EINVAL. */
int
vop_einval(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (EINVAL);
}
195 
/* Placeholder VOP: ignores its argument and always fails with ENOENT. */
int
vop_enoent(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (ENOENT);
}
202 
/* Placeholder VOP: ignores its argument and always fails with EAGAIN. */
int
vop_eagain(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (EAGAIN);
}
209 
/* Placeholder VOP: does nothing and reports success. */
int
vop_null(struct vop_generic_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (0);
}
216 
217 /*
218  * Helper function to panic on some bad VOPs in some filesystems.
219  */
220 int
221 vop_panic(struct vop_generic_args *ap)
222 {
223 
224 	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
225 }
226 
227 /*
228  * vop_std<something> and vop_no<something> are default functions for use by
229  * filesystems that need the "default reasonable" implementation for a
230  * particular operation.
231  *
 * Where documentation for the operation they implement exists, it is in the
 * corresponding VOP_<SOMETHING>(9) manual page (name in all uppercase).
234  */
235 
236 /*
237  * Default vop for filesystems that do not support name lookup
238  */
239 static int
240 vop_nolookup(ap)
241 	struct vop_lookup_args /* {
242 		struct vnode *a_dvp;
243 		struct vnode **a_vpp;
244 		struct componentname *a_cnp;
245 	} */ *ap;
246 {
247 
248 	*ap->a_vpp = NULL;
249 	return (ENOTDIR);
250 }
251 
/*
 * vop_norename:
 *
 * Rename entry point for filesystems without a rename operation.
 * Hands the arguments to vop_rename_fail(), which takes care of the
 * unlocking and reference counting, and then fails with EOPNOTSUPP.
 */
static int
vop_norename(struct vop_rename_args *ap)
{
	const int error = EOPNOTSUPP;

	vop_rename_fail(ap);
	return (error);
}
265 
266 /*
267  *	vop_nostrategy:
268  *
269  *	Strategy routine for VFS devices that have none.
270  *
271  *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
272  *	routine.  Typically this is done for a BIO_READ strategy call.
273  *	Typically B_INVAL is assumed to already be clear prior to a write
274  *	and should not be cleared manually unless you just made the buffer
275  *	invalid.  BIO_ERROR should be cleared either way.
276  */
277 
278 static int
279 vop_nostrategy (struct vop_strategy_args *ap)
280 {
281 	printf("No strategy for buffer at %p\n", ap->a_bp);
282 	vn_printf(ap->a_vp, "vnode ");
283 	ap->a_bp->b_ioflags |= BIO_ERROR;
284 	ap->a_bp->b_error = EOPNOTSUPP;
285 	bufdone(ap->a_bp);
286 	return (EOPNOTSUPP);
287 }
288 
289 static int
290 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
291 		int dirbuflen, off_t *off, char **cpos, int *len,
292 		int *eofflag, struct thread *td)
293 {
294 	int error, reclen;
295 	struct uio uio;
296 	struct iovec iov;
297 	struct dirent *dp;
298 
299 	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
300 	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
301 
302 	if (*len == 0) {
303 		iov.iov_base = dirbuf;
304 		iov.iov_len = dirbuflen;
305 
306 		uio.uio_iov = &iov;
307 		uio.uio_iovcnt = 1;
308 		uio.uio_offset = *off;
309 		uio.uio_resid = dirbuflen;
310 		uio.uio_segflg = UIO_SYSSPACE;
311 		uio.uio_rw = UIO_READ;
312 		uio.uio_td = td;
313 
314 		*eofflag = 0;
315 
316 #ifdef MAC
317 		error = mac_vnode_check_readdir(td->td_ucred, vp);
318 		if (error == 0)
319 #endif
320 			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
321 		    		NULL, NULL);
322 		if (error)
323 			return (error);
324 
325 		*off = uio.uio_offset;
326 
327 		*cpos = dirbuf;
328 		*len = (dirbuflen - uio.uio_resid);
329 
330 		if (*len == 0)
331 			return (ENOENT);
332 	}
333 
334 	dp = (struct dirent *)(*cpos);
335 	reclen = dp->d_reclen;
336 	*dpp = dp;
337 
338 	/* check for malformed directory.. */
339 	if (reclen < DIRENT_MINSIZE)
340 		return (EINVAL);
341 
342 	*cpos += reclen;
343 	*len -= reclen;
344 
345 	return (0);
346 }
347 
348 /*
349  * Check if a named file exists in a given directory vnode.
350  */
351 static int
352 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
353 {
354 	char *dirbuf, *cpos;
355 	int error, eofflag, dirbuflen, len, found;
356 	off_t off;
357 	struct dirent *dp;
358 	struct vattr va;
359 
360 	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
361 	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
362 
363 	found = 0;
364 
365 	error = VOP_GETATTR(vp, &va, td->td_ucred);
366 	if (error)
367 		return (found);
368 
369 	dirbuflen = DEV_BSIZE;
370 	if (dirbuflen < va.va_blocksize)
371 		dirbuflen = va.va_blocksize;
372 	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
373 
374 	off = 0;
375 	len = 0;
376 	do {
377 		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
378 					&cpos, &len, &eofflag, td);
379 		if (error)
380 			goto out;
381 
382 		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
383 		    strcmp(dp->d_name, dirname) == 0) {
384 			found = 1;
385 			goto out;
386 		}
387 	} while (len > 0 || !eofflag);
388 
389 out:
390 	free(dirbuf, M_TEMP);
391 	return (found);
392 }
393 
394 int
395 vop_stdaccess(struct vop_access_args *ap)
396 {
397 
398 	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
399 	    VAPPEND)) == 0, ("invalid bit in accmode"));
400 
401 	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
402 }
403 
404 int
405 vop_stdaccessx(struct vop_accessx_args *ap)
406 {
407 	int error;
408 	accmode_t accmode = ap->a_accmode;
409 
410 	error = vfs_unixify_accmode(&accmode);
411 	if (error != 0)
412 		return (error);
413 
414 	if (accmode == 0)
415 		return (0);
416 
417 	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
418 }
419 
420 /*
421  * Advisory record locking support
422  */
423 int
424 vop_stdadvlock(struct vop_advlock_args *ap)
425 {
426 	struct vnode *vp;
427 	struct mount *mp;
428 	struct vattr vattr;
429 	int error;
430 
431 	vp = ap->a_vp;
432 
433 	/*
434 	 * Provide atomicity of open(O_CREAT | O_EXCL | O_EXLOCK) for
435 	 * local filesystems.  See vn_open_cred() for reciprocal part.
436 	 */
437 	mp = vp->v_mount;
438 	if (mp != NULL && (mp->mnt_flag & MNT_LOCAL) != 0 &&
439 	    ap->a_op == F_SETLK && (ap->a_flags & F_FIRSTOPEN) == 0) {
440 		VI_LOCK(vp);
441 		while ((vp->v_iflag & VI_FOPENING) != 0)
442 			msleep(vp, VI_MTX(vp), PLOCK, "lockfo", 0);
443 		VI_UNLOCK(vp);
444 	}
445 
446 	if (ap->a_fl->l_whence == SEEK_END) {
447 		/*
448 		 * The NFSv4 server must avoid doing a vn_lock() here, since it
449 		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
450 		 * the NFSv4 server always uses SEEK_SET and this code is
451 		 * only required for the SEEK_END case.
452 		 */
453 		vn_lock(vp, LK_SHARED | LK_RETRY);
454 		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
455 		VOP_UNLOCK(vp);
456 		if (error)
457 			return (error);
458 	} else
459 		vattr.va_size = 0;
460 
461 	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
462 }
463 
464 int
465 vop_stdadvlockasync(struct vop_advlockasync_args *ap)
466 {
467 	struct vnode *vp;
468 	struct vattr vattr;
469 	int error;
470 
471 	vp = ap->a_vp;
472 	if (ap->a_fl->l_whence == SEEK_END) {
473 		/* The size argument is only needed for SEEK_END. */
474 		vn_lock(vp, LK_SHARED | LK_RETRY);
475 		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
476 		VOP_UNLOCK(vp);
477 		if (error)
478 			return (error);
479 	} else
480 		vattr.va_size = 0;
481 
482 	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
483 }
484 
485 int
486 vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
487 {
488 	struct vnode *vp;
489 
490 	vp = ap->a_vp;
491 	lf_purgelocks(vp, &vp->v_lockf);
492 	return (0);
493 }
494 
495 /*
496  * vop_stdpathconf:
497  *
498  * Standard implementation of POSIX pathconf, to get information about limits
499  * for a filesystem.
500  * Override per filesystem for the case where the filesystem has smaller
501  * limits.
502  */
503 int
504 vop_stdpathconf(ap)
505 	struct vop_pathconf_args /* {
506 	struct vnode *a_vp;
507 	int a_name;
508 	int *a_retval;
509 	} */ *ap;
510 {
511 
512 	switch (ap->a_name) {
513 		case _PC_ASYNC_IO:
514 			*ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
515 			return (0);
516 		case _PC_PATH_MAX:
517 			*ap->a_retval = PATH_MAX;
518 			return (0);
519 		case _PC_ACL_EXTENDED:
520 		case _PC_ACL_NFS4:
521 		case _PC_CAP_PRESENT:
522 		case _PC_DEALLOC_PRESENT:
523 		case _PC_INF_PRESENT:
524 		case _PC_MAC_PRESENT:
525 			*ap->a_retval = 0;
526 			return (0);
527 		default:
528 			return (EINVAL);
529 	}
530 	/* NOTREACHED */
531 }
532 
533 /*
534  * Standard lock, unlock and islocked functions.
535  */
536 int
537 vop_stdlock(ap)
538 	struct vop_lock1_args /* {
539 		struct vnode *a_vp;
540 		int a_flags;
541 		char *file;
542 		int line;
543 	} */ *ap;
544 {
545 	struct vnode *vp = ap->a_vp;
546 	struct mtx *ilk;
547 
548 	ilk = VI_MTX(vp);
549 	return (lockmgr_lock_flags(vp->v_vnlock, ap->a_flags,
550 	    &ilk->lock_object, ap->a_file, ap->a_line));
551 }
552 
553 /* See above. */
554 int
555 vop_stdunlock(ap)
556 	struct vop_unlock_args /* {
557 		struct vnode *a_vp;
558 	} */ *ap;
559 {
560 	struct vnode *vp = ap->a_vp;
561 
562 	return (lockmgr_unlock(vp->v_vnlock));
563 }
564 
565 /* See above. */
566 int
567 vop_stdislocked(ap)
568 	struct vop_islocked_args /* {
569 		struct vnode *a_vp;
570 	} */ *ap;
571 {
572 
573 	return (lockstatus(ap->a_vp->v_vnlock));
574 }
575 
576 /*
577  * Variants of the above set.
578  *
579  * Differences are:
580  * - shared locking disablement is not supported
581  * - v_vnlock pointer is not honored
582  */
583 int
584 vop_lock(ap)
585 	struct vop_lock1_args /* {
586 		struct vnode *a_vp;
587 		int a_flags;
588 		char *file;
589 		int line;
590 	} */ *ap;
591 {
592 	struct vnode *vp = ap->a_vp;
593 	int flags = ap->a_flags;
594 	struct mtx *ilk;
595 
596 	MPASS(vp->v_vnlock == &vp->v_lock);
597 
598 	if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0))
599 		goto other;
600 
601 	switch (flags & LK_TYPE_MASK) {
602 	case LK_SHARED:
603 		return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line));
604 	case LK_EXCLUSIVE:
605 		return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line));
606 	}
607 other:
608 	ilk = VI_MTX(vp);
609 	return (lockmgr_lock_flags(&vp->v_lock, flags,
610 	    &ilk->lock_object, ap->a_file, ap->a_line));
611 }
612 
613 int
614 vop_unlock(ap)
615 	struct vop_unlock_args /* {
616 		struct vnode *a_vp;
617 	} */ *ap;
618 {
619 	struct vnode *vp = ap->a_vp;
620 
621 	MPASS(vp->v_vnlock == &vp->v_lock);
622 
623 	return (lockmgr_unlock(&vp->v_lock));
624 }
625 
626 int
627 vop_islocked(ap)
628 	struct vop_islocked_args /* {
629 		struct vnode *a_vp;
630 	} */ *ap;
631 {
632 	struct vnode *vp = ap->a_vp;
633 
634 	MPASS(vp->v_vnlock == &vp->v_lock);
635 
636 	return (lockstatus(&vp->v_lock));
637 }
638 
639 /*
640  * Return true for select/poll.
641  */
642 int
643 vop_nopoll(ap)
644 	struct vop_poll_args /* {
645 		struct vnode *a_vp;
646 		int  a_events;
647 		struct ucred *a_cred;
648 		struct thread *a_td;
649 	} */ *ap;
650 {
651 
652 	if (ap->a_events & ~POLLSTANDARD)
653 		return (POLLNVAL);
654 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
655 }
656 
657 /*
658  * Implement poll for local filesystems that support it.
659  */
660 int
661 vop_stdpoll(ap)
662 	struct vop_poll_args /* {
663 		struct vnode *a_vp;
664 		int  a_events;
665 		struct ucred *a_cred;
666 		struct thread *a_td;
667 	} */ *ap;
668 {
669 	if (ap->a_events & ~POLLSTANDARD)
670 		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
671 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
672 }
673 
674 /*
675  * Return our mount point, as we will take charge of the writes.
676  */
677 int
678 vop_stdgetwritemount(ap)
679 	struct vop_getwritemount_args /* {
680 		struct vnode *a_vp;
681 		struct mount **a_mpp;
682 	} */ *ap;
683 {
684 	struct mount *mp;
685 	struct vnode *vp;
686 
687 	/*
688 	 * Note that having a reference does not prevent forced unmount from
689 	 * setting ->v_mount to NULL after the lock gets released. This is of
690 	 * no consequence for typical consumers (most notably vn_start_write)
691 	 * since in this case the vnode is VIRF_DOOMED. Unmount might have
692 	 * progressed far enough that its completion is only delayed by the
693 	 * reference obtained here. The consumer only needs to concern itself
694 	 * with releasing it.
695 	 */
696 	vp = ap->a_vp;
697 	mp = vfs_ref_from_vp(vp);
698 	*(ap->a_mpp) = mp;
699 	return (0);
700 }
701 
702 /*
703  * If the file system doesn't implement VOP_BMAP, then return sensible defaults:
704  * - Return the vnode's bufobj instead of any underlying device's bufobj
705  * - Calculate the physical block number as if there were equal size
706  *   consecutive blocks, but
707  * - Report no contiguous runs of blocks.
708  */
709 int
710 vop_stdbmap(ap)
711 	struct vop_bmap_args /* {
712 		struct vnode *a_vp;
713 		daddr_t  a_bn;
714 		struct bufobj **a_bop;
715 		daddr_t *a_bnp;
716 		int *a_runp;
717 		int *a_runb;
718 	} */ *ap;
719 {
720 
721 	if (ap->a_bop != NULL)
722 		*ap->a_bop = &ap->a_vp->v_bufobj;
723 	if (ap->a_bnp != NULL)
724 		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
725 	if (ap->a_runp != NULL)
726 		*ap->a_runp = 0;
727 	if (ap->a_runb != NULL)
728 		*ap->a_runb = 0;
729 	return (0);
730 }
731 
732 int
733 vop_stdfsync(ap)
734 	struct vop_fsync_args /* {
735 		struct vnode *a_vp;
736 		int a_waitfor;
737 		struct thread *a_td;
738 	} */ *ap;
739 {
740 
741 	return (vn_fsync_buf(ap->a_vp, ap->a_waitfor));
742 }
743 
744 static int
745 vop_stdfdatasync(struct vop_fdatasync_args *ap)
746 {
747 
748 	return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td));
749 }
750 
751 int
752 vop_stdfdatasync_buf(struct vop_fdatasync_args *ap)
753 {
754 
755 	return (vn_fsync_buf(ap->a_vp, MNT_WAIT));
756 }
757 
758 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
759 int
760 vop_stdgetpages(ap)
761 	struct vop_getpages_args /* {
762 		struct vnode *a_vp;
763 		vm_page_t *a_m;
764 		int a_count;
765 		int *a_rbehind;
766 		int *a_rahead;
767 	} */ *ap;
768 {
769 
770 	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
771 	    ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL);
772 }
773 
774 static int
775 vop_stdgetpages_async(struct vop_getpages_async_args *ap)
776 {
777 	int error;
778 
779 	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
780 	    ap->a_rahead);
781 	if (ap->a_iodone != NULL)
782 		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
783 	return (error);
784 }
785 
/* Default VOP_KQFILTER: delegates to vfs_kqfilter() and returns its result. */
int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{

	return (vfs_kqfilter(ap));
}
791 
792 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
793 int
794 vop_stdputpages(ap)
795 	struct vop_putpages_args /* {
796 		struct vnode *a_vp;
797 		vm_page_t *a_m;
798 		int a_count;
799 		int a_sync;
800 		int *a_rtvals;
801 	} */ *ap;
802 {
803 
804 	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
805 	     ap->a_sync, ap->a_rtvals);
806 }
807 
/*
 * Default VOP_VPTOFH: ignores its argument and always fails with
 * EOPNOTSUPP.
 */
int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	(void)ap;	/* argument intentionally unused */
	return (EOPNOTSUPP);
}
813 
/*
 * Default VOP_VPTOCNP: find the name of directory ap->a_vp in its parent.
 *
 * Opens ".." relative to vp, scans that directory with get_next_dirent()
 * for an entry whose d_fileno equals vp's va_fileid, and copies the name
 * (without terminator) into ap->a_buf so that it ends at the incoming
 * *ap->a_buflen.  On success *ap->a_buflen is lowered to the start of the
 * name and *ap->a_vpp holds the parent, referenced via vref().
 *
 * vp is unlocked while the parent is examined and is relocked with its
 * previous lock state before every return after that point.
 *
 * Returns 0 on success; ENOENT if vp or the parent is not a directory, no
 * entry matches, the match is ".", or (union mounts) the name also exists
 * in the upper directory; ENOMEM if the name does not fit in the buffer;
 * otherwise the error from VOP_GETATTR(), vn_open_cred() or the scan.
 */
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred;
	char *buf = ap->a_buf;
	size_t *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	/* i tracks the free space left in front of the name in buf. */
	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;
	cred = td->td_ucred;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	/*
	 * Remember how vp was locked, then drop the lock while ".." is
	 * opened with vp as the starting directory.
	 */
	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* mvp always names the vnode that was opened; *dvp may be switched. */
	mvp = *dvp = nd.ni_vp;

	/*
	 * The parent is the root of a different, union-mounted filesystem:
	 * search the directory it covers instead.  The opened vnode is
	 * closed but kept referenced in mvp for the shadowing check below.
	 */
	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	/* Scratch buffer: the larger of DEV_BSIZE and vp's block size. */
	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				/*
				 * Only one of the two directories is locked
				 * at a time.  A name that also exists in the
				 * upper directory (mvp) is rejected.
				 */
				VOP_UNLOCK(*dvp);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			/* Reserve room for the name at the tail of buf. */
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		/* Publish the new buffer position and reference the result. */
		*buflen = i;
		vref(*dvp);
	}
	/* Undo the lock/reference (covered) or the open (otherwise). */
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
940 
/*
 * Default VOP_ALLOCATE: back the range [*a_offset, *a_offset + *a_len)
 * with storage by rewriting it one block-sized chunk at a time.  Data
 * below the current file size is read and written back; beyond it zeroes
 * are written.
 *
 * The loop stops early when should_yield() says so or on an I/O error.
 * In every case *ap->a_len and *ap->a_offset are updated to describe the
 * part that has not been processed yet.  Returns 0 or the first error
 * from VOP_GETATTR(), VOP_SETATTR(), VOP_READ() or VOP_WRITE().
 */
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs *sfs;
	off_t maxfilesize = 0;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, ap->a_cred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	/* Chunk size: the vnode's block size, defaulted and capped at maxphys. */
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > maxphys)
		iosize = maxphys;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = VFS_STATFS(vp->v_mount, sfs, td);
	if (error == 0)
		maxfilesize = sfs->f_maxfilesize;
	free(sfs, M_STATFS);
	if (error != 0)
		goto out;
	if (maxfilesize) {
		if (offset > maxfilesize || len > maxfilesize ||
		    offset + len > maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize:
		 * grow the file to the requested end, then restore the
		 * original size.  Either step failing aborts the request.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, ap->a_cred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, ap->a_cred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		/* Size this chunk to end on an iosize boundary or at len. */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, ap->a_ioflag, ap->a_cred);
			if (error != 0)
				break;
			/* Short read: zero the tail that was not filled. */
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, ap->a_ioflag, ap->a_cred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		/* Return partial progress rather than hog the CPU. */
		if (should_yield())
			break;
	}

 out:
	/* Report what remains; buf may still be NULL here. */
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
1073 
1074 static int
1075 vp_zerofill(struct vnode *vp, struct vattr *vap, off_t *offsetp, off_t *lenp,
1076     int ioflag, struct ucred *cred)
1077 {
1078 	int iosize;
1079 	int error = 0;
1080 	struct iovec aiov;
1081 	struct uio auio;
1082 	struct thread *td;
1083 	off_t offset, len;
1084 
1085 	iosize = vap->va_blocksize;
1086 	td = curthread;
1087 	offset = *offsetp;
1088 	len = *lenp;
1089 
1090 	if (iosize == 0)
1091 		iosize = BLKDEV_IOSIZE;
1092 	/* If va_blocksize is 512 bytes, iosize will be 4 kilobytes */
1093 	iosize = min(iosize * 8, ZERO_REGION_SIZE);
1094 
1095 	while (len > 0) {
1096 		int xfersize = iosize;
1097 		if (offset % iosize != 0)
1098 			xfersize -= offset % iosize;
1099 		if (xfersize > len)
1100 			xfersize = len;
1101 
1102 		aiov.iov_base = __DECONST(void *, zero_region);
1103 		aiov.iov_len = xfersize;
1104 		auio.uio_iov = &aiov;
1105 		auio.uio_iovcnt = 1;
1106 		auio.uio_offset = offset;
1107 		auio.uio_resid = xfersize;
1108 		auio.uio_segflg = UIO_SYSSPACE;
1109 		auio.uio_rw = UIO_WRITE;
1110 		auio.uio_td = td;
1111 
1112 		error = VOP_WRITE(vp, &auio, ioflag, cred);
1113 		if (error != 0) {
1114 			len -= xfersize - auio.uio_resid;
1115 			offset += xfersize - auio.uio_resid;
1116 			break;
1117 		}
1118 
1119 		len -= xfersize;
1120 		offset += xfersize;
1121 	}
1122 
1123 	*offsetp = offset;
1124 	*lenp = len;
1125 	return (error);
1126 }
1127 
/*
 * Default VOP_DEALLOCATE: emulate hole punching by writing zeroes over
 * the data regions inside the requested range.
 *
 * The range is clipped to the current file size.  Data regions are
 * located with FIOSEEKDATA/FIOSEEKHOLE via vn_bmap_seekhole_locked();
 * existing holes are skipped and data is overwritten with vp_zerofill().
 *
 * The loop stops early when should_yield() says so.  On return
 * *ap->a_offset and *ap->a_len describe what is left to process.
 * Returns 0 or the error from VOP_GETATTR(), the seek helper (ENXIO from
 * FIOSEEKDATA is treated as "no more data") or vp_zerofill().
 */
int
vop_stddeallocate(struct vop_deallocate_args *ap)
{
	struct vnode *vp;
	off_t offset, len;
	struct ucred *cred;
	int error;
	struct vattr va;
	off_t noff, xfersize, rem;

	vp = ap->a_vp;
	offset = *ap->a_offset;
	cred = ap->a_cred;

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	/* Clip to EOF; this is negative when offset lies beyond EOF. */
	len = omin((off_t)va.va_size - offset, *ap->a_len);
	while (len > 0) {
		/* Find the next data region at or after offset. */
		noff = offset;
		error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred);
		if (error) {
			if (error != ENXIO)
				/* XXX: Is it okay to fallback further? */
				goto out;

			/*
			 * No more data region to be filled
			 */
			offset += len;
			len = 0;
			error = 0;
			break;
		}
		KASSERT(noff >= offset, ("FIOSEEKDATA going backward"));
		if (noff != offset) {
			/* Skip the hole in front of the data region. */
			xfersize = omin(noff - offset, len);
			len -= xfersize;
			offset += xfersize;
			if (len == 0)
				break;
		}
		/* Find where this data region ends. */
		error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred);
		if (error)
			goto out;

		/* Fill zeroes */
		xfersize = rem = omin(noff - offset, len);
		error = vp_zerofill(vp, &va, &offset, &rem, ap->a_ioflag, cred);
		if (error) {
			/* rem is what vp_zerofill() left unwritten. */
			len -= xfersize - rem;
			goto out;
		}

		len -= xfersize;
		if (should_yield())
			break;
	}
	/* Handle the case when offset is beyond EOF */
	if (len < 0)
		len = 0;
out:
	*ap->a_offset = offset;
	*ap->a_len = len;
	return (error);
}
1195 
/*
 * Default VOP_ADVISE.
 *
 * POSIX_FADV_WILLNEED is accepted and ignored.  POSIX_FADV_DONTNEED takes
 * the vnode lock exclusively and, unless the vnode is doomed, marks the
 * pages and buffers covering [a_start, a_end] (expanded to block and page
 * boundaries) as not worth keeping.  Any other advice yields EINVAL.
 *
 * Returns 0, EINVAL, or the error from bnoreuselist().
 */
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	daddr_t startn, endn;
	off_t bstart, bend, start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		/* Nothing to do for a doomed vnode. */
		if (VN_IS_DOOMED(vp)) {
			VOP_UNLOCK(vp);
			break;
		}

		/*
		 * Round to block boundaries (and later possibly further to
		 * page boundaries).  Applications cannot reasonably be aware
		 * of the boundaries, and the rounding must be to expand at
		 * both extremities to cover enough.  It still doesn't cover
		 * read-ahead.  For partial blocks, this gives unnecessary
		 * discarding of buffers but is efficient enough since the
		 * pages usually remain in VMIO for some time.
		 */
		bsize = vp->v_bufobj.bo_bsize;
		bstart = rounddown(ap->a_start, bsize);
		bend = roundup(ap->a_end, bsize);

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(bstart);
			end = round_page(bend);
			VM_OBJECT_RLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_RUNLOCK(vp->v_object);
		}

		/*
		 * Process the clean list, then the dirty list, for the block
		 * range; a failure on the clean list skips the dirty list.
		 */
		bo = &vp->v_bufobj;
		BO_RLOCK(bo);
		startn = bstart / bsize;
		endn = bend / bsize;
		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
		if (error == 0)
			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
		BO_RUNLOCK(bo);
		VOP_UNLOCK(vp);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
1267 
1268 int
1269 vop_stdunp_bind(struct vop_unp_bind_args *ap)
1270 {
1271 
1272 	ap->a_vp->v_unpcb = ap->a_unpcb;
1273 	return (0);
1274 }
1275 
1276 int
1277 vop_stdunp_connect(struct vop_unp_connect_args *ap)
1278 {
1279 
1280 	*ap->a_unpcb = ap->a_vp->v_unpcb;
1281 	return (0);
1282 }
1283 
1284 int
1285 vop_stdunp_detach(struct vop_unp_detach_args *ap)
1286 {
1287 
1288 	ap->a_vp->v_unpcb = NULL;
1289 	return (0);
1290 }
1291 
1292 static int
1293 vop_stdis_text(struct vop_is_text_args *ap)
1294 {
1295 
1296 	return (ap->a_vp->v_writecount < 0);
1297 }
1298 
1299 int
1300 vop_stdset_text(struct vop_set_text_args *ap)
1301 {
1302 	struct vnode *vp;
1303 	struct mount *mp;
1304 	int error, n;
1305 
1306 	vp = ap->a_vp;
1307 
1308 	/*
1309 	 * Avoid the interlock if execs are already present.
1310 	 */
1311 	n = atomic_load_int(&vp->v_writecount);
1312 	for (;;) {
1313 		if (n > -1) {
1314 			break;
1315 		}
1316 		if (atomic_fcmpset_int(&vp->v_writecount, &n, n - 1)) {
1317 			return (0);
1318 		}
1319 	}
1320 
1321 	VI_LOCK(vp);
1322 	if (vp->v_writecount > 0) {
1323 		error = ETXTBSY;
1324 	} else {
1325 		/*
1326 		 * If requested by fs, keep a use reference to the
1327 		 * vnode until the last text reference is released.
1328 		 */
1329 		mp = vp->v_mount;
1330 		if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 &&
1331 		    vp->v_writecount == 0) {
1332 			VNPASS((vp->v_iflag & VI_TEXT_REF) == 0, vp);
1333 			vp->v_iflag |= VI_TEXT_REF;
1334 			vrefl(vp);
1335 		}
1336 
1337 		atomic_subtract_int(&vp->v_writecount, 1);
1338 		error = 0;
1339 	}
1340 	VI_UNLOCK(vp);
1341 	return (error);
1342 }
1343 
1344 static int
1345 vop_stdunset_text(struct vop_unset_text_args *ap)
1346 {
1347 	struct vnode *vp;
1348 	int error, n;
1349 	bool last;
1350 
1351 	vp = ap->a_vp;
1352 
1353 	/*
1354 	 * Avoid the interlock if this is not the last exec.
1355 	 */
1356 	n = atomic_load_int(&vp->v_writecount);
1357 	for (;;) {
1358 		if (n >= -1) {
1359 			break;
1360 		}
1361 		if (atomic_fcmpset_int(&vp->v_writecount, &n, n + 1)) {
1362 			return (0);
1363 		}
1364 	}
1365 
1366 	last = false;
1367 	VI_LOCK(vp);
1368 	if (vp->v_writecount < 0) {
1369 		if ((vp->v_iflag & VI_TEXT_REF) != 0 &&
1370 		    vp->v_writecount == -1) {
1371 			last = true;
1372 			vp->v_iflag &= ~VI_TEXT_REF;
1373 		}
1374 		atomic_add_int(&vp->v_writecount, 1);
1375 		error = 0;
1376 	} else {
1377 		error = EINVAL;
1378 	}
1379 	VI_UNLOCK(vp);
1380 	if (last)
1381 		vunref(vp);
1382 	return (error);
1383 }
1384 
1385 static int
1386 vop_stdadd_writecount(struct vop_add_writecount_args *ap)
1387 {
1388 	struct vnode *vp;
1389 	struct mount *mp;
1390 	int error;
1391 
1392 	vp = ap->a_vp;
1393 	VI_LOCK_FLAGS(vp, MTX_DUPOK);
1394 	if (vp->v_writecount < 0) {
1395 		error = ETXTBSY;
1396 	} else {
1397 		VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
1398 		    ("neg writecount increment %d", ap->a_inc));
1399 		if (vp->v_writecount == 0) {
1400 			mp = vp->v_mount;
1401 			if (mp != NULL && (mp->mnt_kern_flag & MNTK_NOMSYNC) == 0)
1402 				vlazy(vp);
1403 		}
1404 		vp->v_writecount += ap->a_inc;
1405 		error = 0;
1406 	}
1407 	VI_UNLOCK(vp);
1408 	return (error);
1409 }
1410 
/*
 * Default VOP_NEED_INACTIVE: unconditionally answer 1.  The argument
 * structure is not examined.
 */
int
vop_stdneed_inactive(struct vop_need_inactive_args *ap)
{

	return (1);
}
1417 
1418 int
1419 vop_stdioctl(struct vop_ioctl_args *ap)
1420 {
1421 	struct vnode *vp;
1422 	struct vattr va;
1423 	off_t *offp;
1424 	int error;
1425 
1426 	switch (ap->a_command) {
1427 	case FIOSEEKDATA:
1428 	case FIOSEEKHOLE:
1429 		vp = ap->a_vp;
1430 		error = vn_lock(vp, LK_SHARED);
1431 		if (error != 0)
1432 			return (EBADF);
1433 		if (vp->v_type == VREG)
1434 			error = VOP_GETATTR(vp, &va, ap->a_cred);
1435 		else
1436 			error = ENOTTY;
1437 		if (error == 0) {
1438 			offp = ap->a_data;
1439 			if (*offp < 0 || *offp >= va.va_size)
1440 				error = ENXIO;
1441 			else if (ap->a_command == FIOSEEKHOLE)
1442 				*offp = va.va_size;
1443 		}
1444 		VOP_UNLOCK(vp);
1445 		break;
1446 	default:
1447 		error = ENOTTY;
1448 		break;
1449 	}
1450 	return (error);
1451 }
1452 
1453 /*
1454  * vfs default ops
1455  * used to fill the vfs function table to get reasonable default return values.
1456  */
/*
 * Default VFS root operation: report that the operation is not
 * supported.  All arguments are ignored.
 *
 * Returns EOPNOTSUPP.
 */
int
vfs_stdroot(struct mount *mp, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}
1466 
/*
 * Default VFS statfs operation: report that the operation is not
 * supported.  All arguments are ignored.
 *
 * Returns EOPNOTSUPP.
 */
int
vfs_stdstatfs(struct mount *mp, struct statfs *sbp)
{

	return (EOPNOTSUPP);
}
1475 
/*
 * Default VFS quotactl operation: report that the operation is not
 * supported.  All arguments are ignored; in particular *mp_busy is
 * neither read nor written.
 *
 * Returns EOPNOTSUPP.
 */
int
vfs_stdquotactl(struct mount *mp, int cmds, uid_t uid, void *arg,
    bool *mp_busy)
{

	return (EOPNOTSUPP);
}
1486 
1487 int
1488 vfs_stdsync(mp, waitfor)
1489 	struct mount *mp;
1490 	int waitfor;
1491 {
1492 	struct vnode *vp, *mvp;
1493 	struct thread *td;
1494 	int error, lockreq, allerror = 0;
1495 
1496 	td = curthread;
1497 	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
1498 	if (waitfor != MNT_WAIT)
1499 		lockreq |= LK_NOWAIT;
1500 	/*
1501 	 * Force stale buffer cache information to be flushed.
1502 	 */
1503 loop:
1504 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1505 		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
1506 			VI_UNLOCK(vp);
1507 			continue;
1508 		}
1509 		if ((error = vget(vp, lockreq)) != 0) {
1510 			if (error == ENOENT) {
1511 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1512 				goto loop;
1513 			}
1514 			continue;
1515 		}
1516 		error = VOP_FSYNC(vp, waitfor, td);
1517 		if (error)
1518 			allerror = error;
1519 		vput(vp);
1520 	}
1521 	return (allerror);
1522 }
1523 
/*
 * VFS sync operation that does nothing: both arguments are ignored.
 *
 * Returns 0.
 */
int
vfs_stdnosync(struct mount *mp, int waitfor)
{

	return (0);
}
1532 
1533 static int
1534 vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
1535 {
1536 	int error;
1537 
1538 	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
1539 	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
1540 	    ap->a_outcred, ap->a_fsizetd);
1541 	return (error);
1542 }
1543 
/*
 * Default VFS vget operation: report that the operation is not
 * supported.  All arguments are ignored.
 *
 * Returns EOPNOTSUPP.
 */
int
vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}
1554 
/*
 * Default VFS fhtovp operation: report that the operation is not
 * supported.  All arguments are ignored.
 *
 * Returns EOPNOTSUPP.
 */
int
vfs_stdfhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{

	return (EOPNOTSUPP);
}
1565 
/*
 * Default VFS init operation: nothing to set up.  The argument is
 * ignored.
 *
 * Returns 0.
 */
int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}
1573 
/*
 * Default VFS uninit operation: nothing to tear down.  The argument is
 * ignored.
 *
 * Returns 0.
 */
int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}
1581 
1582 int
1583 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
1584 	struct mount *mp;
1585 	int cmd;
1586 	struct vnode *filename_vp;
1587 	int attrnamespace;
1588 	const char *attrname;
1589 {
1590 
1591 	if (filename_vp != NULL)
1592 		VOP_UNLOCK(filename_vp);
1593 	return (EOPNOTSUPP);
1594 }
1595 
1596 int
1597 vfs_stdsysctl(mp, op, req)
1598 	struct mount *mp;
1599 	fsctlop_t op;
1600 	struct sysctl_req *req;
1601 {
1602 
1603 	return (EOPNOTSUPP);
1604 }
1605 
1606 static vop_bypass_t *
1607 bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
1608 {
1609 
1610 	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
1611 }
1612 
1613 int
1614 vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
1615 {
1616 	vop_bypass_t *bp;
1617 	int prev_stops, rc;
1618 
1619 	bp = bp_by_off(vop, a);
1620 	MPASS(bp != NULL);
1621 
1622 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
1623 	rc = bp(a);
1624 	sigallowstop(prev_stops);
1625 	return (rc);
1626 }
1627 
1628 static int
1629 vop_stdstat(struct vop_stat_args *a)
1630 {
1631 	struct vattr vattr;
1632 	struct vattr *vap;
1633 	struct vnode *vp;
1634 	struct stat *sb;
1635 	int error;
1636 	u_short mode;
1637 
1638 	vp = a->a_vp;
1639 	sb = a->a_sb;
1640 
1641 	error = vop_stat_helper_pre(a);
1642 	if (error != 0)
1643 		return (error);
1644 
1645 	vap = &vattr;
1646 
1647 	/*
1648 	 * Initialize defaults for new and unusual fields, so that file
1649 	 * systems which don't support these fields don't need to know
1650 	 * about them.
1651 	 */
1652 	vap->va_birthtime.tv_sec = -1;
1653 	vap->va_birthtime.tv_nsec = 0;
1654 	vap->va_fsid = VNOVAL;
1655 	vap->va_gen = 0;
1656 	vap->va_rdev = NODEV;
1657 
1658 	error = VOP_GETATTR(vp, vap, a->a_active_cred);
1659 	if (error)
1660 		goto out;
1661 
1662 	/*
1663 	 * Zero the spare stat fields
1664 	 */
1665 	bzero(sb, sizeof *sb);
1666 
1667 	/*
1668 	 * Copy from vattr table
1669 	 */
1670 	if (vap->va_fsid != VNOVAL)
1671 		sb->st_dev = vap->va_fsid;
1672 	else
1673 		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
1674 	sb->st_ino = vap->va_fileid;
1675 	mode = vap->va_mode;
1676 	switch (vap->va_type) {
1677 	case VREG:
1678 		mode |= S_IFREG;
1679 		break;
1680 	case VDIR:
1681 		mode |= S_IFDIR;
1682 		break;
1683 	case VBLK:
1684 		mode |= S_IFBLK;
1685 		break;
1686 	case VCHR:
1687 		mode |= S_IFCHR;
1688 		break;
1689 	case VLNK:
1690 		mode |= S_IFLNK;
1691 		break;
1692 	case VSOCK:
1693 		mode |= S_IFSOCK;
1694 		break;
1695 	case VFIFO:
1696 		mode |= S_IFIFO;
1697 		break;
1698 	default:
1699 		error = EBADF;
1700 		goto out;
1701 	}
1702 	sb->st_mode = mode;
1703 	sb->st_nlink = vap->va_nlink;
1704 	sb->st_uid = vap->va_uid;
1705 	sb->st_gid = vap->va_gid;
1706 	sb->st_rdev = vap->va_rdev;
1707 	if (vap->va_size > OFF_MAX) {
1708 		error = EOVERFLOW;
1709 		goto out;
1710 	}
1711 	sb->st_size = vap->va_size;
1712 	sb->st_atim.tv_sec = vap->va_atime.tv_sec;
1713 	sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
1714 	sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
1715 	sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
1716 	sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
1717 	sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
1718 	sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
1719 	sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
1720 
1721 	/*
1722 	 * According to www.opengroup.org, the meaning of st_blksize is
1723 	 *   "a filesystem-specific preferred I/O block size for this
1724 	 *    object.  In some filesystem types, this may vary from file
1725 	 *    to file"
1726 	 * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
1727 	 */
1728 
1729 	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
1730 	sb->st_flags = vap->va_flags;
1731 	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
1732 	sb->st_gen = vap->va_gen;
1733 out:
1734 	return (vop_stat_helper_post(a, error));
1735 }
1736 
1737 static int
1738 vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused)
1739 {
1740 	return (EJUSTRETURN);
1741 }
1742 
1743 static int
1744 vop_stdvput_pair(struct vop_vput_pair_args *ap)
1745 {
1746 	struct vnode *dvp, *vp, **vpp;
1747 
1748 	dvp = ap->a_dvp;
1749 	vpp = ap->a_vpp;
1750 	vput(dvp);
1751 	if (vpp != NULL && ap->a_unlock_vp && (vp = *vpp) != NULL)
1752 		vput(vp);
1753 	return (0);
1754 }
1755