xref: /freebsd/sys/kern/vfs_default.c (revision 4ed925457ab06e83238a5db33e89ccc94b99a713)
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

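/*
 * Editor's note (not in the original source): sizeof(struct dirent)
 * includes the full MAXNAMLEN + 1 byte d_name buffer, so subtracting
 * that leaves the fixed dirent header; the remaining 4 bytes are the
 * smallest name area a valid, 4-byte-aligned record can carry.  Any
 * d_reclen below this is treated as a malformed directory entry.
 */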
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which in turn calls vop_stdaccess() again, recursing without bound.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
};
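
/*
 * Illustrative sketch (editor's addition; the "myfs" names are
 * hypothetical): a filesystem supplies its own operations and chains to
 * the defaults above for everything else through vop_default:
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_readdir =	myfs_readdir,
 *	};
 *
 * Any VOP absent from myfs_vnodeops then resolves to the entry in
 * default_vnodeops, or to the vop_bypass handler (VOP_EOPNOTSUPP here)
 * when no entry exists at all.
 */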

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The operations they implement are documented, where documentation
 * exists, in the corresponding VOP_<SOMETHING>(9) manual page (all
 * uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(struct vop_lookup_args /* {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
} */ *ap)
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy(struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    !strcmp(dp->d_name, dirname)) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, returning information about
 * filesystem limits.  Filesystems should override this per filesystem when
 * they impose smaller limits than the system-wide values reported here.
 */
int
vop_stdpathconf(struct vop_pathconf_args /* {
	struct vnode *a_vp;
	int a_name;
	int *a_retval;
} */ *ap)
{

	switch (ap->a_name) {
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}
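
/*
 * Illustrative sketch (editor's addition; "myfs" and its limit are
 * hypothetical): a filesystem with a tighter limit handles the names it
 * cares about and falls back on vop_stdpathconf() for the rest:
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *
 *		switch (ap->a_name) {
 *		case _PC_NAME_MAX:
 *			*ap->a_retval = 32;
 *			return (0);
 *		default:
 *			return (vop_stdpathconf(ap));
 *		}
 *	}
 */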

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(struct vop_lock1_args /* {
	struct vnode *a_vp;
	int a_flags;
	char *a_file;
	int a_line;
} */ *ap)
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(struct vop_unlock_args /* {
	struct vnode *a_vp;
	int a_flags;
} */ *ap)
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(struct vop_islocked_args /* {
	struct vnode *a_vp;
} */ *ap)
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Default poll routine for filesystems without their own: report the
 * standard read/write events as ready and fail extended events (see
 * poll_no_poll()).
 */
int
vop_nopoll(struct vop_poll_args /* {
	struct vnode *a_vp;
	int  a_events;
	struct ucred *a_cred;
	struct thread *a_td;
} */ *ap)
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(struct vop_poll_args /* {
	struct vnode *a_vp;
	int  a_events;
	struct ucred *a_cred;
	struct thread *a_td;
} */ *ap)
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(struct vop_getwritemount_args /* {
	struct vnode *a_vp;
	struct mount **a_mpp;
} */ *ap)
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case, our v_mount
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/* XXX Needs good comment and VOP_BMAP(9) manpage */
int
vop_stdbmap(struct vop_bmap_args /* {
	struct vnode *a_vp;
	daddr_t a_bn;
	struct bufobj **a_bop;
	daddr_t *a_bnp;
	int *a_runp;
	int *a_runb;
} */ *ap)
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn *
		    btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
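
/*
 * Worked example (editor's addition): with an f_iosize of 16384 bytes
 * and a DEV_BSIZE of 512, btodb(16384) is 32, so logical block 5 maps
 * to device block 5 * 32 = 160.  The default thus assumes a strictly
 * linear layout, and by reporting zero-length runs it disables any
 * clustering of adjacent blocks.
 */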

int
vop_stdfsync(struct vop_fsync_args /* {
	struct vnode *a_vp;
	struct ucred *a_cred;
	int a_waitfor;
	struct thread *a_td;
} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(struct vop_getpages_args /* {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_reqpage;
	vm_ooffset_t a_offset;
} */ *ap)
{

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage));
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return (vfs_kqfilter(ap));
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(struct vop_putpages_args /* {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_sync;
	int *a_rtvals;
	vm_ooffset_t a_offset;
} */ *ap)
{

	return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals));
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;
			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vhold(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
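
/*
 * Usage sketch (editor's addition): VOP_VPTOCNP fills the caller's
 * buffer from the end, so path-reconstruction code can prepend each
 * component without shifting bytes around:
 *
 *	char buf[MAXPATHLEN];
 *	int buflen = sizeof(buf);
 *
 *	error = VOP_VPTOCNP(vp, &dvp, cred, buf, &buflen);
 *
 * On success the component name occupies buf + buflen through
 * buf + sizeof(buf) - 1, buflen has been decreased by the name length,
 * and *dvp is returned held (via vhold()) for the caller to drop.
 */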

/*
 * VFS default ops, used to fill the VFS function table with reasonable
 * default return values.
 */
int
vfs_stdroot(struct mount *mp, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(struct mount *mp, struct statfs *sbp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(struct mount *mp, int cmds, uid_t uid, void *arg)
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync(struct mount *mp, int waitfor)
{

	return (0);
}

int
vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */