xref: /freebsd/sys/kern/vfs_default.c (revision ca9ac06c99bfd0150b85d4d83c396ce6237c0e05)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed
6  * to Berkeley by John Heidemann of the UCLA Ficus project.
7  *
8  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/conf.h>
43 #include <sys/kernel.h>
44 #include <sys/limits.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/unistd.h>
50 #include <sys/vnode.h>
51 #include <sys/poll.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_extern.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_map.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pager.h>
60 #include <vm/vnode_pager.h>
61 
/* Forward declarations of the file-local default VOP handlers. */
static int	vop_nolookup(struct vop_lookup_args *ap);
static int	vop_nostrategy(struct vop_strategy_args *ap);
64 
65 /*
66  * This vnode table stores what we want to do if the filesystem doesn't
67  * implement a particular VOP.
68  *
69  * If there is no specific entry here, we will return EOPNOTSUPP.
70  *
71  */
72 
73 struct vop_vector default_vnodeops = {
74 	.vop_default =		NULL,
75 	.vop_bypass =		VOP_EOPNOTSUPP,
76 
77 	.vop_advlock =		VOP_EINVAL,
78 	.vop_bmap =		vop_stdbmap,
79 	.vop_close =		VOP_NULL,
80 	.vop_fsync =		VOP_NULL,
81 	.vop_getpages =		vop_stdgetpages,
82 	.vop_getwritemount = 	vop_stdgetwritemount,
83 	.vop_inactive =		VOP_NULL,
84 	.vop_ioctl =		VOP_ENOTTY,
85 	.vop_islocked =		vop_stdislocked,
86 	.vop_lease =		VOP_NULL,
87 	.vop_lock =		vop_stdlock,
88 	.vop_lookup =		vop_nolookup,
89 	.vop_open =		VOP_NULL,
90 	.vop_pathconf =		VOP_EINVAL,
91 	.vop_poll =		vop_nopoll,
92 	.vop_putpages =		vop_stdputpages,
93 	.vop_readlink =		VOP_EINVAL,
94 	.vop_revoke =		VOP_PANIC,
95 	.vop_strategy =		vop_nostrategy,
96 	.vop_unlock =		vop_stdunlock,
97 };
98 
99 /*
100  * Series of placeholder functions for various error returns for
101  * VOPs.
102  */
103 
/*
 * Placeholder VOP: ignores its argument and fails with EOPNOTSUPP.
 */
int
vop_eopnotsupp(struct vop_generic_args *ap)
{

	(void)ap;
	return (EOPNOTSUPP);
}
113 
/*
 * Placeholder VOP: ignores its argument and fails with EBADF.
 */
int
vop_ebadf(struct vop_generic_args *ap)
{

	(void)ap;
	return (EBADF);
}
120 
/*
 * Placeholder VOP: ignores its argument and fails with ENOTTY.
 */
int
vop_enotty(struct vop_generic_args *ap)
{

	(void)ap;
	return (ENOTTY);
}
127 
/*
 * Placeholder VOP: ignores its argument and fails with EINVAL.
 */
int
vop_einval(struct vop_generic_args *ap)
{

	(void)ap;
	return (EINVAL);
}
134 
/*
 * Placeholder VOP: ignores its argument and reports success (0).
 */
int
vop_null(struct vop_generic_args *ap)
{

	(void)ap;
	return (0);
}
141 
142 /*
143  * Helper function to panic on some bad VOPs in some filesystems.
144  */
145 int
146 vop_panic(struct vop_generic_args *ap)
147 {
148 
149 	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
150 }
151 
152 /*
153  * vop_std<something> and vop_no<something> are default functions for use by
154  * filesystems that need the "default reasonable" implementation for a
155  * particular operation.
156  *
157  * The documentation for the operations they implement exists (if it exists)
158  * in the VOP_<SOMETHING>(9) manpage (all uppercase).
159  */
160 
161 /*
162  * Default vop for filesystems that do not support name lookup
163  */
164 static int
165 vop_nolookup(ap)
166 	struct vop_lookup_args /* {
167 		struct vnode *a_dvp;
168 		struct vnode **a_vpp;
169 		struct componentname *a_cnp;
170 	} */ *ap;
171 {
172 
173 	*ap->a_vpp = NULL;
174 	return (ENOTDIR);
175 }
176 
177 /*
178  *	vop_nostrategy:
179  *
180  *	Strategy routine for VFS devices that have none.
181  *
182  *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
183  *	routine.  Typically this is done for a BIO_READ strategy call.
184  *	Typically B_INVAL is assumed to already be clear prior to a write
185  *	and should not be cleared manually unless you just made the buffer
186  *	invalid.  BIO_ERROR should be cleared either way.
187  */
188 
189 static int
190 vop_nostrategy (struct vop_strategy_args *ap)
191 {
192 	printf("No strategy for buffer at %p\n", ap->a_bp);
193 	vprint("vnode", ap->a_vp);
194 	ap->a_bp->b_ioflags |= BIO_ERROR;
195 	ap->a_bp->b_error = EOPNOTSUPP;
196 	bufdone(ap->a_bp);
197 	return (EOPNOTSUPP);
198 }
199 
200 /*
201  * vop_stdpathconf:
202  *
203  * Standard implementation of POSIX pathconf, to get information about limits
204  * for a filesystem.
205  * Override per filesystem for the case where the filesystem has smaller
206  * limits.
207  */
208 int
209 vop_stdpathconf(ap)
210 	struct vop_pathconf_args /* {
211 	struct vnode *a_vp;
212 	int a_name;
213 	int *a_retval;
214 	} */ *ap;
215 {
216 
217 	switch (ap->a_name) {
218 		case _PC_LINK_MAX:
219 			*ap->a_retval = LINK_MAX;
220 			return (0);
221 		case _PC_MAX_CANON:
222 			*ap->a_retval = MAX_CANON;
223 			return (0);
224 		case _PC_MAX_INPUT:
225 			*ap->a_retval = MAX_INPUT;
226 			return (0);
227 		case _PC_PIPE_BUF:
228 			*ap->a_retval = PIPE_BUF;
229 			return (0);
230 		case _PC_CHOWN_RESTRICTED:
231 			*ap->a_retval = 1;
232 			return (0);
233 		case _PC_VDISABLE:
234 			*ap->a_retval = _POSIX_VDISABLE;
235 			return (0);
236 		default:
237 			return (EINVAL);
238 	}
239 	/* NOTREACHED */
240 }
241 
242 /*
243  * Standard lock, unlock and islocked functions.
244  */
245 int
246 vop_stdlock(ap)
247 	struct vop_lock_args /* {
248 		struct vnode *a_vp;
249 		int a_flags;
250 		struct thread *a_td;
251 	} */ *ap;
252 {
253 	struct vnode *vp = ap->a_vp;
254 
255 #ifndef	DEBUG_LOCKS
256 	return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td));
257 #else
258 	return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
259 	    ap->a_td, "vop_stdlock", vp->filename, vp->line));
260 #endif
261 }
262 
263 /* See above. */
264 int
265 vop_stdunlock(ap)
266 	struct vop_unlock_args /* {
267 		struct vnode *a_vp;
268 		int a_flags;
269 		struct thread *a_td;
270 	} */ *ap;
271 {
272 	struct vnode *vp = ap->a_vp;
273 
274 	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
275 	    ap->a_td));
276 }
277 
278 /* See above. */
279 int
280 vop_stdislocked(ap)
281 	struct vop_islocked_args /* {
282 		struct vnode *a_vp;
283 		struct thread *a_td;
284 	} */ *ap;
285 {
286 
287 	return (lockstatus(ap->a_vp->v_vnlock, ap->a_td));
288 }
289 
290 /*
291  * Return true for select/poll.
292  */
293 int
294 vop_nopoll(ap)
295 	struct vop_poll_args /* {
296 		struct vnode *a_vp;
297 		int  a_events;
298 		struct ucred *a_cred;
299 		struct thread *a_td;
300 	} */ *ap;
301 {
302 	/*
303 	 * Return true for read/write.  If the user asked for something
304 	 * special, return POLLNVAL, so that clients have a way of
305 	 * determining reliably whether or not the extended
306 	 * functionality is present without hard-coding knowledge
307 	 * of specific filesystem implementations.
308 	 * Stay in sync with kern_conf.c::no_poll().
309 	 */
310 	if (ap->a_events & ~POLLSTANDARD)
311 		return (POLLNVAL);
312 
313 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
314 }
315 
316 /*
317  * Implement poll for local filesystems that support it.
318  */
319 int
320 vop_stdpoll(ap)
321 	struct vop_poll_args /* {
322 		struct vnode *a_vp;
323 		int  a_events;
324 		struct ucred *a_cred;
325 		struct thread *a_td;
326 	} */ *ap;
327 {
328 	if (ap->a_events & ~POLLSTANDARD)
329 		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
330 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
331 }
332 
333 /*
334  * Return our mount point, as we will take charge of the writes.
335  */
336 int
337 vop_stdgetwritemount(ap)
338 	struct vop_getwritemount_args /* {
339 		struct vnode *a_vp;
340 		struct mount **a_mpp;
341 	} */ *ap;
342 {
343 
344 	*(ap->a_mpp) = ap->a_vp->v_mount;
345 	return (0);
346 }
347 
348 /* XXX Needs good comment and VOP_BMAP(9) manpage */
349 int
350 vop_stdbmap(ap)
351 	struct vop_bmap_args /* {
352 		struct vnode *a_vp;
353 		daddr_t  a_bn;
354 		struct bufobj **a_bop;
355 		daddr_t *a_bnp;
356 		int *a_runp;
357 		int *a_runb;
358 	} */ *ap;
359 {
360 
361 	if (ap->a_bop != NULL)
362 		*ap->a_bop = &ap->a_vp->v_bufobj;
363 	if (ap->a_bnp != NULL)
364 		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
365 	if (ap->a_runp != NULL)
366 		*ap->a_runp = 0;
367 	if (ap->a_runb != NULL)
368 		*ap->a_runb = 0;
369 	return (0);
370 }
371 
372 int
373 vop_stdfsync(ap)
374 	struct vop_fsync_args /* {
375 		struct vnode *a_vp;
376 		struct ucred *a_cred;
377 		int a_waitfor;
378 		struct thread *a_td;
379 	} */ *ap;
380 {
381 	struct vnode *vp = ap->a_vp;
382 	struct buf *bp;
383 	struct bufobj *bo;
384 	struct buf *nbp;
385 	int s, error = 0;
386 	int maxretry = 100;     /* large, arbitrarily chosen */
387 
388 	VI_LOCK(vp);
389 loop1:
390 	/*
391 	 * MARK/SCAN initialization to avoid infinite loops.
392 	 */
393 	s = splbio();
394         TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
395                 bp->b_vflags &= ~BV_SCANNED;
396 		bp->b_error = 0;
397 	}
398 	splx(s);
399 
400 	/*
401 	 * Flush all dirty buffers associated with a block device.
402 	 */
403 loop2:
404 	s = splbio();
405 	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
406 		if ((bp->b_vflags & BV_SCANNED) != 0)
407 			continue;
408 		bp->b_vflags |= BV_SCANNED;
409 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
410 			continue;
411 		VI_UNLOCK(vp);
412 		if ((bp->b_flags & B_DELWRI) == 0)
413 			panic("fsync: not dirty");
414 		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
415 			vfs_bio_awrite(bp);
416 			splx(s);
417 		} else {
418 			bremfree(bp);
419 			splx(s);
420 			bawrite(bp);
421 		}
422 		VI_LOCK(vp);
423 		goto loop2;
424 	}
425 
426 	/*
427 	 * If synchronous the caller expects us to completely resolve all
428 	 * dirty buffers in the system.  Wait for in-progress I/O to
429 	 * complete (which could include background bitmap writes), then
430 	 * retry if dirty blocks still exist.
431 	 */
432 	if (ap->a_waitfor == MNT_WAIT) {
433 		bo = &vp->v_bufobj;
434 		bufobj_wwait(bo, 0, 0);
435 		if (bo->bo_dirty.bv_cnt > 0) {
436 			/*
437 			 * If we are unable to write any of these buffers
438 			 * then we fail now rather than trying endlessly
439 			 * to write them out.
440 			 */
441 			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
442 				if ((error = bp->b_error) == 0)
443 					continue;
444 			if (error == 0 && --maxretry >= 0) {
445 				splx(s);
446 				goto loop1;
447 			}
448 			vprint("fsync: giving up on dirty", vp);
449 			error = EAGAIN;
450 		}
451 	}
452 	VI_UNLOCK(vp);
453 	splx(s);
454 
455 	return (error);
456 }
457 
458 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
459 int
460 vop_stdgetpages(ap)
461 	struct vop_getpages_args /* {
462 		struct vnode *a_vp;
463 		vm_page_t *a_m;
464 		int a_count;
465 		int a_reqpage;
466 		vm_ooffset_t a_offset;
467 	} */ *ap;
468 {
469 
470 	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
471 	    ap->a_count, ap->a_reqpage);
472 }
473 
474 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
475 int
476 vop_stdputpages(ap)
477 	struct vop_putpages_args /* {
478 		struct vnode *a_vp;
479 		vm_page_t *a_m;
480 		int a_count;
481 		int a_sync;
482 		int *a_rtvals;
483 		vm_ooffset_t a_offset;
484 	} */ *ap;
485 {
486 
487 	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
488 	     ap->a_sync, ap->a_rtvals);
489 }
490 
491 /*
492  * vfs default ops
493  * used to fill the vfs function table to get reasonable default return values.
494  */
/*
 * Default root operation: not supported.  All arguments are ignored and
 * EOPNOTSUPP is returned.
 */
int
vfs_stdroot(struct mount *mp, struct vnode **vpp, struct thread *td)
{

	return (EOPNOTSUPP);
}
504 
/*
 * Default statfs operation: not supported.  All arguments are ignored
 * and EOPNOTSUPP is returned.
 */
int
vfs_stdstatfs(struct mount *mp, struct statfs *sbp, struct thread *td)
{

	return (EOPNOTSUPP);
}
514 
/*
 * Default vnode-to-file-handle operation: not supported.  Both arguments
 * are ignored and EOPNOTSUPP is returned.
 */
int
vfs_stdvptofh(struct vnode *vp, struct fid *fhp)
{

	return (EOPNOTSUPP);
}
523 
524 int
525 vfs_stdquotactl (mp, cmds, uid, arg, td)
526 	struct mount *mp;
527 	int cmds;
528 	uid_t uid;
529 	caddr_t arg;
530 	struct thread *td;
531 {
532 
533 	return (EOPNOTSUPP);
534 }
535 
536 int
537 vfs_stdsync(mp, waitfor, td)
538 	struct mount *mp;
539 	int waitfor;
540 	struct thread *td;
541 {
542 	struct vnode *vp, *nvp;
543 	int error, lockreq, allerror = 0;
544 
545 	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
546 	if (waitfor != MNT_WAIT)
547 		lockreq |= LK_NOWAIT;
548 	/*
549 	 * Force stale buffer cache information to be flushed.
550 	 */
551 	MNT_ILOCK(mp);
552 loop:
553 	MNT_VNODE_FOREACH(vp, mp, nvp) {
554 
555 		VI_LOCK(vp);
556 		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
557 			VI_UNLOCK(vp);
558 			continue;
559 		}
560 		MNT_IUNLOCK(mp);
561 
562 		if ((error = vget(vp, lockreq, td)) != 0) {
563 			MNT_ILOCK(mp);
564 			if (error == ENOENT)
565 				goto loop;
566 			continue;
567 		}
568 		error = VOP_FSYNC(vp, waitfor, td);
569 		if (error)
570 			allerror = error;
571 
572 		VOP_UNLOCK(vp, 0, td);
573 		vrele(vp);
574 		MNT_ILOCK(mp);
575 	}
576 	MNT_IUNLOCK(mp);
577 	return (allerror);
578 }
579 
/*
 * Sync operation that does nothing: all arguments are ignored and 0 is
 * returned.
 */
int
vfs_stdnosync(struct mount *mp, int waitfor, struct thread *td)
{

	return (0);
}
589 
/*
 * Default vget operation: not supported.  All arguments are ignored and
 * EOPNOTSUPP is returned.
 */
int
vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}
600 
/*
 * Default file-handle-to-vnode operation: not supported.  All arguments
 * are ignored and EOPNOTSUPP is returned.
 */
int
vfs_stdfhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}
610 
/*
 * Default init operation: nothing to set up, returns 0.
 */
int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}
618 
/*
 * Default uninit operation: nothing to tear down, returns 0.
 */
int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}
626 
/*
 * Default extattrctl operation: not supported.  If a vnode was passed
 * in (filename_vp != NULL) it is unlocked with VOP_UNLOCK() first; the
 * return value is always EOPNOTSUPP.
 */
int
vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname, struct thread *td)
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0, td);

	return (EOPNOTSUPP);
}
641 
642 int
643 vfs_stdsysctl(mp, op, req)
644 	struct mount *mp;
645 	fsctlop_t op;
646 	struct sysctl_req *req;
647 {
648 
649 	return (EOPNOTSUPP);
650 }
651 
652 /* end of vfs default ops */
653