/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/poll.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_nostrategy(struct vop_strategy_args *);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_advlock =		VOP_EINVAL,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_islocked =		vop_stdislocked,
	.vop_lease =		VOP_NULL,
	.vop_lock =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
};
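
/*
 * Illustrative sketch, not part of this file: a filesystem normally
 * chains to these defaults by pointing vop_default at this table and
 * overriding only the operations it implements.  The "myfs" names
 * below are hypothetical.
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_read =	myfs_read,
 *		.vop_write =	myfs_write,
 *		.vop_reclaim =	myfs_reclaim,
 *	};
 *
 * Any VOP absent from myfs_vnodeops then falls through to the entry
 * here, or to VOP_EOPNOTSUPP via vop_bypass if there is none.
 */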

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function that panics when a VOP that must never be reached is
 * nevertheless called; hitting it indicates a filesystem bug.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement can be found (where it
 * exists) in the corresponding VOP_<SOMETHING>(9) manual page (all
 * uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup.
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	B_INVAL is assumed to already be clear prior to a write and should
 *	not be cleared manually unless you just made the buffer invalid.
 *	BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy(struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, returning the system-wide
 * limits.  Filesystems with smaller limits must override the affected
 * names.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
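
/*
 * Illustrative sketch, not part of this file: a filesystem with a
 * smaller limit can override just the names it must and fall back to
 * vop_stdpathconf() for the rest.  "myfs" and MYFS_LINK_MAX are
 * hypothetical.
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *
 *		if (ap->a_name == _PC_LINK_MAX) {
 *			*ap->a_retval = MYFS_LINK_MAX;
 *			return (0);
 *		}
 *		return (vop_stdpathconf(ap));
 *	}
 */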

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

#ifndef	DEBUG_LOCKS
	return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td));
#else
	return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    ap->a_td, "vop_stdlock", vp->filename, vp->line));
#endif
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
	    ap->a_td));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock, ap->a_td));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	/*
	 * Return true for read/write.  If the user asked for something
	 * special, return POLLNVAL, so that clients have a way of
	 * determining reliably whether or not the extended
	 * functionality is present without hard-coding knowledge
	 * of specific filesystem implementations.
	 * Stay in sync with kern_conf.c::no_poll().
	 */
	if (ap->a_events & ~POLLSTANDARD)
		return (POLLNVAL);

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
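
/*
 * Illustrative note, not part of this file: the two routines above
 * differ only in how non-standard events are handled; vop_nopoll()
 * rejects them with POLLNVAL, while vop_stdpoll() records the poller
 * via vn_pollrecord().  A local filesystem that supports polling would
 * wire the latter into its (hypothetical) vop_vector:
 *
 *	.vop_poll =	vop_stdpoll,
 */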

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{

	*(ap->a_mpp) = ap->a_vp->v_mount;
	return (0);
}

/*
 * Default bmap: assume a trivial, linear mapping in which each logical
 * block of f_iosize bytes maps 1:1 onto consecutive DEV_BSIZE device
 * blocks, with no read-ahead or read-behind runs.  See VOP_BMAP(9).
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn *
		    btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
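
/*
 * Worked example (assumed numbers, for illustration only): with an
 * f_iosize of 16384 bytes and DEV_BSIZE of 512, btodb(16384) == 32, so
 * logical block 5 maps to device block 5 * 32 == 160.
 */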

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int s, error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	VI_LOCK(vp);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops: clear
	 * BV_SCANNED on every dirty buffer so that each is visited at
	 * most once per pass, even if writing one re-dirties the list.
	 */
	s = splbio();
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}
	splx(s);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	s = splbio();
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
			splx(s);
		} else {
			bremfree(bp);
			splx(s);
			bawrite(bp);
		}
		VI_LOCK(vp);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bo = &vp->v_bufobj;
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0) {
				splx(s);
				goto loop1;
			}
			error = EAGAIN;
		}
	}
	VI_UNLOCK(vp);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);
	splx(s);

	return (error);
}
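
/*
 * Illustrative sketch, not part of this file: a filesystem that keeps
 * per-vnode metadata of its own can flush that first and then reuse
 * vop_stdfsync() for the dirty-buffer walk.  myfs_fsync() and
 * myfs_update_inode() are hypothetical.
 *
 *	static int
 *	myfs_fsync(struct vop_fsync_args *ap)
 *	{
 *		int error;
 *
 *		error = myfs_update_inode(ap->a_vp);	(hypothetical)
 *		if (error != 0)
 *			return (error);
 *		return (vop_stdfsync(ap));
 *	}
 */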

/*
 * Default getpages: delegate to the generic vnode pager, which
 * implements paging for filesystems that support VOP_BMAP().  See
 * VOP_GETPAGES(9).
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage));
}

/*
 * Default putpages: delegate to the generic vnode pager, which writes
 * the pages out through VOP_WRITE().  See VOP_PUTPAGES(9).
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals));
}

/*
 * vfs default ops: used to fill the vfs function table so that
 * unimplemented operations get reasonable default return values.
 */
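
/*
 * Illustrative sketch, not part of this file: a minimal (hypothetical)
 * "myfs" can point the vfs operations it does not implement at these
 * defaults.
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_unmount =	myfs_unmount,
 *		.vfs_root =	myfs_root,
 *		.vfs_statfs =	myfs_statfs,
 *		.vfs_sync =	vfs_stdsync,
 *		.vfs_vget =	vfs_stdvget,
 *		.vfs_init =	vfs_stdinit,
 *		.vfs_uninit =	vfs_stduninit,
 *	};
 *	VFS_SET(myfs_vfsops, myfs, 0);
 */
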
int
vfs_stdroot(mp, flags, vpp, td)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(mp, sbp, td)
	struct mount *mp;
	struct statfs *sbp;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdvptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(mp, cmds, uid, arg, td)
	struct mount *mp;
	int cmds;
	uid_t uid;
	caddr_t arg;
	struct thread *td;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{
	struct vnode *vp, *nvp;
	int error, lockreq, allerror = 0;

	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, nvp) {
		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);

		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT)
				goto loop;
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;

		VOP_UNLOCK(vp, 0, td);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{

	return (0);
}

int
vfs_stdvget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
	struct thread *td;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0, td);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */