xref: /illumos-gate/usr/src/uts/common/fs/fs_subr.c (revision f47a9c508408507a404eaf38dd597e6ac41f92e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * Generic vnode operations.
35  */
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/errno.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/statvfs.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/proc.h>
46 #include <sys/user.h>
47 #include <sys/unistd.h>
48 #include <sys/cred.h>
49 #include <sys/poll.h>
50 #include <sys/debug.h>
51 #include <sys/cmn_err.h>
52 #include <sys/stream.h>
53 #include <fs/fs_subr.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 #include <acl/acl_common.h>
61 
62 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
63 
/*
 * Generic stand-in for an operation the file system does not implement.
 * Accepts whatever the caller passes and always reports ENOSYS.
 */
int
fs_nosys()
{
	return (ENOSYS);
}
72 
/*
 * Generic stand-in for an operation that is not valid on this vnode.
 * Always reports EINVAL.
 */
int
fs_inval()
{
	return (EINVAL);
}
81 
/*
 * Generic stand-in for an operation that only makes sense on a
 * directory.  Always reports ENOTDIR.
 */
int
fs_notdir()
{
	return (ENOTDIR);
}
90 
91 /*
92  * Free the file system specific resources. For the file systems that
93  * do not support the forced unmount, it will be a nop function.
94  */
95 
96 /*ARGSUSED*/
97 void
98 fs_freevfs(vfs_t *vfsp)
99 {
100 }
101 
102 /* ARGSUSED */
103 int
104 fs_nosys_map(struct vnode *vp,
105 	offset_t off,
106 	struct as *as,
107 	caddr_t *addrp,
108 	size_t len,
109 	uchar_t prot,
110 	uchar_t maxprot,
111 	uint_t flags,
112 	struct cred *cr)
113 {
114 	return (ENOSYS);
115 }
116 
117 /* ARGSUSED */
118 int
119 fs_nosys_addmap(struct vnode *vp,
120 	offset_t off,
121 	struct as *as,
122 	caddr_t addr,
123 	size_t len,
124 	uchar_t prot,
125 	uchar_t maxprot,
126 	uint_t flags,
127 	struct cred *cr)
128 {
129 	return (ENOSYS);
130 }
131 
132 /* ARGSUSED */
133 int
134 fs_nosys_poll(vnode_t *vp,
135 	register short events,
136 	int anyyet,
137 	register short *reventsp,
138 	struct pollhead **phpp)
139 {
140 	return (ENOSYS);
141 }
142 
143 
144 /*
145  * The file system has nothing to sync to disk.  However, the
146  * VFS_SYNC operation must not fail.
147  */
148 /* ARGSUSED */
149 int
150 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
151 {
152 	return (0);
153 }
154 
155 /*
156  * Read/write lock/unlock.  Does nothing.
157  */
158 /* ARGSUSED */
159 int
160 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
161 {
162 	return (-1);
163 }
164 
165 /* ARGSUSED */
166 void
167 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
168 {
169 }
170 
171 /*
172  * Compare two vnodes.
173  */
174 int
175 fs_cmp(vnode_t *vp1, vnode_t *vp2)
176 {
177 	return (vp1 == vp2);
178 }
179 
180 /*
181  * No-op seek operation.
182  */
183 /* ARGSUSED */
184 int
185 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
186 {
187 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
188 }
189 
190 /*
191  * File and record locking.
192  */
193 /* ARGSUSED */
194 int
195 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
196 	offset_t offset, flk_callback_t *flk_cbp, cred_t *cr)
197 {
198 	int frcmd;
199 	int nlmid;
200 	int error = 0;
201 	flk_callback_t serialize_callback;
202 	int serialize = 0;
203 
204 	switch (cmd) {
205 
206 	case F_GETLK:
207 	case F_O_GETLK:
208 		if (flag & F_REMOTELOCK) {
209 			frcmd = RCMDLCK;
210 			break;
211 		}
212 		if (flag & F_PXFSLOCK) {
213 			frcmd = PCMDLCK;
214 			break;
215 		}
216 		bfp->l_pid = ttoproc(curthread)->p_pid;
217 		bfp->l_sysid = 0;
218 		frcmd = 0;
219 		break;
220 
221 	case F_SETLK_NBMAND:
222 		/*
223 		 * Are NBMAND locks allowed on this file?
224 		 */
225 		if (!vp->v_vfsp ||
226 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
227 			error = EINVAL;
228 			goto done;
229 		}
230 		if (vp->v_type != VREG) {
231 			error = EINVAL;
232 			goto done;
233 		}
234 		/*FALLTHROUGH*/
235 
236 	case F_SETLK:
237 		/*
238 		 * Check whether there is an NBMAND share reservation that
239 		 * conflicts with the lock request.
240 		 */
241 		if (nbl_need_check(vp)) {
242 			nbl_start_crit(vp, RW_WRITER);
243 			serialize = 1;
244 			if (share_blocks_lock(vp, bfp)) {
245 				error = EAGAIN;
246 				goto done;
247 			}
248 		}
249 		if (flag & F_REMOTELOCK) {
250 			frcmd = SETFLCK|RCMDLCK;
251 			break;
252 		}
253 		if (flag & F_PXFSLOCK) {
254 			frcmd = SETFLCK|PCMDLCK;
255 			break;
256 		}
257 		bfp->l_pid = ttoproc(curthread)->p_pid;
258 		bfp->l_sysid = 0;
259 		frcmd = SETFLCK;
260 		if (cmd == F_SETLK_NBMAND &&
261 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
262 			/* would check here for conflict with mapped region */
263 			frcmd |= NBMLCK;
264 		}
265 		break;
266 
267 	case F_SETLKW:
268 		/*
269 		 * If there is an NBMAND share reservation that conflicts
270 		 * with the lock request, block until the conflicting share
271 		 * reservation goes away.
272 		 */
273 		if (nbl_need_check(vp)) {
274 			nbl_start_crit(vp, RW_WRITER);
275 			serialize = 1;
276 			if (share_blocks_lock(vp, bfp)) {
277 				error = wait_for_share(vp, bfp);
278 				if (error != 0)
279 					goto done;
280 			}
281 		}
282 		if (flag & F_REMOTELOCK) {
283 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
284 			break;
285 		}
286 		if (flag & F_PXFSLOCK) {
287 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
288 			break;
289 		}
290 		bfp->l_pid = ttoproc(curthread)->p_pid;
291 		bfp->l_sysid = 0;
292 		frcmd = SETFLCK|SLPFLCK;
293 		break;
294 
295 	case F_HASREMOTELOCKS:
296 		nlmid = GETNLMID(bfp->l_sysid);
297 		if (nlmid != 0) {	/* booted as a cluster */
298 			l_has_rmt(bfp) =
299 				cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
300 		} else {		/* not booted as a cluster */
301 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
302 		}
303 
304 		goto done;
305 
306 	default:
307 		error = EINVAL;
308 		goto done;
309 	}
310 
311 	/*
312 	 * If this is a blocking lock request and we're serializing lock
313 	 * requests, modify the callback list to leave the critical region
314 	 * while we're waiting for the lock.
315 	 */
316 
317 	if (serialize && (frcmd & SLPFLCK) != 0) {
318 		flk_add_callback(&serialize_callback,
319 				frlock_serialize_blocked, vp, flk_cbp);
320 		flk_cbp = &serialize_callback;
321 	}
322 
323 	error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
324 
325 done:
326 	if (serialize)
327 		nbl_end_crit(vp);
328 
329 	return (error);
330 }
331 
332 /*
333  * Callback when a lock request blocks and we are serializing requests.  If
334  * before sleeping, leave the critical region.  If after wakeup, reenter
335  * the critical region.
336  */
337 
338 static callb_cpr_t *
339 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
340 {
341 	vnode_t *vp = (vnode_t *)infop;
342 
343 	if (when == FLK_BEFORE_SLEEP)
344 		nbl_end_crit(vp);
345 	else {
346 		nbl_start_crit(vp, RW_WRITER);
347 	}
348 
349 	return (NULL);
350 }
351 
352 /*
353  * Allow any flags.
354  */
355 /* ARGSUSED */
356 int
357 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
358 {
359 	return (0);
360 }
361 
362 /*
363  * Return the answer requested to poll() for non-device files.
364  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
365  */
366 struct pollhead fs_pollhd;
367 
368 /* ARGSUSED */
369 int
370 fs_poll(vnode_t *vp,
371 	register short events,
372 	int anyyet,
373 	register short *reventsp,
374 	struct pollhead **phpp)
375 {
376 	*reventsp = 0;
377 	if (events & POLLIN)
378 		*reventsp |= POLLIN;
379 	if (events & POLLRDNORM)
380 		*reventsp |= POLLRDNORM;
381 	if (events & POLLRDBAND)
382 		*reventsp |= POLLRDBAND;
383 	if (events & POLLOUT)
384 		*reventsp |= POLLOUT;
385 	if (events & POLLWRBAND)
386 		*reventsp |= POLLWRBAND;
387 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
388 	return (0);
389 }
390 
391 /*
392  * POSIX pathconf() support.
393  */
394 /* ARGSUSED */
395 int
396 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
397 {
398 	register ulong_t val;
399 	register int error = 0;
400 	struct statvfs64 vfsbuf;
401 
402 	switch (cmd) {
403 
404 	case _PC_LINK_MAX:
405 		val = MAXLINK;
406 		break;
407 
408 	case _PC_MAX_CANON:
409 		val = MAX_CANON;
410 		break;
411 
412 	case _PC_MAX_INPUT:
413 		val = MAX_INPUT;
414 		break;
415 
416 	case _PC_NAME_MAX:
417 		bzero(&vfsbuf, sizeof (vfsbuf));
418 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
419 			break;
420 		val = vfsbuf.f_namemax;
421 		break;
422 
423 	case _PC_PATH_MAX:
424 	case _PC_SYMLINK_MAX:
425 		val = MAXPATHLEN;
426 		break;
427 
428 	case _PC_PIPE_BUF:
429 		val = PIPE_BUF;
430 		break;
431 
432 	case _PC_NO_TRUNC:
433 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
434 			val = 1;	/* NOTRUNC is enabled for vp */
435 		else
436 			val = (ulong_t)-1;
437 		break;
438 
439 	case _PC_VDISABLE:
440 		val = _POSIX_VDISABLE;
441 		break;
442 
443 	case _PC_CHOWN_RESTRICTED:
444 		if (rstchown)
445 			val = rstchown; /* chown restricted enabled */
446 		else
447 			val = (ulong_t)-1;
448 		break;
449 
450 	case _PC_FILESIZEBITS:
451 
452 		/*
453 		 * If ever we come here it means that underlying file system
454 		 * does not recognise the command and therefore this
455 		 * configurable limit cannot be determined. We return -1
456 		 * and don't change errno.
457 		 */
458 
459 		val = (ulong_t)-1;    /* large file support */
460 		break;
461 
462 	case _PC_ACL_ENABLED:
463 		val = 0;
464 		break;
465 
466 	default:
467 		error = EINVAL;
468 		break;
469 	}
470 
471 	if (error == 0)
472 		*valp = val;
473 	return (error);
474 }
475 
476 /*
477  * Dispose of a page.
478  */
479 /* ARGSUSED */
480 void
481 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr)
482 {
483 
484 	ASSERT(fl == B_FREE || fl == B_INVAL);
485 
486 	if (fl == B_FREE)
487 		page_free(pp, dn);
488 	else
489 		page_destroy(pp, dn);
490 }
491 
492 /* ARGSUSED */
493 void
494 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr)
495 {
496 	cmn_err(CE_PANIC, "fs_nodispose invoked");
497 }
498 
499 /*
500  * fabricate acls for file systems that do not support acls.
501  */
502 /* ARGSUSED */
503 int
504 fs_fab_acl(vp, vsecattr, flag, cr)
505 vnode_t		*vp;
506 vsecattr_t	*vsecattr;
507 int		flag;
508 cred_t		*cr;
509 {
510 	aclent_t	*aclentp;
511 	struct vattr	vattr;
512 	int		error;
513 
514 	vsecattr->vsa_aclcnt	= 0;
515 	vsecattr->vsa_aclentp	= NULL;
516 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
517 	vsecattr->vsa_dfaclentp	= NULL;
518 
519 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL))
520 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
521 
522 	if (vsecattr->vsa_mask & VSA_ACL) {
523 		vsecattr->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t),
524 		    KM_SLEEP);
525 		vattr.va_mask = AT_MODE | AT_UID | AT_GID;
526 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
527 			return (error);
528 		aclentp = vsecattr->vsa_aclentp;
529 
530 		aclentp->a_type = USER_OBJ;	/* Owner */
531 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
532 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
533 		aclentp++;
534 
535 		aclentp->a_type = GROUP_OBJ;    /* Group */
536 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
537 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
538 		aclentp++;
539 
540 		aclentp->a_type = OTHER_OBJ;    /* Other */
541 		aclentp->a_perm = vattr.va_mode & 0007;
542 		aclentp->a_id = -1;		/* Really undefined */
543 		aclentp++;
544 
545 		aclentp->a_type = CLASS_OBJ;    /* Class */
546 		aclentp->a_perm = (ushort_t)(0777);
547 		aclentp->a_id = -1;		/* Really undefined */
548 	}
549 
550 	return (0);
551 }
552 
553 /*
554  * Common code for implementing DOS share reservations
555  */
556 /* ARGSUSED4 */
557 int
558 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
559 {
560 	int error;
561 
562 	/*
563 	 * Make sure that the file was opened with permissions appropriate
564 	 * for the request, and make sure the caller isn't trying to sneak
565 	 * in an NBMAND request.
566 	 */
567 	if (cmd == F_SHARE) {
568 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
569 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
570 			return (EBADF);
571 		if (shr->s_deny & F_MANDDNY)
572 			return (EINVAL);
573 	}
574 	if (cmd == F_SHARE_NBMAND) {
575 		/* must have write permission to deny read access */
576 		if ((shr->s_deny & F_RDDNY) && (flag & FWRITE) == 0)
577 			return (EBADF);
578 		/* make sure nbmand is allowed on the file */
579 		if (!vp->v_vfsp ||
580 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
581 			return (EINVAL);
582 		}
583 		if (vp->v_type != VREG) {
584 			return (EINVAL);
585 		}
586 	}
587 
588 	nbl_start_crit(vp, RW_WRITER);
589 
590 	switch (cmd) {
591 
592 	case F_SHARE_NBMAND:
593 		shr->s_deny |= F_MANDDNY;
594 		/*FALLTHROUGH*/
595 	case F_SHARE:
596 		error = add_share(vp, shr);
597 		break;
598 
599 	case F_UNSHARE:
600 		error = del_share(vp, shr);
601 		break;
602 
603 	case F_HASREMOTELOCKS:
604 		/*
605 		 * We are overloading this command to refer to remote
606 		 * shares as well as remote locks, despite its name.
607 		 */
608 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
609 		error = 0;
610 		break;
611 
612 	default:
613 		error = EINVAL;
614 		break;
615 	}
616 
617 	nbl_end_crit(vp);
618 	return (error);
619 }
620 
621 /*ARGSUSED1*/
622 int
623 fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent)
624 {
625 	ASSERT(vp != NULL);
626 	return (ENOTSUP);
627 }
628 
629 /*ARGSUSED1*/
630 int
631 fs_vnevent_support(vnode_t *vp, vnevent_t vnevent)
632 {
633 	ASSERT(vp != NULL);
634 	return (0);
635 }
636 
637 /*
638  * return 1 for non-trivial ACL.
639  *
640  * NB: It is not necessary for the caller to VOP_RWLOCK since
641  *	we only issue VOP_GETSECATTR.
642  *
643  * Returns 0 == trivial
644  *         1 == NOT Trivial
645  *	   <0 could not determine.
646  */
647 int
648 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
649 {
650 	ulong_t		acl_styles;
651 	ulong_t		acl_flavor;
652 	vsecattr_t 	vsecattr;
653 	int 		error;
654 	int		isnontrivial;
655 
656 	/* determine the forms of ACLs maintained */
657 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr);
658 
659 	/* clear bits we don't understand and establish default acl_style */
660 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
661 	if (error || (acl_styles == 0))
662 		acl_styles = _ACL_ACLENT_ENABLED;
663 
664 	vsecattr.vsa_aclentp = NULL;
665 	vsecattr.vsa_dfaclentp = NULL;
666 	vsecattr.vsa_aclcnt = 0;
667 	vsecattr.vsa_dfaclcnt = 0;
668 
669 	while (acl_styles) {
670 		/* select one of the styles as current flavor */
671 		acl_flavor = 0;
672 		if (acl_styles & _ACL_ACLENT_ENABLED) {
673 			acl_flavor = _ACL_ACLENT_ENABLED;
674 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
675 		} else if (acl_styles & _ACL_ACE_ENABLED) {
676 			acl_flavor = _ACL_ACE_ENABLED;
677 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
678 		}
679 
680 		ASSERT(vsecattr.vsa_mask && acl_flavor);
681 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr);
682 		if (error == 0)
683 			break;
684 
685 		/* that flavor failed */
686 		acl_styles &= ~acl_flavor;
687 	}
688 
689 	/* if all styles fail then assume trivial */
690 	if (acl_styles == 0)
691 		return (0);
692 
693 	/* process the flavor that worked */
694 	isnontrivial = 0;
695 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
696 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
697 			isnontrivial = 1;
698 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
699 			kmem_free(vsecattr.vsa_aclentp,
700 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
701 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
702 			kmem_free(vsecattr.vsa_dfaclentp,
703 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
704 	}
705 	if (acl_flavor & _ACL_ACE_ENABLED) {
706 
707 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
708 		    vsecattr.vsa_aclcnt);
709 
710 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
711 			kmem_free(vsecattr.vsa_aclentp,
712 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
713 		/* ACE has no vsecattr.vsa_dfaclcnt */
714 	}
715 	return (isnontrivial);
716 }
717