xref: /titanic_41/usr/src/uts/common/fs/fs_subr.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * Generic vnode operations.
35  */
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/errno.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/statvfs.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/proc.h>
46 #include <sys/user.h>
47 #include <sys/unistd.h>
48 #include <sys/cred.h>
49 #include <sys/poll.h>
50 #include <sys/debug.h>
51 #include <sys/cmn_err.h>
52 #include <sys/stream.h>
53 #include <fs/fs_subr.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 
61 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
62 
/*
 * Stub for an operation the file system does not implement.
 * Always returns ENOSYS.
 */
int
fs_nosys()
{
	int error = ENOSYS;

	return (error);
}
71 
/*
 * Stub for an operation that is not valid on this vnode.
 * Always returns EINVAL.
 */
int
fs_inval()
{
	int error = EINVAL;

	return (error);
}
80 
/*
 * Stub for an operation that only makes sense on a directory.
 * Always returns ENOTDIR.
 */
int
fs_notdir()
{
	int error = ENOTDIR;

	return (error);
}
89 
90 /*
91  * Free the file system specific resources. For the file systems that
92  * do not support the forced unmount, it will be a nop function.
93  */
94 
95 /*ARGSUSED*/
96 void
97 fs_freevfs(vfs_t *vfsp)
98 {
99 }
100 
101 /* ARGSUSED */
102 int
103 fs_nosys_map(struct vnode *vp,
104 	offset_t off,
105 	struct as *as,
106 	caddr_t *addrp,
107 	size_t len,
108 	uchar_t prot,
109 	uchar_t maxprot,
110 	uint_t flags,
111 	struct cred *cr)
112 {
113 	return (ENOSYS);
114 }
115 
116 /* ARGSUSED */
117 int
118 fs_nosys_addmap(struct vnode *vp,
119 	offset_t off,
120 	struct as *as,
121 	caddr_t addr,
122 	size_t len,
123 	uchar_t prot,
124 	uchar_t maxprot,
125 	uint_t flags,
126 	struct cred *cr)
127 {
128 	return (ENOSYS);
129 }
130 
131 /* ARGSUSED */
132 int
133 fs_nosys_poll(vnode_t *vp,
134 	register short events,
135 	int anyyet,
136 	register short *reventsp,
137 	struct pollhead **phpp)
138 {
139 	return (ENOSYS);
140 }
141 
142 
143 /*
144  * The file system has nothing to sync to disk.  However, the
145  * VFS_SYNC operation must not fail.
146  */
147 /* ARGSUSED */
148 int
149 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
150 {
151 	return (0);
152 }
153 
154 /*
155  * Read/write lock/unlock.  Does nothing.
156  */
157 /* ARGSUSED */
158 int
159 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
160 {
161 	return (-1);
162 }
163 
164 /* ARGSUSED */
165 void
166 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
167 {
168 }
169 
170 /*
171  * Compare two vnodes.
172  */
173 int
174 fs_cmp(vnode_t *vp1, vnode_t *vp2)
175 {
176 	return (vp1 == vp2);
177 }
178 
179 /*
180  * No-op seek operation.
181  */
182 /* ARGSUSED */
183 int
184 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
185 {
186 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
187 }
188 
189 /*
190  * File and record locking.
191  */
192 /* ARGSUSED */
193 int
194 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
195 	offset_t offset, flk_callback_t *flk_cbp, cred_t *cr)
196 {
197 	int frcmd;
198 	int nlmid;
199 	int error = 0;
200 	flk_callback_t serialize_callback;
201 	int serialize = 0;
202 
203 	switch (cmd) {
204 
205 	case F_GETLK:
206 	case F_O_GETLK:
207 		if (flag & F_REMOTELOCK) {
208 			frcmd = RCMDLCK;
209 			break;
210 		}
211 		if (flag & F_PXFSLOCK) {
212 			frcmd = PCMDLCK;
213 			break;
214 		}
215 		bfp->l_pid = ttoproc(curthread)->p_pid;
216 		bfp->l_sysid = 0;
217 		frcmd = 0;
218 		break;
219 
220 	case F_SETLK_NBMAND:
221 		/*
222 		 * Are NBMAND locks allowed on this file?
223 		 */
224 		if (!vp->v_vfsp ||
225 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
226 			error = EINVAL;
227 			goto done;
228 		}
229 		if (vp->v_type != VREG) {
230 			error = EINVAL;
231 			goto done;
232 		}
233 		/*FALLTHROUGH*/
234 
235 	case F_SETLK:
236 		/*
237 		 * Check whether there is an NBMAND share reservation that
238 		 * conflicts with the lock request.
239 		 */
240 		if (nbl_need_check(vp)) {
241 			nbl_start_crit(vp, RW_WRITER);
242 			serialize = 1;
243 			if (share_blocks_lock(vp, bfp)) {
244 				error = EAGAIN;
245 				goto done;
246 			}
247 		}
248 		if (flag & F_REMOTELOCK) {
249 			frcmd = SETFLCK|RCMDLCK;
250 			break;
251 		}
252 		if (flag & F_PXFSLOCK) {
253 			frcmd = SETFLCK|PCMDLCK;
254 			break;
255 		}
256 		bfp->l_pid = ttoproc(curthread)->p_pid;
257 		bfp->l_sysid = 0;
258 		frcmd = SETFLCK;
259 		if (cmd == F_SETLK_NBMAND &&
260 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
261 			/* would check here for conflict with mapped region */
262 			frcmd |= NBMLCK;
263 		}
264 		break;
265 
266 	case F_SETLKW:
267 		/*
268 		 * If there is an NBMAND share reservation that conflicts
269 		 * with the lock request, block until the conflicting share
270 		 * reservation goes away.
271 		 */
272 		if (nbl_need_check(vp)) {
273 			nbl_start_crit(vp, RW_WRITER);
274 			serialize = 1;
275 			if (share_blocks_lock(vp, bfp)) {
276 				error = wait_for_share(vp, bfp);
277 				if (error != 0)
278 					goto done;
279 			}
280 		}
281 		if (flag & F_REMOTELOCK) {
282 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
283 			break;
284 		}
285 		if (flag & F_PXFSLOCK) {
286 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
287 			break;
288 		}
289 		bfp->l_pid = ttoproc(curthread)->p_pid;
290 		bfp->l_sysid = 0;
291 		frcmd = SETFLCK|SLPFLCK;
292 		break;
293 
294 	case F_HASREMOTELOCKS:
295 		nlmid = GETNLMID(bfp->l_sysid);
296 		if (nlmid != 0) {	/* booted as a cluster */
297 			l_has_rmt(bfp) =
298 				cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
299 		} else {		/* not booted as a cluster */
300 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
301 		}
302 
303 		goto done;
304 
305 	default:
306 		error = EINVAL;
307 		goto done;
308 	}
309 
310 	/*
311 	 * If this is a blocking lock request and we're serializing lock
312 	 * requests, modify the callback list to leave the critical region
313 	 * while we're waiting for the lock.
314 	 */
315 
316 	if (serialize && (frcmd & SLPFLCK) != 0) {
317 		flk_add_callback(&serialize_callback,
318 				frlock_serialize_blocked, vp, flk_cbp);
319 		flk_cbp = &serialize_callback;
320 	}
321 
322 	error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
323 
324 done:
325 	if (serialize)
326 		nbl_end_crit(vp);
327 
328 	return (error);
329 }
330 
331 /*
332  * Callback when a lock request blocks and we are serializing requests.  If
333  * before sleeping, leave the critical region.  If after wakeup, reenter
334  * the critical region.
335  */
336 
337 static callb_cpr_t *
338 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
339 {
340 	vnode_t *vp = (vnode_t *)infop;
341 
342 	if (when == FLK_BEFORE_SLEEP)
343 		nbl_end_crit(vp);
344 	else {
345 		nbl_start_crit(vp, RW_WRITER);
346 	}
347 
348 	return (NULL);
349 }
350 
351 /*
352  * Allow any flags.
353  */
354 /* ARGSUSED */
355 int
356 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
357 {
358 	return (0);
359 }
360 
361 /*
362  * Return the answer requested to poll() for non-device files.
363  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
364  */
365 struct pollhead fs_pollhd;
366 
367 /* ARGSUSED */
368 int
369 fs_poll(vnode_t *vp,
370 	register short events,
371 	int anyyet,
372 	register short *reventsp,
373 	struct pollhead **phpp)
374 {
375 	*reventsp = 0;
376 	if (events & POLLIN)
377 		*reventsp |= POLLIN;
378 	if (events & POLLRDNORM)
379 		*reventsp |= POLLRDNORM;
380 	if (events & POLLRDBAND)
381 		*reventsp |= POLLRDBAND;
382 	if (events & POLLOUT)
383 		*reventsp |= POLLOUT;
384 	if (events & POLLWRBAND)
385 		*reventsp |= POLLWRBAND;
386 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
387 	return (0);
388 }
389 
390 /*
391  * POSIX pathconf() support.
392  */
393 /* ARGSUSED */
394 int
395 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
396 {
397 	register ulong_t val;
398 	register int error = 0;
399 	struct statvfs64 vfsbuf;
400 
401 	switch (cmd) {
402 
403 	case _PC_LINK_MAX:
404 		val = MAXLINK;
405 		break;
406 
407 	case _PC_MAX_CANON:
408 		val = MAX_CANON;
409 		break;
410 
411 	case _PC_MAX_INPUT:
412 		val = MAX_INPUT;
413 		break;
414 
415 	case _PC_NAME_MAX:
416 		bzero(&vfsbuf, sizeof (vfsbuf));
417 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
418 			break;
419 		val = vfsbuf.f_namemax;
420 		break;
421 
422 	case _PC_PATH_MAX:
423 	case _PC_SYMLINK_MAX:
424 		val = MAXPATHLEN;
425 		break;
426 
427 	case _PC_PIPE_BUF:
428 		val = PIPE_BUF;
429 		break;
430 
431 	case _PC_NO_TRUNC:
432 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
433 			val = 1;	/* NOTRUNC is enabled for vp */
434 		else
435 			val = (ulong_t)-1;
436 		break;
437 
438 	case _PC_VDISABLE:
439 		val = _POSIX_VDISABLE;
440 		break;
441 
442 	case _PC_CHOWN_RESTRICTED:
443 		if (rstchown)
444 			val = rstchown; /* chown restricted enabled */
445 		else
446 			val = (ulong_t)-1;
447 		break;
448 
449 	case _PC_FILESIZEBITS:
450 
451 		/*
452 		 * If ever we come here it means that underlying file system
453 		 * does not recognise the command and therefore this
454 		 * configurable limit cannot be determined. We return -1
455 		 * and don't change errno.
456 		 */
457 
458 		val = (ulong_t)-1;    /* large file support */
459 		break;
460 
461 	case _PC_ACL_ENABLED:
462 		val = 0;
463 		break;
464 
465 	default:
466 		error = EINVAL;
467 		break;
468 	}
469 
470 	if (error == 0)
471 		*valp = val;
472 	return (error);
473 }
474 
475 /*
476  * Dispose of a page.
477  */
478 /* ARGSUSED */
479 void
480 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr)
481 {
482 
483 	ASSERT(fl == B_FREE || fl == B_INVAL);
484 
485 	if (fl == B_FREE)
486 		page_free(pp, dn);
487 	else
488 		page_destroy(pp, dn);
489 }
490 
491 /* ARGSUSED */
492 void
493 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr)
494 {
495 	cmn_err(CE_PANIC, "fs_nodispose invoked");
496 }
497 
498 /*
499  * fabricate acls for file systems that do not support acls.
500  */
501 /* ARGSUSED */
502 int
503 fs_fab_acl(vp, vsecattr, flag, cr)
504 vnode_t		*vp;
505 vsecattr_t	*vsecattr;
506 int		flag;
507 cred_t		*cr;
508 {
509 	aclent_t	*aclentp;
510 	struct vattr	vattr;
511 	int		error;
512 
513 	vsecattr->vsa_aclcnt	= 0;
514 	vsecattr->vsa_aclentp	= NULL;
515 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
516 	vsecattr->vsa_dfaclentp	= NULL;
517 
518 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL))
519 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
520 
521 	if (vsecattr->vsa_mask & VSA_ACL) {
522 		vsecattr->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t),
523 		    KM_SLEEP);
524 		vattr.va_mask = AT_MODE | AT_UID | AT_GID;
525 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
526 			return (error);
527 		aclentp = vsecattr->vsa_aclentp;
528 
529 		aclentp->a_type = USER_OBJ;	/* Owner */
530 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
531 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
532 		aclentp++;
533 
534 		aclentp->a_type = GROUP_OBJ;    /* Group */
535 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
536 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
537 		aclentp++;
538 
539 		aclentp->a_type = OTHER_OBJ;    /* Other */
540 		aclentp->a_perm = vattr.va_mode & 0007;
541 		aclentp->a_id = -1;		/* Really undefined */
542 		aclentp++;
543 
544 		aclentp->a_type = CLASS_OBJ;    /* Class */
545 		aclentp->a_perm = (ushort_t)(0777);
546 		aclentp->a_id = -1;		/* Really undefined */
547 	}
548 
549 	return (0);
550 }
551 
552 /*
553  * Common code for implementing DOS share reservations
554  */
555 /* ARGSUSED4 */
556 int
557 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
558 {
559 	int error;
560 
561 	/*
562 	 * Make sure that the file was opened with permissions appropriate
563 	 * for the request, and make sure the caller isn't trying to sneak
564 	 * in an NBMAND request.
565 	 */
566 	if (cmd == F_SHARE) {
567 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
568 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
569 			return (EBADF);
570 		if (shr->s_deny & F_MANDDNY)
571 			return (EINVAL);
572 	}
573 	if (cmd == F_SHARE_NBMAND) {
574 		/* must have write permission to deny read access */
575 		if ((shr->s_deny & F_RDDNY) && (flag & FWRITE) == 0)
576 			return (EBADF);
577 		/* make sure nbmand is allowed on the file */
578 		if (!vp->v_vfsp ||
579 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
580 			return (EINVAL);
581 		}
582 		if (vp->v_type != VREG) {
583 			return (EINVAL);
584 		}
585 	}
586 
587 	nbl_start_crit(vp, RW_WRITER);
588 
589 	switch (cmd) {
590 
591 	case F_SHARE_NBMAND:
592 		shr->s_deny |= F_MANDDNY;
593 		/*FALLTHROUGH*/
594 	case F_SHARE:
595 		error = add_share(vp, shr);
596 		break;
597 
598 	case F_UNSHARE:
599 		error = del_share(vp, shr);
600 		break;
601 
602 	case F_HASREMOTELOCKS:
603 		/*
604 		 * We are overloading this command to refer to remote
605 		 * shares as well as remote locks, despite its name.
606 		 */
607 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
608 		error = 0;
609 		break;
610 
611 	default:
612 		error = EINVAL;
613 		break;
614 	}
615 
616 	nbl_end_crit(vp);
617 	return (error);
618 }
619 
620 /*ARGSUSED1*/
621 int
622 fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent)
623 {
624 	ASSERT(vp != NULL);
625 	return (ENOTSUP);
626 }
627 
628 /*ARGSUSED1*/
629 int
630 fs_vnevent_support(vnode_t *vp, vnevent_t vnevent)
631 {
632 	ASSERT(vp != NULL);
633 	return (0);
634 }
635