xref: /titanic_52/usr/src/uts/common/fs/fs_subr.c (revision 84ba300aaa958c8e8427c2ec66a932d86bee71c4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright 2015 Joyent, Inc.
29  */
30 
31 /*
32  * Generic vnode operations.
33  */
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/fcntl.h>
39 #include <sys/flock.h>
40 #include <sys/statvfs.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/proc.h>
44 #include <sys/user.h>
45 #include <sys/unistd.h>
46 #include <sys/cred.h>
47 #include <sys/poll.h>
48 #include <sys/debug.h>
49 #include <sys/cmn_err.h>
50 #include <sys/stream.h>
51 #include <fs/fs_subr.h>
52 #include <fs/fs_reparse.h>
53 #include <sys/door.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 #include <acl/acl_common.h>
61 #include <sys/pathname.h>
62 
63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
64 
/*
 * Tunable: upper bound on the number of retries attempted when recovering
 * from an ESTALE error.  Consulted by fs_need_estale_retry().
 */
int fs_estale_retry = 5;
69 
70 /*
71  * supports for reparse point door upcall
72  */
73 static door_handle_t reparsed_door;
74 static kmutex_t reparsed_door_lock;
75 
/*
 * Generic entry point for an operation the file system does not support.
 * Always fails with ENOSYS.
 */
int
fs_nosys()
{
	const int unsupported = ENOSYS;

	return (unsupported);
}
84 
/*
 * Generic entry point for an operation that is invalid on this vnode.
 * Always fails with EINVAL.
 */
int
fs_inval()
{
	const int invalid = EINVAL;

	return (invalid);
}
93 
/*
 * Generic entry point for an operation that only makes sense on a
 * directory.  Always fails with ENOTDIR.
 */
int
fs_notdir()
{
	const int not_a_directory = ENOTDIR;

	return (not_a_directory);
}
102 
103 /*
104  * Free the file system specific resources. For the file systems that
105  * do not support the forced unmount, it will be a nop function.
106  */
107 
108 /*ARGSUSED*/
109 void
110 fs_freevfs(vfs_t *vfsp)
111 {
112 }
113 
114 /* ARGSUSED */
115 int
116 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
117     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
118     caller_context_t *ct)
119 {
120 	return (ENOSYS);
121 }
122 
123 /* ARGSUSED */
124 int
125 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
126     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
127     caller_context_t *ct)
128 {
129 	return (ENOSYS);
130 }
131 
132 /* ARGSUSED */
133 int
134 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
135     struct pollhead **phpp, caller_context_t *ct)
136 {
137 	return (ENOSYS);
138 }
139 
140 
141 /*
142  * The file system has nothing to sync to disk.  However, the
143  * VFS_SYNC operation must not fail.
144  */
145 /* ARGSUSED */
146 int
147 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
148 {
149 	return (0);
150 }
151 
152 /*
153  * Does nothing but VOP_FSYNC must not fail.
154  */
155 /* ARGSUSED */
156 int
157 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
158 {
159 	return (0);
160 }
161 
162 /*
163  * Does nothing but VOP_PUTPAGE must not fail.
164  */
165 /* ARGSUSED */
166 int
167 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
168     caller_context_t *ctp)
169 {
170 	return (0);
171 }
172 
173 /*
174  * Does nothing but VOP_IOCTL must not fail.
175  */
176 /* ARGSUSED */
177 int
178 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
179     int *rvalp)
180 {
181 	return (0);
182 }
183 
184 /*
185  * Read/write lock/unlock.  Does nothing.
186  */
187 /* ARGSUSED */
188 int
189 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
190 {
191 	return (-1);
192 }
193 
194 /* ARGSUSED */
195 void
196 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
197 {
198 }
199 
200 /*
201  * Compare two vnodes.
202  */
203 /*ARGSUSED2*/
204 int
205 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
206 {
207 	return (vp1 == vp2);
208 }
209 
210 /*
211  * No-op seek operation.
212  */
213 /* ARGSUSED */
214 int
215 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
216 {
217 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
218 }
219 
220 /*
221  * File and record locking.
222  */
223 /* ARGSUSED */
224 int
225 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
226     flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
227 {
228 	int frcmd;
229 	int nlmid;
230 	int error = 0;
231 	boolean_t skip_lock = B_FALSE;
232 	flk_callback_t serialize_callback;
233 	int serialize = 0;
234 	v_mode_t mode;
235 
236 	switch (cmd) {
237 
238 	case F_GETLK:
239 	case F_O_GETLK:
240 		if (flag & F_REMOTELOCK) {
241 			frcmd = RCMDLCK;
242 		} else if (flag & F_PXFSLOCK) {
243 			frcmd = PCMDLCK;
244 		} else {
245 			frcmd = 0;
246 			bfp->l_pid = ttoproc(curthread)->p_pid;
247 			bfp->l_sysid = 0;
248 		}
249 		break;
250 
251 	case F_OFD_GETLK:
252 		/*
253 		 * TBD we do not support remote OFD locks at this time.
254 		 */
255 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
256 			error = EINVAL;
257 			goto done;
258 		}
259 		skip_lock = B_TRUE;
260 		break;
261 
262 	case F_SETLK_NBMAND:
263 		/*
264 		 * Are NBMAND locks allowed on this file?
265 		 */
266 		if (!vp->v_vfsp ||
267 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
268 			error = EINVAL;
269 			goto done;
270 		}
271 		if (vp->v_type != VREG) {
272 			error = EINVAL;
273 			goto done;
274 		}
275 		/*FALLTHROUGH*/
276 
277 	case F_SETLK:
278 		if (flag & F_REMOTELOCK) {
279 			frcmd = SETFLCK|RCMDLCK;
280 		} else if (flag & F_PXFSLOCK) {
281 			frcmd = SETFLCK|PCMDLCK;
282 		} else {
283 			frcmd = SETFLCK;
284 			bfp->l_pid = ttoproc(curthread)->p_pid;
285 			bfp->l_sysid = 0;
286 		}
287 		if (cmd == F_SETLK_NBMAND &&
288 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
289 			frcmd |= NBMLCK;
290 		}
291 
292 		if (nbl_need_check(vp)) {
293 			nbl_start_crit(vp, RW_WRITER);
294 			serialize = 1;
295 			if (frcmd & NBMLCK) {
296 				mode = (bfp->l_type == F_RDLCK) ?
297 				    V_READ : V_RDANDWR;
298 				if (vn_is_mapped(vp, mode)) {
299 					error = EAGAIN;
300 					goto done;
301 				}
302 			}
303 		}
304 		break;
305 
306 	case F_SETLKW:
307 		if (flag & F_REMOTELOCK) {
308 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
309 		} else if (flag & F_PXFSLOCK) {
310 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
311 		} else {
312 			frcmd = SETFLCK|SLPFLCK;
313 			bfp->l_pid = ttoproc(curthread)->p_pid;
314 			bfp->l_sysid = 0;
315 		}
316 
317 		if (nbl_need_check(vp)) {
318 			nbl_start_crit(vp, RW_WRITER);
319 			serialize = 1;
320 		}
321 		break;
322 
323 	case F_OFD_SETLK:
324 	case F_OFD_SETLKW:
325 	case F_FLOCK:
326 	case F_FLOCKW:
327 		/*
328 		 * TBD we do not support remote OFD locks at this time.
329 		 */
330 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
331 			error = EINVAL;
332 			goto done;
333 		}
334 		skip_lock = B_TRUE;
335 		break;
336 
337 	case F_HASREMOTELOCKS:
338 		nlmid = GETNLMID(bfp->l_sysid);
339 		if (nlmid != 0) {	/* booted as a cluster */
340 			l_has_rmt(bfp) =
341 			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
342 		} else {		/* not booted as a cluster */
343 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
344 		}
345 
346 		goto done;
347 
348 	default:
349 		error = EINVAL;
350 		goto done;
351 	}
352 
353 	/*
354 	 * If this is a blocking lock request and we're serializing lock
355 	 * requests, modify the callback list to leave the critical region
356 	 * while we're waiting for the lock.
357 	 */
358 
359 	if (serialize && (frcmd & SLPFLCK) != 0) {
360 		flk_add_callback(&serialize_callback,
361 		    frlock_serialize_blocked, vp, flk_cbp);
362 		flk_cbp = &serialize_callback;
363 	}
364 
365 	if (!skip_lock)
366 		error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
367 
368 	if (serialize && (frcmd & SLPFLCK) != 0)
369 		flk_del_callback(&serialize_callback);
370 
371 done:
372 	if (serialize)
373 		nbl_end_crit(vp);
374 
375 	return (error);
376 }
377 
378 /*
379  * Callback when a lock request blocks and we are serializing requests.  If
380  * before sleeping, leave the critical region.  If after wakeup, reenter
381  * the critical region.
382  */
383 
384 static callb_cpr_t *
385 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
386 {
387 	vnode_t *vp = (vnode_t *)infop;
388 
389 	if (when == FLK_BEFORE_SLEEP)
390 		nbl_end_crit(vp);
391 	else {
392 		nbl_start_crit(vp, RW_WRITER);
393 	}
394 
395 	return (NULL);
396 }
397 
398 /*
399  * Allow any flags.
400  */
401 /* ARGSUSED */
402 int
403 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
404 {
405 	return (0);
406 }
407 
408 /*
409  * Return the answer requested to poll() for non-device files.
410  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
411  */
412 struct pollhead fs_pollhd;
413 
414 /* ARGSUSED */
415 int
416 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
417     struct pollhead **phpp, caller_context_t *ct)
418 {
419 	*reventsp = 0;
420 	if (events & POLLIN)
421 		*reventsp |= POLLIN;
422 	if (events & POLLRDNORM)
423 		*reventsp |= POLLRDNORM;
424 	if (events & POLLRDBAND)
425 		*reventsp |= POLLRDBAND;
426 	if (events & POLLOUT)
427 		*reventsp |= POLLOUT;
428 	if (events & POLLWRBAND)
429 		*reventsp |= POLLWRBAND;
430 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
431 	return (0);
432 }
433 
434 /*
435  * POSIX pathconf() support.
436  */
437 /* ARGSUSED */
438 int
439 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
440     caller_context_t *ct)
441 {
442 	ulong_t val;
443 	int error = 0;
444 	struct statvfs64 vfsbuf;
445 
446 	switch (cmd) {
447 
448 	case _PC_LINK_MAX:
449 		val = MAXLINK;
450 		break;
451 
452 	case _PC_MAX_CANON:
453 		val = MAX_CANON;
454 		break;
455 
456 	case _PC_MAX_INPUT:
457 		val = MAX_INPUT;
458 		break;
459 
460 	case _PC_NAME_MAX:
461 		bzero(&vfsbuf, sizeof (vfsbuf));
462 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
463 			break;
464 		val = vfsbuf.f_namemax;
465 		break;
466 
467 	case _PC_PATH_MAX:
468 	case _PC_SYMLINK_MAX:
469 		val = MAXPATHLEN;
470 		break;
471 
472 	case _PC_PIPE_BUF:
473 		val = PIPE_BUF;
474 		break;
475 
476 	case _PC_NO_TRUNC:
477 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
478 			val = 1;	/* NOTRUNC is enabled for vp */
479 		else
480 			val = (ulong_t)-1;
481 		break;
482 
483 	case _PC_VDISABLE:
484 		val = _POSIX_VDISABLE;
485 		break;
486 
487 	case _PC_CHOWN_RESTRICTED:
488 		if (rstchown)
489 			val = rstchown; /* chown restricted enabled */
490 		else
491 			val = (ulong_t)-1;
492 		break;
493 
494 	case _PC_FILESIZEBITS:
495 
496 		/*
497 		 * If ever we come here it means that underlying file system
498 		 * does not recognise the command and therefore this
499 		 * configurable limit cannot be determined. We return -1
500 		 * and don't change errno.
501 		 */
502 
503 		val = (ulong_t)-1;    /* large file support */
504 		break;
505 
506 	case _PC_ACL_ENABLED:
507 		val = 0;
508 		break;
509 
510 	case _PC_CASE_BEHAVIOR:
511 		val = _CASE_SENSITIVE;
512 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
513 			val |= _CASE_INSENSITIVE;
514 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
515 			val &= ~_CASE_SENSITIVE;
516 		break;
517 
518 	case _PC_SATTR_ENABLED:
519 	case _PC_SATTR_EXISTS:
520 		val = 0;
521 		break;
522 
523 	case _PC_ACCESS_FILTERING:
524 		val = 0;
525 		break;
526 
527 	default:
528 		error = EINVAL;
529 		break;
530 	}
531 
532 	if (error == 0)
533 		*valp = val;
534 	return (error);
535 }
536 
537 /*
538  * Dispose of a page.
539  */
540 /* ARGSUSED */
541 void
542 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
543     caller_context_t *ct)
544 {
545 
546 	ASSERT(fl == B_FREE || fl == B_INVAL);
547 
548 	if (fl == B_FREE)
549 		page_free(pp, dn);
550 	else
551 		page_destroy(pp, dn);
552 }
553 
554 /* ARGSUSED */
555 void
556 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
557     caller_context_t *ct)
558 {
559 	cmn_err(CE_PANIC, "fs_nodispose invoked");
560 }
561 
562 /*
563  * fabricate acls for file systems that do not support acls.
564  */
565 /* ARGSUSED */
566 int
567 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
568     caller_context_t *ct)
569 {
570 	aclent_t	*aclentp;
571 	struct vattr	vattr;
572 	int		error;
573 	size_t		aclsize;
574 
575 	vsecattr->vsa_aclcnt	= 0;
576 	vsecattr->vsa_aclentsz	= 0;
577 	vsecattr->vsa_aclentp	= NULL;
578 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
579 	vsecattr->vsa_dfaclentp	= NULL;
580 
581 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
582 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
583 		return (error);
584 
585 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
586 		aclsize = 4 * sizeof (aclent_t);
587 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
588 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
589 		aclentp = vsecattr->vsa_aclentp;
590 
591 		aclentp->a_type = USER_OBJ;	/* Owner */
592 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
593 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
594 		aclentp++;
595 
596 		aclentp->a_type = GROUP_OBJ;    /* Group */
597 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
598 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
599 		aclentp++;
600 
601 		aclentp->a_type = OTHER_OBJ;    /* Other */
602 		aclentp->a_perm = vattr.va_mode & 0007;
603 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
604 		aclentp++;
605 
606 		aclentp->a_type = CLASS_OBJ;    /* Class */
607 		aclentp->a_perm = (ushort_t)(0007);
608 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
609 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
610 		VERIFY(0 == acl_trivial_create(vattr.va_mode,
611 		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
612 		    &vsecattr->vsa_aclcnt));
613 		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
614 	}
615 
616 	return (error);
617 }
618 
619 /*
620  * Common code for implementing DOS share reservations
621  */
622 /* ARGSUSED4 */
623 int
624 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
625     caller_context_t *ct)
626 {
627 	int error;
628 
629 	/*
630 	 * Make sure that the file was opened with permissions appropriate
631 	 * for the request, and make sure the caller isn't trying to sneak
632 	 * in an NBMAND request.
633 	 */
634 	if (cmd == F_SHARE) {
635 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
636 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
637 			return (EBADF);
638 		if (shr->s_access & (F_RMACC | F_MDACC))
639 			return (EINVAL);
640 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
641 			return (EINVAL);
642 	}
643 	if (cmd == F_SHARE_NBMAND) {
644 		/* make sure nbmand is allowed on the file */
645 		if (!vp->v_vfsp ||
646 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
647 			return (EINVAL);
648 		}
649 		if (vp->v_type != VREG) {
650 			return (EINVAL);
651 		}
652 	}
653 
654 	nbl_start_crit(vp, RW_WRITER);
655 
656 	switch (cmd) {
657 
658 	case F_SHARE_NBMAND:
659 		shr->s_deny |= F_MANDDNY;
660 		/*FALLTHROUGH*/
661 	case F_SHARE:
662 		error = add_share(vp, shr);
663 		break;
664 
665 	case F_UNSHARE:
666 		error = del_share(vp, shr);
667 		break;
668 
669 	case F_HASREMOTELOCKS:
670 		/*
671 		 * We are overloading this command to refer to remote
672 		 * shares as well as remote locks, despite its name.
673 		 */
674 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
675 		error = 0;
676 		break;
677 
678 	default:
679 		error = EINVAL;
680 		break;
681 	}
682 
683 	nbl_end_crit(vp);
684 	return (error);
685 }
686 
687 /*ARGSUSED1*/
688 int
689 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
690     caller_context_t *ct)
691 {
692 	ASSERT(vp != NULL);
693 	return (ENOTSUP);
694 }
695 
696 /*ARGSUSED1*/
697 int
698 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
699     caller_context_t *ct)
700 {
701 	ASSERT(vp != NULL);
702 	return (0);
703 }
704 
705 /*
706  * return 1 for non-trivial ACL.
707  *
708  * NB: It is not necessary for the caller to VOP_RWLOCK since
709  *	we only issue VOP_GETSECATTR.
710  *
711  * Returns 0 == trivial
712  *         1 == NOT Trivial
713  *	   <0 could not determine.
714  */
715 int
716 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
717 {
718 	ulong_t		acl_styles;
719 	ulong_t		acl_flavor;
720 	vsecattr_t 	vsecattr;
721 	int 		error;
722 	int		isnontrivial;
723 
724 	/* determine the forms of ACLs maintained */
725 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
726 
727 	/* clear bits we don't understand and establish default acl_style */
728 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
729 	if (error || (acl_styles == 0))
730 		acl_styles = _ACL_ACLENT_ENABLED;
731 
732 	vsecattr.vsa_aclentp = NULL;
733 	vsecattr.vsa_dfaclentp = NULL;
734 	vsecattr.vsa_aclcnt = 0;
735 	vsecattr.vsa_dfaclcnt = 0;
736 
737 	while (acl_styles) {
738 		/* select one of the styles as current flavor */
739 		acl_flavor = 0;
740 		if (acl_styles & _ACL_ACLENT_ENABLED) {
741 			acl_flavor = _ACL_ACLENT_ENABLED;
742 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
743 		} else if (acl_styles & _ACL_ACE_ENABLED) {
744 			acl_flavor = _ACL_ACE_ENABLED;
745 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
746 		}
747 
748 		ASSERT(vsecattr.vsa_mask && acl_flavor);
749 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
750 		if (error == 0)
751 			break;
752 
753 		/* that flavor failed */
754 		acl_styles &= ~acl_flavor;
755 	}
756 
757 	/* if all styles fail then assume trivial */
758 	if (acl_styles == 0)
759 		return (0);
760 
761 	/* process the flavor that worked */
762 	isnontrivial = 0;
763 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
764 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
765 			isnontrivial = 1;
766 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
767 			kmem_free(vsecattr.vsa_aclentp,
768 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
769 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
770 			kmem_free(vsecattr.vsa_dfaclentp,
771 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
772 	}
773 	if (acl_flavor & _ACL_ACE_ENABLED) {
774 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
775 		    vsecattr.vsa_aclcnt);
776 
777 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
778 			kmem_free(vsecattr.vsa_aclentp,
779 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
780 		/* ACE has no vsecattr.vsa_dfaclcnt */
781 	}
782 	return (isnontrivial);
783 }
784 
785 /*
786  * Check whether we need a retry to recover from STALE error.
787  */
788 int
789 fs_need_estale_retry(int retry_count)
790 {
791 	if (retry_count < fs_estale_retry)
792 		return (1);
793 	else
794 		return (0);
795 }
796 
797 
798 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
799 
800 /*
801  * Routine for anti-virus scanner to call to register its scanning routine.
802  */
803 void
804 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
805 {
806 	fs_av_scan = av_scan;
807 }
808 
809 /*
810  * Routine for file systems to call to initiate anti-virus scanning.
811  * Scanning will only be done on REGular files (currently).
812  */
813 int
814 fs_vscan(vnode_t *vp, cred_t *cr, int async)
815 {
816 	int ret = 0;
817 
818 	if (fs_av_scan && vp->v_type == VREG)
819 		ret = (*fs_av_scan)(vp, cr, async);
820 
821 	return (ret);
822 }
823 
/*
 * Support functions for reparse points.
 */
827 /*
828  * reparse_vnode_parse
829  *
830  * Read the symlink data of a reparse point specified by the vnode
831  * and return the reparse data as name-value pair in the nvlist.
832  */
833 int
834 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
835 {
836 	int err;
837 	char *lkdata;
838 	struct uio uio;
839 	struct iovec iov;
840 
841 	if (vp == NULL || nvl == NULL)
842 		return (EINVAL);
843 
844 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
845 
846 	/*
847 	 * Set up io vector to read sym link data
848 	 */
849 	iov.iov_base = lkdata;
850 	iov.iov_len = MAXREPARSELEN;
851 	uio.uio_iov = &iov;
852 	uio.uio_iovcnt = 1;
853 	uio.uio_segflg = UIO_SYSSPACE;
854 	uio.uio_extflg = UIO_COPY_CACHED;
855 	uio.uio_loffset = (offset_t)0;
856 	uio.uio_resid = MAXREPARSELEN;
857 
858 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
859 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
860 		err = reparse_parse(lkdata, nvl);
861 	}
862 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
863 
864 	return (err);
865 }
866 
867 void
868 reparse_point_init()
869 {
870 	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
871 }
872 
873 static door_handle_t
874 reparse_door_get_handle()
875 {
876 	door_handle_t dh;
877 
878 	mutex_enter(&reparsed_door_lock);
879 	if ((dh = reparsed_door) == NULL) {
880 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
881 			reparsed_door = NULL;
882 			dh = NULL;
883 		} else
884 			dh = reparsed_door;
885 	}
886 	mutex_exit(&reparsed_door_lock);
887 	return (dh);
888 }
889 
890 static void
891 reparse_door_reset_handle()
892 {
893 	mutex_enter(&reparsed_door_lock);
894 	reparsed_door = NULL;
895 	mutex_exit(&reparsed_door_lock);
896 }
897 
898 /*
899  * reparse_kderef
900  *
901  * Accepts the service-specific item from the reparse point and returns
902  * the service-specific data requested.  The caller specifies the size of
903  * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
904  * if the results will not fit in the buffer, in which case, *bufsz will
905  * contain the number of bytes needed to hold the results.
906  *
907  * if ok return 0 and update *bufsize with length of actual result
908  * else return error code.
909  */
910 int
911 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
912     size_t *bufsize)
913 {
914 	int err, retries, need_free, retried_doorhd;
915 	size_t dlen, res_len;
916 	char *darg;
917 	door_arg_t door_args;
918 	reparsed_door_res_t *resp;
919 	door_handle_t rp_door;
920 
921 	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
922 	    bufsize == NULL)
923 		return (EINVAL);
924 
925 	/* get reparsed's door handle */
926 	if ((rp_door = reparse_door_get_handle()) == NULL)
927 		return (EBADF);
928 
929 	/* setup buffer for door_call args and results */
930 	dlen = strlen(svc_type) + strlen(svc_data) + 2;
931 	if (*bufsize < dlen) {
932 		darg = kmem_alloc(dlen, KM_SLEEP);
933 		need_free = 1;
934 	} else {
935 		darg = buf;	/* use same buffer for door's args & results */
936 		need_free = 0;
937 	}
938 
939 	/* build argument string of door call */
940 	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
941 
942 	/* setup args for door call */
943 	door_args.data_ptr = darg;
944 	door_args.data_size = dlen;
945 	door_args.desc_ptr = NULL;
946 	door_args.desc_num = 0;
947 	door_args.rbuf = buf;
948 	door_args.rsize = *bufsize;
949 
950 	/* do the door_call */
951 	retried_doorhd = 0;
952 	retries = 0;
953 	door_ki_hold(rp_door);
954 	while ((err = door_ki_upcall_limited(rp_door, &door_args,
955 	    NULL, SIZE_MAX, 0)) != 0) {
956 		if (err == EAGAIN || err == EINTR) {
957 			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
958 				delay(SEC_TO_TICK(1));
959 				continue;
960 			}
961 		} else if (err == EBADF) {
962 			/* door server goes away... */
963 			reparse_door_reset_handle();
964 
965 			if (retried_doorhd == 0) {
966 				door_ki_rele(rp_door);
967 				retried_doorhd++;
968 				rp_door = reparse_door_get_handle();
969 				if (rp_door != NULL) {
970 					door_ki_hold(rp_door);
971 					continue;
972 				}
973 			}
974 		}
975 		break;
976 	}
977 
978 	if (rp_door)
979 		door_ki_rele(rp_door);
980 
981 	if (need_free)
982 		kmem_free(darg, dlen);		/* done with args buffer */
983 
984 	if (err != 0)
985 		return (err);
986 
987 	resp = (reparsed_door_res_t *)door_args.rbuf;
988 	if ((err = resp->res_status) == 0) {
989 		/*
990 		 * have to save the length of the results before the
991 		 * bcopy below since it's can be an overlap copy that
992 		 * overwrites the reparsed_door_res_t structure at
993 		 * the beginning of the buffer.
994 		 */
995 		res_len = (size_t)resp->res_len;
996 
997 		/* deref call is ok */
998 		if (res_len > *bufsize)
999 			err = EOVERFLOW;
1000 		else
1001 			bcopy(resp->res_data, buf, res_len);
1002 		*bufsize = res_len;
1003 	}
1004 	if (door_args.rbuf != buf)
1005 		kmem_free(door_args.rbuf, door_args.rsize);
1006 
1007 	return (err);
1008 }
1009