xref: /titanic_51/usr/src/uts/common/fs/fs_subr.c (revision 003a15e8c33d09669429b8c89e5832aac8b92828)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright 2015 Joyent, Inc.
29  */
30 
31 /*
32  * Generic vnode operations.
33  */
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/fcntl.h>
39 #include <sys/flock.h>
40 #include <sys/statvfs.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/proc.h>
44 #include <sys/user.h>
45 #include <sys/unistd.h>
46 #include <sys/cred.h>
47 #include <sys/poll.h>
48 #include <sys/debug.h>
49 #include <sys/cmn_err.h>
50 #include <sys/stream.h>
51 #include <fs/fs_subr.h>
52 #include <fs/fs_reparse.h>
53 #include <sys/door.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 #include <acl/acl_common.h>
61 #include <sys/pathname.h>
62 
63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
64 
/*
 * Tunable: upper bound on the number of retries attempted when
 * recovering from a stale (ESTALE) error.
 */
int fs_estale_retry = 5;
69 
70 /*
71  * supports for reparse point door upcall
72  */
73 static door_handle_t reparsed_door;
74 static kmutex_t reparsed_door_lock;
75 
/*
 * Generic entry point for an operation the file system does not
 * support.  Always returns ENOSYS.
 */
int
fs_nosys()
{
	return (ENOSYS);
}
84 
/*
 * Generic entry point for an operation that is invalid on this vnode.
 * Always returns EINVAL.
 */
int
fs_inval()
{
	return (EINVAL);
}
93 
/*
 * Generic entry point for an operation that is only valid on
 * directories.  Always returns ENOTDIR.
 */
int
fs_notdir()
{
	return (ENOTDIR);
}
102 
103 /*
104  * Free the file system specific resources. For the file systems that
105  * do not support the forced unmount, it will be a nop function.
106  */
107 
108 /*ARGSUSED*/
109 void
110 fs_freevfs(vfs_t *vfsp)
111 {
112 }
113 
114 /* ARGSUSED */
115 int
116 fs_nosys_map(struct vnode *vp,
117 	offset_t off,
118 	struct as *as,
119 	caddr_t *addrp,
120 	size_t len,
121 	uchar_t prot,
122 	uchar_t maxprot,
123 	uint_t flags,
124 	struct cred *cr,
125 	caller_context_t *ct)
126 {
127 	return (ENOSYS);
128 }
129 
130 /* ARGSUSED */
131 int
132 fs_nosys_addmap(struct vnode *vp,
133 	offset_t off,
134 	struct as *as,
135 	caddr_t addr,
136 	size_t len,
137 	uchar_t prot,
138 	uchar_t maxprot,
139 	uint_t flags,
140 	struct cred *cr,
141 	caller_context_t *ct)
142 {
143 	return (ENOSYS);
144 }
145 
146 /* ARGSUSED */
147 int
148 fs_nosys_poll(vnode_t *vp,
149 	register short events,
150 	int anyyet,
151 	register short *reventsp,
152 	struct pollhead **phpp,
153 	caller_context_t *ct)
154 {
155 	return (ENOSYS);
156 }
157 
158 
159 /*
160  * The file system has nothing to sync to disk.  However, the
161  * VFS_SYNC operation must not fail.
162  */
163 /* ARGSUSED */
164 int
165 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
166 {
167 	return (0);
168 }
169 
170 /*
171  * Does nothing but VOP_FSYNC must not fail.
172  */
173 /* ARGSUSED */
174 int
175 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
176 {
177 	return (0);
178 }
179 
180 /*
181  * Does nothing but VOP_PUTPAGE must not fail.
182  */
183 /* ARGSUSED */
184 int
185 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
186 	caller_context_t *ctp)
187 {
188 	return (0);
189 }
190 
191 /*
192  * Does nothing but VOP_IOCTL must not fail.
193  */
194 /* ARGSUSED */
195 int
196 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
197 	int *rvalp)
198 {
199 	return (0);
200 }
201 
202 /*
203  * Read/write lock/unlock.  Does nothing.
204  */
205 /* ARGSUSED */
206 int
207 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
208 {
209 	return (-1);
210 }
211 
212 /* ARGSUSED */
213 void
214 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
215 {
216 }
217 
218 /*
219  * Compare two vnodes.
220  */
221 /*ARGSUSED2*/
222 int
223 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
224 {
225 	return (vp1 == vp2);
226 }
227 
228 /*
229  * No-op seek operation.
230  */
231 /* ARGSUSED */
232 int
233 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
234 {
235 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
236 }
237 
238 /*
239  * File and record locking.
240  */
241 /* ARGSUSED */
242 int
243 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
244 	offset_t offset, flk_callback_t *flk_cbp, cred_t *cr,
245 	caller_context_t *ct)
246 {
247 	int frcmd;
248 	int nlmid;
249 	int error = 0;
250 	boolean_t skip_lock = B_FALSE;
251 	flk_callback_t serialize_callback;
252 	int serialize = 0;
253 	v_mode_t mode;
254 
255 	switch (cmd) {
256 
257 	case F_GETLK:
258 	case F_O_GETLK:
259 		if (flag & F_REMOTELOCK) {
260 			frcmd = RCMDLCK;
261 		} else if (flag & F_PXFSLOCK) {
262 			frcmd = PCMDLCK;
263 		} else {
264 			frcmd = 0;
265 			bfp->l_pid = ttoproc(curthread)->p_pid;
266 			bfp->l_sysid = 0;
267 		}
268 		break;
269 
270 	case F_OFD_GETLK:
271 		/*
272 		 * TBD we do not support remote OFD locks at this time.
273 		 */
274 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
275 			error = EINVAL;
276 			goto done;
277 		}
278 		skip_lock = B_TRUE;
279 		break;
280 
281 	case F_SETLK_NBMAND:
282 		/*
283 		 * Are NBMAND locks allowed on this file?
284 		 */
285 		if (!vp->v_vfsp ||
286 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
287 			error = EINVAL;
288 			goto done;
289 		}
290 		if (vp->v_type != VREG) {
291 			error = EINVAL;
292 			goto done;
293 		}
294 		/*FALLTHROUGH*/
295 
296 	case F_SETLK:
297 		if (flag & F_REMOTELOCK) {
298 			frcmd = SETFLCK|RCMDLCK;
299 		} else if (flag & F_PXFSLOCK) {
300 			frcmd = SETFLCK|PCMDLCK;
301 		} else {
302 			frcmd = SETFLCK;
303 			bfp->l_pid = ttoproc(curthread)->p_pid;
304 			bfp->l_sysid = 0;
305 		}
306 		if (cmd == F_SETLK_NBMAND &&
307 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
308 			frcmd |= NBMLCK;
309 		}
310 
311 		if (nbl_need_check(vp)) {
312 			nbl_start_crit(vp, RW_WRITER);
313 			serialize = 1;
314 			if (frcmd & NBMLCK) {
315 				mode = (bfp->l_type == F_RDLCK) ?
316 				    V_READ : V_RDANDWR;
317 				if (vn_is_mapped(vp, mode)) {
318 					error = EAGAIN;
319 					goto done;
320 				}
321 			}
322 		}
323 		break;
324 
325 	case F_SETLKW:
326 		if (flag & F_REMOTELOCK) {
327 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
328 		} else if (flag & F_PXFSLOCK) {
329 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
330 		} else {
331 			frcmd = SETFLCK|SLPFLCK;
332 			bfp->l_pid = ttoproc(curthread)->p_pid;
333 			bfp->l_sysid = 0;
334 		}
335 
336 		if (nbl_need_check(vp)) {
337 			nbl_start_crit(vp, RW_WRITER);
338 			serialize = 1;
339 		}
340 		break;
341 
342 	case F_OFD_SETLK:
343 	case F_OFD_SETLKW:
344 	case F_FLOCK:
345 	case F_FLOCKW:
346 		/*
347 		 * TBD we do not support remote OFD locks at this time.
348 		 */
349 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
350 			error = EINVAL;
351 			goto done;
352 		}
353 		skip_lock = B_TRUE;
354 		break;
355 
356 	case F_HASREMOTELOCKS:
357 		nlmid = GETNLMID(bfp->l_sysid);
358 		if (nlmid != 0) {	/* booted as a cluster */
359 			l_has_rmt(bfp) =
360 			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
361 		} else {		/* not booted as a cluster */
362 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
363 		}
364 
365 		goto done;
366 
367 	default:
368 		error = EINVAL;
369 		goto done;
370 	}
371 
372 	/*
373 	 * If this is a blocking lock request and we're serializing lock
374 	 * requests, modify the callback list to leave the critical region
375 	 * while we're waiting for the lock.
376 	 */
377 
378 	if (serialize && (frcmd & SLPFLCK) != 0) {
379 		flk_add_callback(&serialize_callback,
380 		    frlock_serialize_blocked, vp, flk_cbp);
381 		flk_cbp = &serialize_callback;
382 	}
383 
384 	if (!skip_lock)
385 		error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
386 
387 done:
388 	if (serialize)
389 		nbl_end_crit(vp);
390 
391 	return (error);
392 }
393 
394 /*
395  * Callback when a lock request blocks and we are serializing requests.  If
396  * before sleeping, leave the critical region.  If after wakeup, reenter
397  * the critical region.
398  */
399 
400 static callb_cpr_t *
401 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
402 {
403 	vnode_t *vp = (vnode_t *)infop;
404 
405 	if (when == FLK_BEFORE_SLEEP)
406 		nbl_end_crit(vp);
407 	else {
408 		nbl_start_crit(vp, RW_WRITER);
409 	}
410 
411 	return (NULL);
412 }
413 
414 /*
415  * Allow any flags.
416  */
417 /* ARGSUSED */
418 int
419 fs_setfl(
420 	vnode_t *vp,
421 	int oflags,
422 	int nflags,
423 	cred_t *cr,
424 	caller_context_t *ct)
425 {
426 	return (0);
427 }
428 
429 /*
430  * Return the answer requested to poll() for non-device files.
431  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
432  */
433 struct pollhead fs_pollhd;
434 
435 /* ARGSUSED */
436 int
437 fs_poll(vnode_t *vp,
438 	register short events,
439 	int anyyet,
440 	register short *reventsp,
441 	struct pollhead **phpp,
442 	caller_context_t *ct)
443 {
444 	*reventsp = 0;
445 	if (events & POLLIN)
446 		*reventsp |= POLLIN;
447 	if (events & POLLRDNORM)
448 		*reventsp |= POLLRDNORM;
449 	if (events & POLLRDBAND)
450 		*reventsp |= POLLRDBAND;
451 	if (events & POLLOUT)
452 		*reventsp |= POLLOUT;
453 	if (events & POLLWRBAND)
454 		*reventsp |= POLLWRBAND;
455 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
456 	return (0);
457 }
458 
459 /*
460  * POSIX pathconf() support.
461  */
462 /* ARGSUSED */
463 int
464 fs_pathconf(
465 	vnode_t *vp,
466 	int cmd,
467 	ulong_t *valp,
468 	cred_t *cr,
469 	caller_context_t *ct)
470 {
471 	register ulong_t val;
472 	register int error = 0;
473 	struct statvfs64 vfsbuf;
474 
475 	switch (cmd) {
476 
477 	case _PC_LINK_MAX:
478 		val = MAXLINK;
479 		break;
480 
481 	case _PC_MAX_CANON:
482 		val = MAX_CANON;
483 		break;
484 
485 	case _PC_MAX_INPUT:
486 		val = MAX_INPUT;
487 		break;
488 
489 	case _PC_NAME_MAX:
490 		bzero(&vfsbuf, sizeof (vfsbuf));
491 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
492 			break;
493 		val = vfsbuf.f_namemax;
494 		break;
495 
496 	case _PC_PATH_MAX:
497 	case _PC_SYMLINK_MAX:
498 		val = MAXPATHLEN;
499 		break;
500 
501 	case _PC_PIPE_BUF:
502 		val = PIPE_BUF;
503 		break;
504 
505 	case _PC_NO_TRUNC:
506 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
507 			val = 1;	/* NOTRUNC is enabled for vp */
508 		else
509 			val = (ulong_t)-1;
510 		break;
511 
512 	case _PC_VDISABLE:
513 		val = _POSIX_VDISABLE;
514 		break;
515 
516 	case _PC_CHOWN_RESTRICTED:
517 		if (rstchown)
518 			val = rstchown; /* chown restricted enabled */
519 		else
520 			val = (ulong_t)-1;
521 		break;
522 
523 	case _PC_FILESIZEBITS:
524 
525 		/*
526 		 * If ever we come here it means that underlying file system
527 		 * does not recognise the command and therefore this
528 		 * configurable limit cannot be determined. We return -1
529 		 * and don't change errno.
530 		 */
531 
532 		val = (ulong_t)-1;    /* large file support */
533 		break;
534 
535 	case _PC_ACL_ENABLED:
536 		val = 0;
537 		break;
538 
539 	case _PC_CASE_BEHAVIOR:
540 		val = _CASE_SENSITIVE;
541 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
542 			val |= _CASE_INSENSITIVE;
543 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
544 			val &= ~_CASE_SENSITIVE;
545 		break;
546 
547 	case _PC_SATTR_ENABLED:
548 	case _PC_SATTR_EXISTS:
549 		val = 0;
550 		break;
551 
552 	case _PC_ACCESS_FILTERING:
553 		val = 0;
554 		break;
555 
556 	default:
557 		error = EINVAL;
558 		break;
559 	}
560 
561 	if (error == 0)
562 		*valp = val;
563 	return (error);
564 }
565 
566 /*
567  * Dispose of a page.
568  */
569 /* ARGSUSED */
570 void
571 fs_dispose(
572 	struct vnode *vp,
573 	page_t *pp,
574 	int fl,
575 	int dn,
576 	struct cred *cr,
577 	caller_context_t *ct)
578 {
579 
580 	ASSERT(fl == B_FREE || fl == B_INVAL);
581 
582 	if (fl == B_FREE)
583 		page_free(pp, dn);
584 	else
585 		page_destroy(pp, dn);
586 }
587 
588 /* ARGSUSED */
589 void
590 fs_nodispose(
591 	struct vnode *vp,
592 	page_t *pp,
593 	int fl,
594 	int dn,
595 	struct cred *cr,
596 	caller_context_t *ct)
597 {
598 	cmn_err(CE_PANIC, "fs_nodispose invoked");
599 }
600 
601 /*
602  * fabricate acls for file systems that do not support acls.
603  */
604 /* ARGSUSED */
605 int
606 fs_fab_acl(
607 	vnode_t *vp,
608 	vsecattr_t *vsecattr,
609 	int flag,
610 	cred_t *cr,
611 	caller_context_t *ct)
612 {
613 	aclent_t	*aclentp;
614 	struct vattr	vattr;
615 	int		error;
616 	size_t		aclsize;
617 
618 	vsecattr->vsa_aclcnt	= 0;
619 	vsecattr->vsa_aclentsz	= 0;
620 	vsecattr->vsa_aclentp	= NULL;
621 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
622 	vsecattr->vsa_dfaclentp	= NULL;
623 
624 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
625 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
626 		return (error);
627 
628 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
629 		aclsize = 4 * sizeof (aclent_t);
630 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
631 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
632 		aclentp = vsecattr->vsa_aclentp;
633 
634 		aclentp->a_type = USER_OBJ;	/* Owner */
635 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
636 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
637 		aclentp++;
638 
639 		aclentp->a_type = GROUP_OBJ;    /* Group */
640 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
641 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
642 		aclentp++;
643 
644 		aclentp->a_type = OTHER_OBJ;    /* Other */
645 		aclentp->a_perm = vattr.va_mode & 0007;
646 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
647 		aclentp++;
648 
649 		aclentp->a_type = CLASS_OBJ;    /* Class */
650 		aclentp->a_perm = (ushort_t)(0007);
651 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
652 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
653 		VERIFY(0 == acl_trivial_create(vattr.va_mode,
654 		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
655 		    &vsecattr->vsa_aclcnt));
656 		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
657 	}
658 
659 	return (error);
660 }
661 
662 /*
663  * Common code for implementing DOS share reservations
664  */
665 /* ARGSUSED4 */
666 int
667 fs_shrlock(
668 	struct vnode *vp,
669 	int cmd,
670 	struct shrlock *shr,
671 	int flag,
672 	cred_t *cr,
673 	caller_context_t *ct)
674 {
675 	int error;
676 
677 	/*
678 	 * Make sure that the file was opened with permissions appropriate
679 	 * for the request, and make sure the caller isn't trying to sneak
680 	 * in an NBMAND request.
681 	 */
682 	if (cmd == F_SHARE) {
683 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
684 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
685 			return (EBADF);
686 		if (shr->s_access & (F_RMACC | F_MDACC))
687 			return (EINVAL);
688 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
689 			return (EINVAL);
690 	}
691 	if (cmd == F_SHARE_NBMAND) {
692 		/* make sure nbmand is allowed on the file */
693 		if (!vp->v_vfsp ||
694 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
695 			return (EINVAL);
696 		}
697 		if (vp->v_type != VREG) {
698 			return (EINVAL);
699 		}
700 	}
701 
702 	nbl_start_crit(vp, RW_WRITER);
703 
704 	switch (cmd) {
705 
706 	case F_SHARE_NBMAND:
707 		shr->s_deny |= F_MANDDNY;
708 		/*FALLTHROUGH*/
709 	case F_SHARE:
710 		error = add_share(vp, shr);
711 		break;
712 
713 	case F_UNSHARE:
714 		error = del_share(vp, shr);
715 		break;
716 
717 	case F_HASREMOTELOCKS:
718 		/*
719 		 * We are overloading this command to refer to remote
720 		 * shares as well as remote locks, despite its name.
721 		 */
722 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
723 		error = 0;
724 		break;
725 
726 	default:
727 		error = EINVAL;
728 		break;
729 	}
730 
731 	nbl_end_crit(vp);
732 	return (error);
733 }
734 
735 /*ARGSUSED1*/
736 int
737 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
738     caller_context_t *ct)
739 {
740 	ASSERT(vp != NULL);
741 	return (ENOTSUP);
742 }
743 
744 /*ARGSUSED1*/
745 int
746 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
747     caller_context_t *ct)
748 {
749 	ASSERT(vp != NULL);
750 	return (0);
751 }
752 
753 /*
754  * return 1 for non-trivial ACL.
755  *
756  * NB: It is not necessary for the caller to VOP_RWLOCK since
757  *	we only issue VOP_GETSECATTR.
758  *
759  * Returns 0 == trivial
760  *         1 == NOT Trivial
761  *	   <0 could not determine.
762  */
763 int
764 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
765 {
766 	ulong_t		acl_styles;
767 	ulong_t		acl_flavor;
768 	vsecattr_t 	vsecattr;
769 	int 		error;
770 	int		isnontrivial;
771 
772 	/* determine the forms of ACLs maintained */
773 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
774 
775 	/* clear bits we don't understand and establish default acl_style */
776 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
777 	if (error || (acl_styles == 0))
778 		acl_styles = _ACL_ACLENT_ENABLED;
779 
780 	vsecattr.vsa_aclentp = NULL;
781 	vsecattr.vsa_dfaclentp = NULL;
782 	vsecattr.vsa_aclcnt = 0;
783 	vsecattr.vsa_dfaclcnt = 0;
784 
785 	while (acl_styles) {
786 		/* select one of the styles as current flavor */
787 		acl_flavor = 0;
788 		if (acl_styles & _ACL_ACLENT_ENABLED) {
789 			acl_flavor = _ACL_ACLENT_ENABLED;
790 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
791 		} else if (acl_styles & _ACL_ACE_ENABLED) {
792 			acl_flavor = _ACL_ACE_ENABLED;
793 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
794 		}
795 
796 		ASSERT(vsecattr.vsa_mask && acl_flavor);
797 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
798 		if (error == 0)
799 			break;
800 
801 		/* that flavor failed */
802 		acl_styles &= ~acl_flavor;
803 	}
804 
805 	/* if all styles fail then assume trivial */
806 	if (acl_styles == 0)
807 		return (0);
808 
809 	/* process the flavor that worked */
810 	isnontrivial = 0;
811 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
812 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
813 			isnontrivial = 1;
814 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
815 			kmem_free(vsecattr.vsa_aclentp,
816 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
817 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
818 			kmem_free(vsecattr.vsa_dfaclentp,
819 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
820 	}
821 	if (acl_flavor & _ACL_ACE_ENABLED) {
822 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
823 		    vsecattr.vsa_aclcnt);
824 
825 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
826 			kmem_free(vsecattr.vsa_aclentp,
827 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
828 		/* ACE has no vsecattr.vsa_dfaclcnt */
829 	}
830 	return (isnontrivial);
831 }
832 
833 /*
834  * Check whether we need a retry to recover from STALE error.
835  */
836 int
837 fs_need_estale_retry(int retry_count)
838 {
839 	if (retry_count < fs_estale_retry)
840 		return (1);
841 	else
842 		return (0);
843 }
844 
845 
846 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
847 
848 /*
849  * Routine for anti-virus scanner to call to register its scanning routine.
850  */
851 void
852 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
853 {
854 	fs_av_scan = av_scan;
855 }
856 
857 /*
858  * Routine for file systems to call to initiate anti-virus scanning.
859  * Scanning will only be done on REGular files (currently).
860  */
861 int
862 fs_vscan(vnode_t *vp, cred_t *cr, int async)
863 {
864 	int ret = 0;
865 
866 	if (fs_av_scan && vp->v_type == VREG)
867 		ret = (*fs_av_scan)(vp, cr, async);
868 
869 	return (ret);
870 }
871 
872 /*
873  * support functions for reparse point
874  */
875 /*
876  * reparse_vnode_parse
877  *
878  * Read the symlink data of a reparse point specified by the vnode
879  * and return the reparse data as name-value pair in the nvlist.
880  */
881 int
882 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
883 {
884 	int err;
885 	char *lkdata;
886 	struct uio uio;
887 	struct iovec iov;
888 
889 	if (vp == NULL || nvl == NULL)
890 		return (EINVAL);
891 
892 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
893 
894 	/*
895 	 * Set up io vector to read sym link data
896 	 */
897 	iov.iov_base = lkdata;
898 	iov.iov_len = MAXREPARSELEN;
899 	uio.uio_iov = &iov;
900 	uio.uio_iovcnt = 1;
901 	uio.uio_segflg = UIO_SYSSPACE;
902 	uio.uio_extflg = UIO_COPY_CACHED;
903 	uio.uio_loffset = (offset_t)0;
904 	uio.uio_resid = MAXREPARSELEN;
905 
906 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
907 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
908 		err = reparse_parse(lkdata, nvl);
909 	}
910 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
911 
912 	return (err);
913 }
914 
915 void
916 reparse_point_init()
917 {
918 	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
919 }
920 
921 static door_handle_t
922 reparse_door_get_handle()
923 {
924 	door_handle_t dh;
925 
926 	mutex_enter(&reparsed_door_lock);
927 	if ((dh = reparsed_door) == NULL) {
928 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
929 			reparsed_door = NULL;
930 			dh = NULL;
931 		} else
932 			dh = reparsed_door;
933 	}
934 	mutex_exit(&reparsed_door_lock);
935 	return (dh);
936 }
937 
938 static void
939 reparse_door_reset_handle()
940 {
941 	mutex_enter(&reparsed_door_lock);
942 	reparsed_door = NULL;
943 	mutex_exit(&reparsed_door_lock);
944 }
945 
946 /*
947  * reparse_kderef
948  *
949  * Accepts the service-specific item from the reparse point and returns
950  * the service-specific data requested.  The caller specifies the size of
951  * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
952  * if the results will not fit in the buffer, in which case, *bufsz will
953  * contain the number of bytes needed to hold the results.
954  *
955  * if ok return 0 and update *bufsize with length of actual result
956  * else return error code.
957  */
958 int
959 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
960     size_t *bufsize)
961 {
962 	int err, retries, need_free, retried_doorhd;
963 	size_t dlen, res_len;
964 	char *darg;
965 	door_arg_t door_args;
966 	reparsed_door_res_t *resp;
967 	door_handle_t rp_door;
968 
969 	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
970 	    bufsize == NULL)
971 		return (EINVAL);
972 
973 	/* get reparsed's door handle */
974 	if ((rp_door = reparse_door_get_handle()) == NULL)
975 		return (EBADF);
976 
977 	/* setup buffer for door_call args and results */
978 	dlen = strlen(svc_type) + strlen(svc_data) + 2;
979 	if (*bufsize < dlen) {
980 		darg = kmem_alloc(dlen, KM_SLEEP);
981 		need_free = 1;
982 	} else {
983 		darg = buf;	/* use same buffer for door's args & results */
984 		need_free = 0;
985 	}
986 
987 	/* build argument string of door call */
988 	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
989 
990 	/* setup args for door call */
991 	door_args.data_ptr = darg;
992 	door_args.data_size = dlen;
993 	door_args.desc_ptr = NULL;
994 	door_args.desc_num = 0;
995 	door_args.rbuf = buf;
996 	door_args.rsize = *bufsize;
997 
998 	/* do the door_call */
999 	retried_doorhd = 0;
1000 	retries = 0;
1001 	door_ki_hold(rp_door);
1002 	while ((err = door_ki_upcall_limited(rp_door, &door_args,
1003 	    NULL, SIZE_MAX, 0)) != 0) {
1004 		if (err == EAGAIN || err == EINTR) {
1005 			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
1006 				delay(SEC_TO_TICK(1));
1007 				continue;
1008 			}
1009 		} else if (err == EBADF) {
1010 			/* door server goes away... */
1011 			reparse_door_reset_handle();
1012 
1013 			if (retried_doorhd == 0) {
1014 				door_ki_rele(rp_door);
1015 				retried_doorhd++;
1016 				rp_door = reparse_door_get_handle();
1017 				if (rp_door != NULL) {
1018 					door_ki_hold(rp_door);
1019 					continue;
1020 				}
1021 			}
1022 		}
1023 		break;
1024 	}
1025 
1026 	if (rp_door)
1027 		door_ki_rele(rp_door);
1028 
1029 	if (need_free)
1030 		kmem_free(darg, dlen);		/* done with args buffer */
1031 
1032 	if (err != 0)
1033 		return (err);
1034 
1035 	resp = (reparsed_door_res_t *)door_args.rbuf;
1036 	if ((err = resp->res_status) == 0) {
1037 		/*
1038 		 * have to save the length of the results before the
1039 		 * bcopy below since it's can be an overlap copy that
1040 		 * overwrites the reparsed_door_res_t structure at
1041 		 * the beginning of the buffer.
1042 		 */
1043 		res_len = (size_t)resp->res_len;
1044 
1045 		/* deref call is ok */
1046 		if (res_len > *bufsize)
1047 			err = EOVERFLOW;
1048 		else
1049 			bcopy(resp->res_data, buf, res_len);
1050 		*bufsize = res_len;
1051 	}
1052 	if (door_args.rbuf != buf)
1053 		kmem_free(door_args.rbuf, door_args.rsize);
1054 
1055 	return (err);
1056 }
1057