xref: /titanic_52/usr/src/uts/common/fs/fs_subr.c (revision 0a70879558a701a07771af87e7852a12f3bfd438)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /*
31  * Generic vnode operations.
32  */
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/fcntl.h>
38 #include <sys/flock.h>
39 #include <sys/statvfs.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/proc.h>
43 #include <sys/user.h>
44 #include <sys/unistd.h>
45 #include <sys/cred.h>
46 #include <sys/poll.h>
47 #include <sys/debug.h>
48 #include <sys/cmn_err.h>
49 #include <sys/stream.h>
50 #include <fs/fs_subr.h>
51 #include <fs/fs_reparse.h>
52 #include <sys/door.h>
53 #include <sys/acl.h>
54 #include <sys/share.h>
55 #include <sys/file.h>
56 #include <sys/kmem.h>
57 #include <sys/file.h>
58 #include <sys/nbmlock.h>
59 #include <acl/acl_common.h>
60 #include <sys/pathname.h>
61 
62 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
63 
/*
 * Tunable limiting the number of retries made to recover from an
 * ESTALE error.
 */
67 int fs_estale_retry = 5;
68 
/*
 * Support for the reparse point door upcall.
 */
72 static door_handle_t reparsed_door;
73 static kmutex_t reparsed_door_lock;
74 
/*
 * Generic entry point for an operation that the file system does not
 * support.  Always fails with ENOSYS.
 */
int
fs_nosys()
{
	int unsupported = ENOSYS;

	return (unsupported);
}
83 
/*
 * Generic entry point for an operation that is not valid on this vnode.
 * Always fails with EINVAL.
 */
int
fs_inval()
{
	int invalid = EINVAL;

	return (invalid);
}
92 
/*
 * Generic entry point for an operation that only makes sense on a
 * directory.  Always fails with ENOTDIR.
 */
int
fs_notdir()
{
	int not_a_directory = ENOTDIR;

	return (not_a_directory);
}
101 
102 /*
103  * Free the file system specific resources. For the file systems that
104  * do not support the forced unmount, it will be a nop function.
105  */
106 
107 /*ARGSUSED*/
108 void
109 fs_freevfs(vfs_t *vfsp)
110 {
111 }
112 
113 /* ARGSUSED */
114 int
115 fs_nosys_map(struct vnode *vp,
116 	offset_t off,
117 	struct as *as,
118 	caddr_t *addrp,
119 	size_t len,
120 	uchar_t prot,
121 	uchar_t maxprot,
122 	uint_t flags,
123 	struct cred *cr,
124 	caller_context_t *ct)
125 {
126 	return (ENOSYS);
127 }
128 
129 /* ARGSUSED */
130 int
131 fs_nosys_addmap(struct vnode *vp,
132 	offset_t off,
133 	struct as *as,
134 	caddr_t addr,
135 	size_t len,
136 	uchar_t prot,
137 	uchar_t maxprot,
138 	uint_t flags,
139 	struct cred *cr,
140 	caller_context_t *ct)
141 {
142 	return (ENOSYS);
143 }
144 
145 /* ARGSUSED */
146 int
147 fs_nosys_poll(vnode_t *vp,
148 	register short events,
149 	int anyyet,
150 	register short *reventsp,
151 	struct pollhead **phpp,
152 	caller_context_t *ct)
153 {
154 	return (ENOSYS);
155 }
156 
157 
158 /*
159  * The file system has nothing to sync to disk.  However, the
160  * VFS_SYNC operation must not fail.
161  */
162 /* ARGSUSED */
163 int
164 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
165 {
166 	return (0);
167 }
168 
169 /*
170  * Does nothing but VOP_FSYNC must not fail.
171  */
172 /* ARGSUSED */
173 int
174 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
175 {
176 	return (0);
177 }
178 
179 /*
180  * Does nothing but VOP_PUTPAGE must not fail.
181  */
182 /* ARGSUSED */
183 int
184 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
185 	caller_context_t *ctp)
186 {
187 	return (0);
188 }
189 
190 /*
191  * Does nothing but VOP_IOCTL must not fail.
192  */
193 /* ARGSUSED */
194 int
195 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
196 	int *rvalp)
197 {
198 	return (0);
199 }
200 
201 /*
202  * Read/write lock/unlock.  Does nothing.
203  */
204 /* ARGSUSED */
205 int
206 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
207 {
208 	return (-1);
209 }
210 
211 /* ARGSUSED */
212 void
213 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
214 {
215 }
216 
217 /*
218  * Compare two vnodes.
219  */
220 /*ARGSUSED2*/
221 int
222 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
223 {
224 	return (vp1 == vp2);
225 }
226 
227 /*
228  * No-op seek operation.
229  */
230 /* ARGSUSED */
231 int
232 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
233 {
234 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
235 }
236 
237 /*
238  * File and record locking.
239  */
240 /* ARGSUSED */
241 int
242 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
243 	offset_t offset, flk_callback_t *flk_cbp, cred_t *cr,
244 	caller_context_t *ct)
245 {
246 	int frcmd;
247 	int nlmid;
248 	int error = 0;
249 	flk_callback_t serialize_callback;
250 	int serialize = 0;
251 	v_mode_t mode;
252 
253 	switch (cmd) {
254 
255 	case F_GETLK:
256 	case F_O_GETLK:
257 		if (flag & F_REMOTELOCK) {
258 			frcmd = RCMDLCK;
259 		} else if (flag & F_PXFSLOCK) {
260 			frcmd = PCMDLCK;
261 		} else {
262 			frcmd = 0;
263 			bfp->l_pid = ttoproc(curthread)->p_pid;
264 			bfp->l_sysid = 0;
265 		}
266 		break;
267 
268 	case F_SETLK_NBMAND:
269 		/*
270 		 * Are NBMAND locks allowed on this file?
271 		 */
272 		if (!vp->v_vfsp ||
273 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
274 			error = EINVAL;
275 			goto done;
276 		}
277 		if (vp->v_type != VREG) {
278 			error = EINVAL;
279 			goto done;
280 		}
281 		/*FALLTHROUGH*/
282 
283 	case F_SETLK:
284 		if (flag & F_REMOTELOCK) {
285 			frcmd = SETFLCK|RCMDLCK;
286 		} else if (flag & F_PXFSLOCK) {
287 			frcmd = SETFLCK|PCMDLCK;
288 		} else {
289 			frcmd = SETFLCK;
290 			bfp->l_pid = ttoproc(curthread)->p_pid;
291 			bfp->l_sysid = 0;
292 		}
293 		if (cmd == F_SETLK_NBMAND &&
294 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
295 			frcmd |= NBMLCK;
296 		}
297 
298 		if (nbl_need_check(vp)) {
299 			nbl_start_crit(vp, RW_WRITER);
300 			serialize = 1;
301 			if (frcmd & NBMLCK) {
302 				mode = (bfp->l_type == F_RDLCK) ?
303 				    V_READ : V_RDANDWR;
304 				if (vn_is_mapped(vp, mode)) {
305 					error = EAGAIN;
306 					goto done;
307 				}
308 			}
309 		}
310 		break;
311 
312 	case F_SETLKW:
313 		if (flag & F_REMOTELOCK) {
314 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
315 		} else if (flag & F_PXFSLOCK) {
316 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
317 		} else {
318 			frcmd = SETFLCK|SLPFLCK;
319 			bfp->l_pid = ttoproc(curthread)->p_pid;
320 			bfp->l_sysid = 0;
321 		}
322 
323 		if (nbl_need_check(vp)) {
324 			nbl_start_crit(vp, RW_WRITER);
325 			serialize = 1;
326 		}
327 		break;
328 
329 	case F_HASREMOTELOCKS:
330 		nlmid = GETNLMID(bfp->l_sysid);
331 		if (nlmid != 0) {	/* booted as a cluster */
332 			l_has_rmt(bfp) =
333 			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
334 		} else {		/* not booted as a cluster */
335 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
336 		}
337 
338 		goto done;
339 
340 	default:
341 		error = EINVAL;
342 		goto done;
343 	}
344 
345 	/*
346 	 * If this is a blocking lock request and we're serializing lock
347 	 * requests, modify the callback list to leave the critical region
348 	 * while we're waiting for the lock.
349 	 */
350 
351 	if (serialize && (frcmd & SLPFLCK) != 0) {
352 		flk_add_callback(&serialize_callback,
353 		    frlock_serialize_blocked, vp, flk_cbp);
354 		flk_cbp = &serialize_callback;
355 	}
356 
357 	error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
358 
359 done:
360 	if (serialize)
361 		nbl_end_crit(vp);
362 
363 	return (error);
364 }
365 
366 /*
367  * Callback when a lock request blocks and we are serializing requests.  If
368  * before sleeping, leave the critical region.  If after wakeup, reenter
369  * the critical region.
370  */
371 
372 static callb_cpr_t *
373 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
374 {
375 	vnode_t *vp = (vnode_t *)infop;
376 
377 	if (when == FLK_BEFORE_SLEEP)
378 		nbl_end_crit(vp);
379 	else {
380 		nbl_start_crit(vp, RW_WRITER);
381 	}
382 
383 	return (NULL);
384 }
385 
386 /*
387  * Allow any flags.
388  */
389 /* ARGSUSED */
390 int
391 fs_setfl(
392 	vnode_t *vp,
393 	int oflags,
394 	int nflags,
395 	cred_t *cr,
396 	caller_context_t *ct)
397 {
398 	return (0);
399 }
400 
401 /*
402  * Return the answer requested to poll() for non-device files.
403  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
404  */
405 struct pollhead fs_pollhd;
406 
407 /* ARGSUSED */
408 int
409 fs_poll(vnode_t *vp,
410 	register short events,
411 	int anyyet,
412 	register short *reventsp,
413 	struct pollhead **phpp,
414 	caller_context_t *ct)
415 {
416 	*reventsp = 0;
417 	if (events & POLLIN)
418 		*reventsp |= POLLIN;
419 	if (events & POLLRDNORM)
420 		*reventsp |= POLLRDNORM;
421 	if (events & POLLRDBAND)
422 		*reventsp |= POLLRDBAND;
423 	if (events & POLLOUT)
424 		*reventsp |= POLLOUT;
425 	if (events & POLLWRBAND)
426 		*reventsp |= POLLWRBAND;
427 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
428 	return (0);
429 }
430 
431 /*
432  * POSIX pathconf() support.
433  */
434 /* ARGSUSED */
435 int
436 fs_pathconf(
437 	vnode_t *vp,
438 	int cmd,
439 	ulong_t *valp,
440 	cred_t *cr,
441 	caller_context_t *ct)
442 {
443 	register ulong_t val;
444 	register int error = 0;
445 	struct statvfs64 vfsbuf;
446 
447 	switch (cmd) {
448 
449 	case _PC_LINK_MAX:
450 		val = MAXLINK;
451 		break;
452 
453 	case _PC_MAX_CANON:
454 		val = MAX_CANON;
455 		break;
456 
457 	case _PC_MAX_INPUT:
458 		val = MAX_INPUT;
459 		break;
460 
461 	case _PC_NAME_MAX:
462 		bzero(&vfsbuf, sizeof (vfsbuf));
463 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
464 			break;
465 		val = vfsbuf.f_namemax;
466 		break;
467 
468 	case _PC_PATH_MAX:
469 	case _PC_SYMLINK_MAX:
470 		val = MAXPATHLEN;
471 		break;
472 
473 	case _PC_PIPE_BUF:
474 		val = PIPE_BUF;
475 		break;
476 
477 	case _PC_NO_TRUNC:
478 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
479 			val = 1;	/* NOTRUNC is enabled for vp */
480 		else
481 			val = (ulong_t)-1;
482 		break;
483 
484 	case _PC_VDISABLE:
485 		val = _POSIX_VDISABLE;
486 		break;
487 
488 	case _PC_CHOWN_RESTRICTED:
489 		if (rstchown)
490 			val = rstchown; /* chown restricted enabled */
491 		else
492 			val = (ulong_t)-1;
493 		break;
494 
495 	case _PC_FILESIZEBITS:
496 
497 		/*
498 		 * If ever we come here it means that underlying file system
499 		 * does not recognise the command and therefore this
500 		 * configurable limit cannot be determined. We return -1
501 		 * and don't change errno.
502 		 */
503 
504 		val = (ulong_t)-1;    /* large file support */
505 		break;
506 
507 	case _PC_ACL_ENABLED:
508 		val = 0;
509 		break;
510 
511 	case _PC_CASE_BEHAVIOR:
512 		val = _CASE_SENSITIVE;
513 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
514 			val |= _CASE_INSENSITIVE;
515 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
516 			val &= ~_CASE_SENSITIVE;
517 		break;
518 
519 	case _PC_SATTR_ENABLED:
520 	case _PC_SATTR_EXISTS:
521 		val = 0;
522 		break;
523 
524 	case _PC_ACCESS_FILTERING:
525 		val = 0;
526 		break;
527 
528 	default:
529 		error = EINVAL;
530 		break;
531 	}
532 
533 	if (error == 0)
534 		*valp = val;
535 	return (error);
536 }
537 
538 /*
539  * Dispose of a page.
540  */
541 /* ARGSUSED */
542 void
543 fs_dispose(
544 	struct vnode *vp,
545 	page_t *pp,
546 	int fl,
547 	int dn,
548 	struct cred *cr,
549 	caller_context_t *ct)
550 {
551 
552 	ASSERT(fl == B_FREE || fl == B_INVAL);
553 
554 	if (fl == B_FREE)
555 		page_free(pp, dn);
556 	else
557 		page_destroy(pp, dn);
558 }
559 
560 /* ARGSUSED */
561 void
562 fs_nodispose(
563 	struct vnode *vp,
564 	page_t *pp,
565 	int fl,
566 	int dn,
567 	struct cred *cr,
568 	caller_context_t *ct)
569 {
570 	cmn_err(CE_PANIC, "fs_nodispose invoked");
571 }
572 
573 /*
574  * fabricate acls for file systems that do not support acls.
575  */
576 /* ARGSUSED */
577 int
578 fs_fab_acl(
579 	vnode_t *vp,
580 	vsecattr_t *vsecattr,
581 	int flag,
582 	cred_t *cr,
583 	caller_context_t *ct)
584 {
585 	aclent_t	*aclentp;
586 	struct vattr	vattr;
587 	int		error;
588 	size_t		aclsize;
589 
590 	vsecattr->vsa_aclcnt	= 0;
591 	vsecattr->vsa_aclentsz	= 0;
592 	vsecattr->vsa_aclentp	= NULL;
593 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
594 	vsecattr->vsa_dfaclentp	= NULL;
595 
596 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
597 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
598 		return (error);
599 
600 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
601 		aclsize = 4 * sizeof (aclent_t);
602 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
603 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
604 		aclentp = vsecattr->vsa_aclentp;
605 
606 		aclentp->a_type = USER_OBJ;	/* Owner */
607 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
608 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
609 		aclentp++;
610 
611 		aclentp->a_type = GROUP_OBJ;    /* Group */
612 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
613 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
614 		aclentp++;
615 
616 		aclentp->a_type = OTHER_OBJ;    /* Other */
617 		aclentp->a_perm = vattr.va_mode & 0007;
618 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
619 		aclentp++;
620 
621 		aclentp->a_type = CLASS_OBJ;    /* Class */
622 		aclentp->a_perm = (ushort_t)(0007);
623 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
624 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
625 		VERIFY(0 == acl_trivial_create(vattr.va_mode,
626 		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
627 		    &vsecattr->vsa_aclcnt));
628 		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
629 	}
630 
631 	return (error);
632 }
633 
634 /*
635  * Common code for implementing DOS share reservations
636  */
637 /* ARGSUSED4 */
638 int
639 fs_shrlock(
640 	struct vnode *vp,
641 	int cmd,
642 	struct shrlock *shr,
643 	int flag,
644 	cred_t *cr,
645 	caller_context_t *ct)
646 {
647 	int error;
648 
649 	/*
650 	 * Make sure that the file was opened with permissions appropriate
651 	 * for the request, and make sure the caller isn't trying to sneak
652 	 * in an NBMAND request.
653 	 */
654 	if (cmd == F_SHARE) {
655 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
656 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
657 			return (EBADF);
658 		if (shr->s_access & (F_RMACC | F_MDACC))
659 			return (EINVAL);
660 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
661 			return (EINVAL);
662 	}
663 	if (cmd == F_SHARE_NBMAND) {
664 		/* make sure nbmand is allowed on the file */
665 		if (!vp->v_vfsp ||
666 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
667 			return (EINVAL);
668 		}
669 		if (vp->v_type != VREG) {
670 			return (EINVAL);
671 		}
672 	}
673 
674 	nbl_start_crit(vp, RW_WRITER);
675 
676 	switch (cmd) {
677 
678 	case F_SHARE_NBMAND:
679 		shr->s_deny |= F_MANDDNY;
680 		/*FALLTHROUGH*/
681 	case F_SHARE:
682 		error = add_share(vp, shr);
683 		break;
684 
685 	case F_UNSHARE:
686 		error = del_share(vp, shr);
687 		break;
688 
689 	case F_HASREMOTELOCKS:
690 		/*
691 		 * We are overloading this command to refer to remote
692 		 * shares as well as remote locks, despite its name.
693 		 */
694 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
695 		error = 0;
696 		break;
697 
698 	default:
699 		error = EINVAL;
700 		break;
701 	}
702 
703 	nbl_end_crit(vp);
704 	return (error);
705 }
706 
707 /*ARGSUSED1*/
708 int
709 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
710     caller_context_t *ct)
711 {
712 	ASSERT(vp != NULL);
713 	return (ENOTSUP);
714 }
715 
716 /*ARGSUSED1*/
717 int
718 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
719     caller_context_t *ct)
720 {
721 	ASSERT(vp != NULL);
722 	return (0);
723 }
724 
725 /*
726  * return 1 for non-trivial ACL.
727  *
728  * NB: It is not necessary for the caller to VOP_RWLOCK since
729  *	we only issue VOP_GETSECATTR.
730  *
731  * Returns 0 == trivial
732  *         1 == NOT Trivial
733  *	   <0 could not determine.
734  */
735 int
736 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
737 {
738 	ulong_t		acl_styles;
739 	ulong_t		acl_flavor;
740 	vsecattr_t 	vsecattr;
741 	int 		error;
742 	int		isnontrivial;
743 
744 	/* determine the forms of ACLs maintained */
745 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
746 
747 	/* clear bits we don't understand and establish default acl_style */
748 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
749 	if (error || (acl_styles == 0))
750 		acl_styles = _ACL_ACLENT_ENABLED;
751 
752 	vsecattr.vsa_aclentp = NULL;
753 	vsecattr.vsa_dfaclentp = NULL;
754 	vsecattr.vsa_aclcnt = 0;
755 	vsecattr.vsa_dfaclcnt = 0;
756 
757 	while (acl_styles) {
758 		/* select one of the styles as current flavor */
759 		acl_flavor = 0;
760 		if (acl_styles & _ACL_ACLENT_ENABLED) {
761 			acl_flavor = _ACL_ACLENT_ENABLED;
762 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
763 		} else if (acl_styles & _ACL_ACE_ENABLED) {
764 			acl_flavor = _ACL_ACE_ENABLED;
765 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
766 		}
767 
768 		ASSERT(vsecattr.vsa_mask && acl_flavor);
769 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
770 		if (error == 0)
771 			break;
772 
773 		/* that flavor failed */
774 		acl_styles &= ~acl_flavor;
775 	}
776 
777 	/* if all styles fail then assume trivial */
778 	if (acl_styles == 0)
779 		return (0);
780 
781 	/* process the flavor that worked */
782 	isnontrivial = 0;
783 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
784 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
785 			isnontrivial = 1;
786 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
787 			kmem_free(vsecattr.vsa_aclentp,
788 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
789 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
790 			kmem_free(vsecattr.vsa_dfaclentp,
791 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
792 	}
793 	if (acl_flavor & _ACL_ACE_ENABLED) {
794 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
795 		    vsecattr.vsa_aclcnt);
796 
797 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
798 			kmem_free(vsecattr.vsa_aclentp,
799 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
800 		/* ACE has no vsecattr.vsa_dfaclcnt */
801 	}
802 	return (isnontrivial);
803 }
804 
805 /*
806  * Check whether we need a retry to recover from STALE error.
807  */
808 int
809 fs_need_estale_retry(int retry_count)
810 {
811 	if (retry_count < fs_estale_retry)
812 		return (1);
813 	else
814 		return (0);
815 }
816 
817 
818 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
819 
820 /*
821  * Routine for anti-virus scanner to call to register its scanning routine.
822  */
823 void
824 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
825 {
826 	fs_av_scan = av_scan;
827 }
828 
829 /*
830  * Routine for file systems to call to initiate anti-virus scanning.
831  * Scanning will only be done on REGular files (currently).
832  */
833 int
834 fs_vscan(vnode_t *vp, cred_t *cr, int async)
835 {
836 	int ret = 0;
837 
838 	if (fs_av_scan && vp->v_type == VREG)
839 		ret = (*fs_av_scan)(vp, cr, async);
840 
841 	return (ret);
842 }
843 
844 /*
845  * support functions for reparse point
846  */
847 /*
848  * reparse_vnode_parse
849  *
850  * Read the symlink data of a reparse point specified by the vnode
851  * and return the reparse data as name-value pair in the nvlist.
852  */
853 int
854 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
855 {
856 	int err;
857 	char *lkdata;
858 	struct uio uio;
859 	struct iovec iov;
860 
861 	if (vp == NULL || nvl == NULL)
862 		return (EINVAL);
863 
864 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
865 
866 	/*
867 	 * Set up io vector to read sym link data
868 	 */
869 	iov.iov_base = lkdata;
870 	iov.iov_len = MAXREPARSELEN;
871 	uio.uio_iov = &iov;
872 	uio.uio_iovcnt = 1;
873 	uio.uio_segflg = UIO_SYSSPACE;
874 	uio.uio_extflg = UIO_COPY_CACHED;
875 	uio.uio_loffset = (offset_t)0;
876 	uio.uio_resid = MAXREPARSELEN;
877 
878 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
879 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
880 		err = reparse_parse(lkdata, nvl);
881 	}
882 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
883 
884 	return (err);
885 }
886 
887 void
888 reparse_point_init()
889 {
890 	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
891 }
892 
893 static door_handle_t
894 reparse_door_get_handle()
895 {
896 	door_handle_t dh;
897 
898 	mutex_enter(&reparsed_door_lock);
899 	if ((dh = reparsed_door) == NULL) {
900 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
901 			reparsed_door = NULL;
902 			dh = NULL;
903 		} else
904 			dh = reparsed_door;
905 	}
906 	mutex_exit(&reparsed_door_lock);
907 	return (dh);
908 }
909 
910 static void
911 reparse_door_reset_handle()
912 {
913 	mutex_enter(&reparsed_door_lock);
914 	reparsed_door = NULL;
915 	mutex_exit(&reparsed_door_lock);
916 }
917 
918 /*
919  * reparse_kderef
920  *
921  * Accepts the service-specific item from the reparse point and returns
922  * the service-specific data requested.  The caller specifies the size of
923  * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
924  * if the results will not fit in the buffer, in which case, *bufsz will
925  * contain the number of bytes needed to hold the results.
926  *
927  * if ok return 0 and update *bufsize with length of actual result
928  * else return error code.
929  */
930 int
931 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
932     size_t *bufsize)
933 {
934 	int err, retries, need_free, retried_doorhd;
935 	size_t dlen, res_len;
936 	char *darg;
937 	door_arg_t door_args;
938 	reparsed_door_res_t *resp;
939 	door_handle_t rp_door;
940 
941 	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
942 	    bufsize == NULL)
943 		return (EINVAL);
944 
945 	/* get reparsed's door handle */
946 	if ((rp_door = reparse_door_get_handle()) == NULL)
947 		return (EBADF);
948 
949 	/* setup buffer for door_call args and results */
950 	dlen = strlen(svc_type) + strlen(svc_data) + 2;
951 	if (*bufsize < dlen) {
952 		darg = kmem_alloc(dlen, KM_SLEEP);
953 		need_free = 1;
954 	} else {
955 		darg = buf;	/* use same buffer for door's args & results */
956 		need_free = 0;
957 	}
958 
959 	/* build argument string of door call */
960 	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
961 
962 	/* setup args for door call */
963 	door_args.data_ptr = darg;
964 	door_args.data_size = dlen;
965 	door_args.desc_ptr = NULL;
966 	door_args.desc_num = 0;
967 	door_args.rbuf = buf;
968 	door_args.rsize = *bufsize;
969 
970 	/* do the door_call */
971 	retried_doorhd = 0;
972 	retries = 0;
973 	door_ki_hold(rp_door);
974 	while ((err = door_ki_upcall_limited(rp_door, &door_args,
975 	    NULL, SIZE_MAX, 0)) != 0) {
976 		if (err == EAGAIN || err == EINTR) {
977 			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
978 				delay(SEC_TO_TICK(1));
979 				continue;
980 			}
981 		} else if (err == EBADF) {
982 			/* door server goes away... */
983 			reparse_door_reset_handle();
984 
985 			if (retried_doorhd == 0) {
986 				door_ki_rele(rp_door);
987 				retried_doorhd++;
988 				rp_door = reparse_door_get_handle();
989 				if (rp_door != NULL) {
990 					door_ki_hold(rp_door);
991 					continue;
992 				}
993 			}
994 		}
995 		break;
996 	}
997 
998 	if (rp_door)
999 		door_ki_rele(rp_door);
1000 
1001 	if (need_free)
1002 		kmem_free(darg, dlen);		/* done with args buffer */
1003 
1004 	if (err != 0)
1005 		return (err);
1006 
1007 	resp = (reparsed_door_res_t *)door_args.rbuf;
1008 	if ((err = resp->res_status) == 0) {
1009 		/*
1010 		 * have to save the length of the results before the
1011 		 * bcopy below since it's can be an overlap copy that
1012 		 * overwrites the reparsed_door_res_t structure at
1013 		 * the beginning of the buffer.
1014 		 */
1015 		res_len = (size_t)resp->res_len;
1016 
1017 		/* deref call is ok */
1018 		if (res_len > *bufsize)
1019 			err = EOVERFLOW;
1020 		else
1021 			bcopy(resp->res_data, buf, res_len);
1022 		*bufsize = res_len;
1023 	}
1024 	if (door_args.rbuf != buf)
1025 		kmem_free(door_args.rbuf, door_args.rsize);
1026 
1027 	return (err);
1028 }
1029