xref: /titanic_44/usr/src/uts/common/fs/fs_subr.c (revision d1c5dc47e23888b05d4095e8983ccf62acbc69fa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /*
31  * Generic vnode operations.
32  */
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/fcntl.h>
38 #include <sys/flock.h>
39 #include <sys/statvfs.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/proc.h>
43 #include <sys/user.h>
44 #include <sys/unistd.h>
45 #include <sys/cred.h>
46 #include <sys/poll.h>
47 #include <sys/debug.h>
48 #include <sys/cmn_err.h>
49 #include <sys/stream.h>
50 #include <fs/fs_subr.h>
51 #include <fs/fs_reparse.h>
52 #include <sys/door.h>
53 #include <sys/acl.h>
54 #include <sys/share.h>
55 #include <sys/file.h>
56 #include <sys/kmem.h>
57 #include <sys/file.h>
58 #include <sys/nbmlock.h>
59 #include <acl/acl_common.h>
60 #include <sys/pathname.h>
61 
/*
 * Forward declaration of the lock-manager callback that fs_frlock()
 * installs around blocking lock requests; the definition follows
 * fs_frlock() below.
 */
static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);

/*
 * Tunable limiting the number of retries attempted to recover from an
 * ESTALE error.  fs_need_estale_retry() compares the caller's retry
 * count against this value.
 */
int fs_estale_retry = 5;

/*
 * Support for the reparse point door upcall: the cached door handle
 * and the mutex taken around every access to it.
 */
static door_handle_t reparsed_door;
static kmutex_t reparsed_door_lock;
74 
/*
 * Generic stub for an operation that the file system does not
 * implement.  Always fails with ENOSYS.
 */
int
fs_nosys()
{
	int error = ENOSYS;

	return (error);
}
83 
/*
 * Generic stub for an operation that is not valid on this vnode.
 * Always fails with EINVAL.
 */
int
fs_inval()
{
	int error = EINVAL;

	return (error);
}
92 
/*
 * Generic stub for an operation that only makes sense on a directory.
 * Always fails with ENOTDIR.
 */
int
fs_notdir()
{
	int error = ENOTDIR;

	return (error);
}
101 
/*
 * Free the file system specific resources.  File systems that do not
 * support forced unmount have nothing to release, so this default
 * implementation is a no-op: vfsp is neither read nor modified.
 */

/*ARGSUSED*/
void
fs_freevfs(vfs_t *vfsp)
{
}
112 
113 /* ARGSUSED */
114 int
115 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
116     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
117     caller_context_t *ct)
118 {
119 	return (ENOSYS);
120 }
121 
122 /* ARGSUSED */
123 int
124 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
125     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
126     caller_context_t *ct)
127 {
128 	return (ENOSYS);
129 }
130 
131 /* ARGSUSED */
132 int
133 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
134     struct pollhead **phpp, caller_context_t *ct)
135 {
136 	return (ENOSYS);
137 }
138 
139 
140 /*
141  * The file system has nothing to sync to disk.  However, the
142  * VFS_SYNC operation must not fail.
143  */
144 /* ARGSUSED */
145 int
146 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
147 {
148 	return (0);
149 }
150 
151 /*
152  * Does nothing but VOP_FSYNC must not fail.
153  */
154 /* ARGSUSED */
155 int
156 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
157 {
158 	return (0);
159 }
160 
161 /*
162  * Does nothing but VOP_PUTPAGE must not fail.
163  */
164 /* ARGSUSED */
165 int
166 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
167     caller_context_t *ctp)
168 {
169 	return (0);
170 }
171 
172 /*
173  * Does nothing but VOP_IOCTL must not fail.
174  */
175 /* ARGSUSED */
176 int
177 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
178     int *rvalp)
179 {
180 	return (0);
181 }
182 
183 /*
184  * Read/write lock/unlock.  Does nothing.
185  */
186 /* ARGSUSED */
187 int
188 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
189 {
190 	return (-1);
191 }
192 
/*
 * Read/write unlock stub, the counterpart of fs_rwlock().  There is
 * no lock to drop, so this is a no-op.
 */
/* ARGSUSED */
void
fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
{
}
198 
199 /*
200  * Compare two vnodes.
201  */
202 /*ARGSUSED2*/
203 int
204 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
205 {
206 	return (vp1 == vp2);
207 }
208 
209 /*
210  * No-op seek operation.
211  */
212 /* ARGSUSED */
213 int
214 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
215 {
216 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
217 }
218 
219 /*
220  * File and record locking.
221  */
222 /* ARGSUSED */
223 int
224 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
225     flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
226 {
227 	int frcmd;
228 	int nlmid;
229 	int error = 0;
230 	flk_callback_t serialize_callback;
231 	int serialize = 0;
232 	v_mode_t mode;
233 
234 	switch (cmd) {
235 
236 	case F_GETLK:
237 	case F_O_GETLK:
238 		if (flag & F_REMOTELOCK) {
239 			frcmd = RCMDLCK;
240 		} else if (flag & F_PXFSLOCK) {
241 			frcmd = PCMDLCK;
242 		} else {
243 			frcmd = 0;
244 			bfp->l_pid = ttoproc(curthread)->p_pid;
245 			bfp->l_sysid = 0;
246 		}
247 		break;
248 
249 	case F_SETLK_NBMAND:
250 		/*
251 		 * Are NBMAND locks allowed on this file?
252 		 */
253 		if (!vp->v_vfsp ||
254 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
255 			error = EINVAL;
256 			goto done;
257 		}
258 		if (vp->v_type != VREG) {
259 			error = EINVAL;
260 			goto done;
261 		}
262 		/*FALLTHROUGH*/
263 
264 	case F_SETLK:
265 		if (flag & F_REMOTELOCK) {
266 			frcmd = SETFLCK|RCMDLCK;
267 		} else if (flag & F_PXFSLOCK) {
268 			frcmd = SETFLCK|PCMDLCK;
269 		} else {
270 			frcmd = SETFLCK;
271 			bfp->l_pid = ttoproc(curthread)->p_pid;
272 			bfp->l_sysid = 0;
273 		}
274 		if (cmd == F_SETLK_NBMAND &&
275 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
276 			frcmd |= NBMLCK;
277 		}
278 
279 		if (nbl_need_check(vp)) {
280 			nbl_start_crit(vp, RW_WRITER);
281 			serialize = 1;
282 			if (frcmd & NBMLCK) {
283 				mode = (bfp->l_type == F_RDLCK) ?
284 				    V_READ : V_RDANDWR;
285 				if (vn_is_mapped(vp, mode)) {
286 					error = EAGAIN;
287 					goto done;
288 				}
289 			}
290 		}
291 		break;
292 
293 	case F_SETLKW:
294 		if (flag & F_REMOTELOCK) {
295 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
296 		} else if (flag & F_PXFSLOCK) {
297 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
298 		} else {
299 			frcmd = SETFLCK|SLPFLCK;
300 			bfp->l_pid = ttoproc(curthread)->p_pid;
301 			bfp->l_sysid = 0;
302 		}
303 
304 		if (nbl_need_check(vp)) {
305 			nbl_start_crit(vp, RW_WRITER);
306 			serialize = 1;
307 		}
308 		break;
309 
310 	case F_HASREMOTELOCKS:
311 		nlmid = GETNLMID(bfp->l_sysid);
312 		if (nlmid != 0) {	/* booted as a cluster */
313 			l_has_rmt(bfp) =
314 			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
315 		} else {		/* not booted as a cluster */
316 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
317 		}
318 
319 		goto done;
320 
321 	default:
322 		error = EINVAL;
323 		goto done;
324 	}
325 
326 	/*
327 	 * If this is a blocking lock request and we're serializing lock
328 	 * requests, modify the callback list to leave the critical region
329 	 * while we're waiting for the lock.
330 	 */
331 
332 	if (serialize && (frcmd & SLPFLCK) != 0) {
333 		flk_add_callback(&serialize_callback,
334 		    frlock_serialize_blocked, vp, flk_cbp);
335 		flk_cbp = &serialize_callback;
336 	}
337 
338 	error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
339 
340 	if (serialize && (frcmd & SLPFLCK) != 0)
341 		flk_del_callback(&serialize_callback);
342 
343 done:
344 	if (serialize)
345 		nbl_end_crit(vp);
346 
347 	return (error);
348 }
349 
350 /*
351  * Callback when a lock request blocks and we are serializing requests.  If
352  * before sleeping, leave the critical region.  If after wakeup, reenter
353  * the critical region.
354  */
355 
356 static callb_cpr_t *
357 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
358 {
359 	vnode_t *vp = (vnode_t *)infop;
360 
361 	if (when == FLK_BEFORE_SLEEP)
362 		nbl_end_crit(vp);
363 	else {
364 		nbl_start_crit(vp, RW_WRITER);
365 	}
366 
367 	return (NULL);
368 }
369 
370 /*
371  * Allow any flags.
372  */
373 /* ARGSUSED */
374 int
375 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
376 {
377 	return (0);
378 }
379 
380 /*
381  * Return the answer requested to poll() for non-device files.
382  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
383  */
384 struct pollhead fs_pollhd;
385 
386 /* ARGSUSED */
387 int
388 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
389     struct pollhead **phpp, caller_context_t *ct)
390 {
391 	*reventsp = 0;
392 	if (events & POLLIN)
393 		*reventsp |= POLLIN;
394 	if (events & POLLRDNORM)
395 		*reventsp |= POLLRDNORM;
396 	if (events & POLLRDBAND)
397 		*reventsp |= POLLRDBAND;
398 	if (events & POLLOUT)
399 		*reventsp |= POLLOUT;
400 	if (events & POLLWRBAND)
401 		*reventsp |= POLLWRBAND;
402 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
403 	return (0);
404 }
405 
406 /*
407  * POSIX pathconf() support.
408  */
409 /* ARGSUSED */
410 int
411 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
412     caller_context_t *ct)
413 {
414 	ulong_t val;
415 	int error = 0;
416 	struct statvfs64 vfsbuf;
417 
418 	switch (cmd) {
419 
420 	case _PC_LINK_MAX:
421 		val = MAXLINK;
422 		break;
423 
424 	case _PC_MAX_CANON:
425 		val = MAX_CANON;
426 		break;
427 
428 	case _PC_MAX_INPUT:
429 		val = MAX_INPUT;
430 		break;
431 
432 	case _PC_NAME_MAX:
433 		bzero(&vfsbuf, sizeof (vfsbuf));
434 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
435 			break;
436 		val = vfsbuf.f_namemax;
437 		break;
438 
439 	case _PC_PATH_MAX:
440 	case _PC_SYMLINK_MAX:
441 		val = MAXPATHLEN;
442 		break;
443 
444 	case _PC_PIPE_BUF:
445 		val = PIPE_BUF;
446 		break;
447 
448 	case _PC_NO_TRUNC:
449 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
450 			val = 1;	/* NOTRUNC is enabled for vp */
451 		else
452 			val = (ulong_t)-1;
453 		break;
454 
455 	case _PC_VDISABLE:
456 		val = _POSIX_VDISABLE;
457 		break;
458 
459 	case _PC_CHOWN_RESTRICTED:
460 		if (rstchown)
461 			val = rstchown; /* chown restricted enabled */
462 		else
463 			val = (ulong_t)-1;
464 		break;
465 
466 	case _PC_FILESIZEBITS:
467 
468 		/*
469 		 * If ever we come here it means that underlying file system
470 		 * does not recognise the command and therefore this
471 		 * configurable limit cannot be determined. We return -1
472 		 * and don't change errno.
473 		 */
474 
475 		val = (ulong_t)-1;    /* large file support */
476 		break;
477 
478 	case _PC_ACL_ENABLED:
479 		val = 0;
480 		break;
481 
482 	case _PC_CASE_BEHAVIOR:
483 		val = _CASE_SENSITIVE;
484 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
485 			val |= _CASE_INSENSITIVE;
486 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
487 			val &= ~_CASE_SENSITIVE;
488 		break;
489 
490 	case _PC_SATTR_ENABLED:
491 	case _PC_SATTR_EXISTS:
492 		val = 0;
493 		break;
494 
495 	case _PC_ACCESS_FILTERING:
496 		val = 0;
497 		break;
498 
499 	default:
500 		error = EINVAL;
501 		break;
502 	}
503 
504 	if (error == 0)
505 		*valp = val;
506 	return (error);
507 }
508 
509 /*
510  * Dispose of a page.
511  */
512 /* ARGSUSED */
513 void
514 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
515     caller_context_t *ct)
516 {
517 
518 	ASSERT(fl == B_FREE || fl == B_INVAL);
519 
520 	if (fl == B_FREE)
521 		page_free(pp, dn);
522 	else
523 		page_destroy(pp, dn);
524 }
525 
/*
 * Dispose routine that must never be reached: every argument is
 * ignored and the call is reported through cmn_err() at CE_PANIC
 * level.
 */
/* ARGSUSED */
void
fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
    caller_context_t *ct)
{
	cmn_err(CE_PANIC, "fs_nodispose invoked");
}
533 
534 /*
535  * fabricate acls for file systems that do not support acls.
536  */
537 /* ARGSUSED */
538 int
539 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
540     caller_context_t *ct)
541 {
542 	aclent_t	*aclentp;
543 	struct vattr	vattr;
544 	int		error;
545 	size_t		aclsize;
546 
547 	vsecattr->vsa_aclcnt	= 0;
548 	vsecattr->vsa_aclentsz	= 0;
549 	vsecattr->vsa_aclentp	= NULL;
550 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
551 	vsecattr->vsa_dfaclentp	= NULL;
552 
553 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
554 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
555 		return (error);
556 
557 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
558 		aclsize = 4 * sizeof (aclent_t);
559 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
560 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
561 		aclentp = vsecattr->vsa_aclentp;
562 
563 		aclentp->a_type = USER_OBJ;	/* Owner */
564 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
565 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
566 		aclentp++;
567 
568 		aclentp->a_type = GROUP_OBJ;    /* Group */
569 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
570 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
571 		aclentp++;
572 
573 		aclentp->a_type = OTHER_OBJ;    /* Other */
574 		aclentp->a_perm = vattr.va_mode & 0007;
575 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
576 		aclentp++;
577 
578 		aclentp->a_type = CLASS_OBJ;    /* Class */
579 		aclentp->a_perm = (ushort_t)(0007);
580 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
581 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
582 		VERIFY(0 == acl_trivial_create(vattr.va_mode,
583 		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
584 		    &vsecattr->vsa_aclcnt));
585 		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
586 	}
587 
588 	return (error);
589 }
590 
591 /*
592  * Common code for implementing DOS share reservations
593  */
594 /* ARGSUSED4 */
595 int
596 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
597     caller_context_t *ct)
598 {
599 	int error;
600 
601 	/*
602 	 * Make sure that the file was opened with permissions appropriate
603 	 * for the request, and make sure the caller isn't trying to sneak
604 	 * in an NBMAND request.
605 	 */
606 	if (cmd == F_SHARE) {
607 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
608 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
609 			return (EBADF);
610 		if (shr->s_access & (F_RMACC | F_MDACC))
611 			return (EINVAL);
612 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
613 			return (EINVAL);
614 	}
615 	if (cmd == F_SHARE_NBMAND) {
616 		/* make sure nbmand is allowed on the file */
617 		if (!vp->v_vfsp ||
618 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
619 			return (EINVAL);
620 		}
621 		if (vp->v_type != VREG) {
622 			return (EINVAL);
623 		}
624 	}
625 
626 	nbl_start_crit(vp, RW_WRITER);
627 
628 	switch (cmd) {
629 
630 	case F_SHARE_NBMAND:
631 		shr->s_deny |= F_MANDDNY;
632 		/*FALLTHROUGH*/
633 	case F_SHARE:
634 		error = add_share(vp, shr);
635 		break;
636 
637 	case F_UNSHARE:
638 		error = del_share(vp, shr);
639 		break;
640 
641 	case F_HASREMOTELOCKS:
642 		/*
643 		 * We are overloading this command to refer to remote
644 		 * shares as well as remote locks, despite its name.
645 		 */
646 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
647 		error = 0;
648 		break;
649 
650 	default:
651 		error = EINVAL;
652 		break;
653 	}
654 
655 	nbl_end_crit(vp);
656 	return (error);
657 }
658 
659 /*ARGSUSED1*/
660 int
661 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
662     caller_context_t *ct)
663 {
664 	ASSERT(vp != NULL);
665 	return (ENOTSUP);
666 }
667 
668 /*ARGSUSED1*/
669 int
670 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
671     caller_context_t *ct)
672 {
673 	ASSERT(vp != NULL);
674 	return (0);
675 }
676 
677 /*
678  * return 1 for non-trivial ACL.
679  *
680  * NB: It is not necessary for the caller to VOP_RWLOCK since
681  *	we only issue VOP_GETSECATTR.
682  *
683  * Returns 0 == trivial
684  *         1 == NOT Trivial
685  *	   <0 could not determine.
686  */
687 int
688 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
689 {
690 	ulong_t		acl_styles;
691 	ulong_t		acl_flavor;
692 	vsecattr_t 	vsecattr;
693 	int 		error;
694 	int		isnontrivial;
695 
696 	/* determine the forms of ACLs maintained */
697 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
698 
699 	/* clear bits we don't understand and establish default acl_style */
700 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
701 	if (error || (acl_styles == 0))
702 		acl_styles = _ACL_ACLENT_ENABLED;
703 
704 	vsecattr.vsa_aclentp = NULL;
705 	vsecattr.vsa_dfaclentp = NULL;
706 	vsecattr.vsa_aclcnt = 0;
707 	vsecattr.vsa_dfaclcnt = 0;
708 
709 	while (acl_styles) {
710 		/* select one of the styles as current flavor */
711 		acl_flavor = 0;
712 		if (acl_styles & _ACL_ACLENT_ENABLED) {
713 			acl_flavor = _ACL_ACLENT_ENABLED;
714 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
715 		} else if (acl_styles & _ACL_ACE_ENABLED) {
716 			acl_flavor = _ACL_ACE_ENABLED;
717 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
718 		}
719 
720 		ASSERT(vsecattr.vsa_mask && acl_flavor);
721 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
722 		if (error == 0)
723 			break;
724 
725 		/* that flavor failed */
726 		acl_styles &= ~acl_flavor;
727 	}
728 
729 	/* if all styles fail then assume trivial */
730 	if (acl_styles == 0)
731 		return (0);
732 
733 	/* process the flavor that worked */
734 	isnontrivial = 0;
735 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
736 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
737 			isnontrivial = 1;
738 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
739 			kmem_free(vsecattr.vsa_aclentp,
740 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
741 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
742 			kmem_free(vsecattr.vsa_dfaclentp,
743 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
744 	}
745 	if (acl_flavor & _ACL_ACE_ENABLED) {
746 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
747 		    vsecattr.vsa_aclcnt);
748 
749 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
750 			kmem_free(vsecattr.vsa_aclentp,
751 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
752 		/* ACE has no vsecattr.vsa_dfaclcnt */
753 	}
754 	return (isnontrivial);
755 }
756 
757 /*
758  * Check whether we need a retry to recover from STALE error.
759  */
760 int
761 fs_need_estale_retry(int retry_count)
762 {
763 	if (retry_count < fs_estale_retry)
764 		return (1);
765 	else
766 		return (0);
767 }
768 
769 
/*
 * Scanning routine registered by the anti-virus scanner; NULL until
 * fs_vscan_register() is called.  fs_vscan() calls through it.
 */
static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;

/*
 * Routine for anti-virus scanner to call to register its scanning routine.
 * The pointer is stored as given, replacing any earlier registration;
 * passing NULL therefore turns scanning off again.
 */
void
fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
{
	fs_av_scan = av_scan;
}
780 
781 /*
782  * Routine for file systems to call to initiate anti-virus scanning.
783  * Scanning will only be done on REGular files (currently).
784  */
785 int
786 fs_vscan(vnode_t *vp, cred_t *cr, int async)
787 {
788 	int ret = 0;
789 
790 	if (fs_av_scan && vp->v_type == VREG)
791 		ret = (*fs_av_scan)(vp, cr, async);
792 
793 	return (ret);
794 }
795 
796 /*
797  * support functions for reparse point
798  */
799 /*
800  * reparse_vnode_parse
801  *
802  * Read the symlink data of a reparse point specified by the vnode
803  * and return the reparse data as name-value pair in the nvlist.
804  */
805 int
806 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
807 {
808 	int err;
809 	char *lkdata;
810 	struct uio uio;
811 	struct iovec iov;
812 
813 	if (vp == NULL || nvl == NULL)
814 		return (EINVAL);
815 
816 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
817 
818 	/*
819 	 * Set up io vector to read sym link data
820 	 */
821 	iov.iov_base = lkdata;
822 	iov.iov_len = MAXREPARSELEN;
823 	uio.uio_iov = &iov;
824 	uio.uio_iovcnt = 1;
825 	uio.uio_segflg = UIO_SYSSPACE;
826 	uio.uio_extflg = UIO_COPY_CACHED;
827 	uio.uio_loffset = (offset_t)0;
828 	uio.uio_resid = MAXREPARSELEN;
829 
830 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
831 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
832 		err = reparse_parse(lkdata, nvl);
833 	}
834 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
835 
836 	return (err);
837 }
838 
/*
 * One-time setup for reparse point support: initializes
 * reparsed_door_lock, the mutex guarding the cached door handle.
 */
void
reparse_point_init()
{
	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
}
844 
845 static door_handle_t
846 reparse_door_get_handle()
847 {
848 	door_handle_t dh;
849 
850 	mutex_enter(&reparsed_door_lock);
851 	if ((dh = reparsed_door) == NULL) {
852 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
853 			reparsed_door = NULL;
854 			dh = NULL;
855 		} else
856 			dh = reparsed_door;
857 	}
858 	mutex_exit(&reparsed_door_lock);
859 	return (dh);
860 }
861 
/*
 * Forget the cached door handle so that the next
 * reparse_door_get_handle() call opens the door again.
 *
 * NOTE(review): the handle obtained from door_ki_open() is dropped
 * here without a matching release; whether that reference needs to be
 * given back is not visible from this file -- confirm against the
 * door_ki_open()/door_ki_rele() contract.
 */
static void
reparse_door_reset_handle()
{
	mutex_enter(&reparsed_door_lock);
	reparsed_door = NULL;
	mutex_exit(&reparsed_door_lock);
}
869 
/*
 * reparse_kderef
 *
 * Accepts the service-specific item from the reparse point and returns
 * the service-specific data requested.  The caller specifies the size of
 * the buffer provided via *bufsize; the routine will fail with EOVERFLOW
 * if the results will not fit in the buffer, in which case, *bufsize will
 * contain the number of bytes needed to hold the results.
 *
 * On success returns 0 and updates *bufsize with the length of the actual
 * result.  Otherwise returns an error code:
 *	EINVAL		one of the arguments is NULL
 *	EBADF		no handle for the reparsed door could be obtained
 *	EOVERFLOW	the result does not fit in buf (see above)
 *	other		the error from the door upcall once retries are
 *			exhausted, or the non-zero res_status found in the
 *			door results
 */
int
reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
    size_t *bufsize)
{
	int err, retries, need_free, retried_doorhd;
	size_t dlen, res_len;
	char *darg;
	door_arg_t door_args;
	reparsed_door_res_t *resp;
	door_handle_t rp_door;

	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
	    bufsize == NULL)
		return (EINVAL);

	/* get reparsed's door handle */
	if ((rp_door = reparse_door_get_handle()) == NULL)
		return (EBADF);

	/*
	 * setup buffer for door_call args and results; the argument is
	 * "svc_type:svc_data", so it needs room for both strings plus
	 * the ':' separator and the terminating NUL.
	 */
	dlen = strlen(svc_type) + strlen(svc_data) + 2;
	if (*bufsize < dlen) {
		/* caller's buffer cannot hold the argument string */
		darg = kmem_alloc(dlen, KM_SLEEP);
		need_free = 1;
	} else {
		darg = buf;	/* use same buffer for door's args & results */
		need_free = 0;
	}

	/* build argument string of door call */
	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);

	/* setup args for door call */
	door_args.data_ptr = darg;
	door_args.data_size = dlen;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = buf;
	door_args.rsize = *bufsize;

	/*
	 * do the door_call, holding the handle for as long as it is in
	 * use.  EAGAIN/EINTR are retried after a one second delay, up to
	 * REPARSED_DOORCALL_MAX_RETRY attempts; EBADF drops the cached
	 * handle and retries once with a freshly obtained one.  Any other
	 * error ends the loop.
	 */
	retried_doorhd = 0;
	retries = 0;
	door_ki_hold(rp_door);
	while ((err = door_ki_upcall_limited(rp_door, &door_args,
	    NULL, SIZE_MAX, 0)) != 0) {
		if (err == EAGAIN || err == EINTR) {
			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
				delay(SEC_TO_TICK(1));
				continue;
			}
		} else if (err == EBADF) {
			/* door server goes away... */
			reparse_door_reset_handle();

			if (retried_doorhd == 0) {
				/* give up the old handle before re-opening */
				door_ki_rele(rp_door);
				retried_doorhd++;
				rp_door = reparse_door_get_handle();
				if (rp_door != NULL) {
					door_ki_hold(rp_door);
					continue;
				}
			}
		}
		break;
	}

	/* rp_door is NULL here only if re-opening the door failed */
	if (rp_door)
		door_ki_rele(rp_door);

	if (need_free)
		kmem_free(darg, dlen);		/* done with args buffer */

	if (err != 0)
		return (err);

	resp = (reparsed_door_res_t *)door_args.rbuf;
	if ((err = resp->res_status) == 0) {
		/*
		 * The length of the results has to be saved before the
		 * bcopy below, since that can be an overlapping copy that
		 * overwrites the reparsed_door_res_t structure at
		 * the beginning of the buffer.
		 */
		res_len = (size_t)resp->res_len;

		/* deref call is ok */
		if (res_len > *bufsize)
			err = EOVERFLOW;
		else
			bcopy(resp->res_data, buf, res_len);
		/* report the result length, or the size needed on overflow */
		*bufsize = res_len;
	}
	/*
	 * NOTE(review): rbuf differing from buf presumably means the
	 * door upcall supplied its own result buffer of rsize bytes,
	 * which is freed here -- confirm against door_ki_upcall_limited().
	 */
	if (door_args.rbuf != buf)
		kmem_free(door_args.rbuf, door_args.rsize);

	return (err);
}
981