xref: /titanic_52/usr/src/uts/common/os/share.c (revision bbaa8b60dd95d714741fc474adad3cf710ef4efd)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5da6c28aaSamw  * Common Development and Distribution License (the "License").
6da6c28aaSamw  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22b89a8333Snatalie li - Sun Microsystems - Irvine United States  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
26*bbaa8b60SDan Kruchinin /*
27*bbaa8b60SDan Kruchinin  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28*bbaa8b60SDan Kruchinin  */
29*bbaa8b60SDan Kruchinin 
307c478bd9Sstevel@tonic-gate #include <sys/types.h>
317c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
327c478bd9Sstevel@tonic-gate #include <sys/param.h>
337c478bd9Sstevel@tonic-gate #include <sys/systm.h>
347c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
357c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
367c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
377c478bd9Sstevel@tonic-gate #include <sys/share.h>
387c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
397c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
427c478bd9Sstevel@tonic-gate #include <sys/errno.h>
437c478bd9Sstevel@tonic-gate #include <sys/nbmlock.h>
447c478bd9Sstevel@tonic-gate 
/*
 * When non-zero (and the kernel is built with DEBUG), add_share() dumps
 * the vnode's share list after every successful insertion.
 */
int share_debug = 0;

#ifdef DEBUG
/* Debug-only helpers that print a vnode's share list / a single share. */
static void print_shares(struct vnode *vp);
static void print_share(struct shrlock *shr);
#endif

/* File-local helpers defined further down. */
static int isreadonly(struct vnode *vp);
static void do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid);
54*bbaa8b60SDan Kruchinin 
557c478bd9Sstevel@tonic-gate 
567c478bd9Sstevel@tonic-gate /*
577c478bd9Sstevel@tonic-gate  * Add the share reservation shr to vp.
587c478bd9Sstevel@tonic-gate  */
597c478bd9Sstevel@tonic-gate int
607c478bd9Sstevel@tonic-gate add_share(struct vnode *vp, struct shrlock *shr)
617c478bd9Sstevel@tonic-gate {
627c478bd9Sstevel@tonic-gate 	struct shrlocklist *shrl;
637c478bd9Sstevel@tonic-gate 
647c478bd9Sstevel@tonic-gate 	/*
657c478bd9Sstevel@tonic-gate 	 * An access of zero is not legal, however some older clients
667c478bd9Sstevel@tonic-gate 	 * generate it anyways.  Allow the request only if it is
677c478bd9Sstevel@tonic-gate 	 * coming from a remote system.  Be generous in what you
687c478bd9Sstevel@tonic-gate 	 * accept and strict in what you send.
697c478bd9Sstevel@tonic-gate 	 */
707c478bd9Sstevel@tonic-gate 	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
717c478bd9Sstevel@tonic-gate 		return (EINVAL);
727c478bd9Sstevel@tonic-gate 	}
737c478bd9Sstevel@tonic-gate 
747c478bd9Sstevel@tonic-gate 	/*
757c478bd9Sstevel@tonic-gate 	 * Sanity check to make sure we have valid options.
767c478bd9Sstevel@tonic-gate 	 * There is known overlap but it doesn't hurt to be careful.
777c478bd9Sstevel@tonic-gate 	 */
78da6c28aaSamw 	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
797c478bd9Sstevel@tonic-gate 		return (EINVAL);
807c478bd9Sstevel@tonic-gate 	}
817c478bd9Sstevel@tonic-gate 	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
82da6c28aaSamw 	    F_MANDDNY|F_RMDNY)) {
837c478bd9Sstevel@tonic-gate 		return (EINVAL);
847c478bd9Sstevel@tonic-gate 	}
857c478bd9Sstevel@tonic-gate 
867c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
877c478bd9Sstevel@tonic-gate 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
887c478bd9Sstevel@tonic-gate 		/*
897c478bd9Sstevel@tonic-gate 		 * If the share owner matches previous request
907c478bd9Sstevel@tonic-gate 		 * do special handling.
917c478bd9Sstevel@tonic-gate 		 */
927c478bd9Sstevel@tonic-gate 		if ((shrl->shr->s_sysid == shr->s_sysid) &&
937c478bd9Sstevel@tonic-gate 		    (shrl->shr->s_pid == shr->s_pid) &&
947c478bd9Sstevel@tonic-gate 		    (shrl->shr->s_own_len == shr->s_own_len) &&
957c478bd9Sstevel@tonic-gate 		    bcmp(shrl->shr->s_owner, shr->s_owner,
967c478bd9Sstevel@tonic-gate 		    shr->s_own_len) == 0) {
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 			/*
997c478bd9Sstevel@tonic-gate 			 * If the existing request is F_COMPAT and
1007c478bd9Sstevel@tonic-gate 			 * is the first share then allow any F_COMPAT
1017c478bd9Sstevel@tonic-gate 			 * from the same process.  Trick:  If the existing
1027c478bd9Sstevel@tonic-gate 			 * F_COMPAT is write access then it must have
1037c478bd9Sstevel@tonic-gate 			 * the same owner as the first.
1047c478bd9Sstevel@tonic-gate 			 */
1057c478bd9Sstevel@tonic-gate 			if ((shrl->shr->s_deny & F_COMPAT) &&
1067c478bd9Sstevel@tonic-gate 			    (shr->s_deny & F_COMPAT) &&
1077c478bd9Sstevel@tonic-gate 			    ((shrl->next == NULL) ||
1087c478bd9Sstevel@tonic-gate 			    (shrl->shr->s_access & F_WRACC)))
1097c478bd9Sstevel@tonic-gate 				break;
1107c478bd9Sstevel@tonic-gate 		}
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate 		/*
1137c478bd9Sstevel@tonic-gate 		 * If a first share has been done in compatibility mode
1147c478bd9Sstevel@tonic-gate 		 * handle the special cases.
1157c478bd9Sstevel@tonic-gate 		 */
1167c478bd9Sstevel@tonic-gate 		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
1177c478bd9Sstevel@tonic-gate 
1187c478bd9Sstevel@tonic-gate 			if (!(shr->s_deny & F_COMPAT)) {
1197c478bd9Sstevel@tonic-gate 				/*
1207c478bd9Sstevel@tonic-gate 				 * If not compat and want write access or
1217c478bd9Sstevel@tonic-gate 				 * want to deny read or
1227c478bd9Sstevel@tonic-gate 				 * write exists, fails
1237c478bd9Sstevel@tonic-gate 				 */
1247c478bd9Sstevel@tonic-gate 				if ((shr->s_access & F_WRACC) ||
1257c478bd9Sstevel@tonic-gate 				    (shr->s_deny & F_RDDNY) ||
1267c478bd9Sstevel@tonic-gate 				    (shrl->shr->s_access & F_WRACC)) {
1277c478bd9Sstevel@tonic-gate 					mutex_exit(&vp->v_lock);
1287c478bd9Sstevel@tonic-gate 					return (EAGAIN);
1297c478bd9Sstevel@tonic-gate 				}
1307c478bd9Sstevel@tonic-gate 				/*
1317c478bd9Sstevel@tonic-gate 				 * If read only file allow, this may allow
1327c478bd9Sstevel@tonic-gate 				 * a deny write but that is meaningless on
1337c478bd9Sstevel@tonic-gate 				 * a read only file.
1347c478bd9Sstevel@tonic-gate 				 */
1357c478bd9Sstevel@tonic-gate 				if (isreadonly(vp))
1367c478bd9Sstevel@tonic-gate 					break;
1377c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
1387c478bd9Sstevel@tonic-gate 				return (EAGAIN);
1397c478bd9Sstevel@tonic-gate 			}
1407c478bd9Sstevel@tonic-gate 			/*
1417c478bd9Sstevel@tonic-gate 			 * This is a compat request and read access
1427c478bd9Sstevel@tonic-gate 			 * and the first was also read access
1437c478bd9Sstevel@tonic-gate 			 * we always allow it, otherwise we reject because
1447c478bd9Sstevel@tonic-gate 			 * we have handled the only valid write case above.
1457c478bd9Sstevel@tonic-gate 			 */
1467c478bd9Sstevel@tonic-gate 			if ((shr->s_access == F_RDACC) &&
1477c478bd9Sstevel@tonic-gate 			    (shrl->shr->s_access == F_RDACC))
1487c478bd9Sstevel@tonic-gate 				break;
1497c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
1507c478bd9Sstevel@tonic-gate 			return (EAGAIN);
1517c478bd9Sstevel@tonic-gate 		}
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate 		/*
1547c478bd9Sstevel@tonic-gate 		 * If we are trying to share in compatibility mode
1557c478bd9Sstevel@tonic-gate 		 * and the current share is compat (and not the first)
1567c478bd9Sstevel@tonic-gate 		 * we don't know enough.
1577c478bd9Sstevel@tonic-gate 		 */
1587c478bd9Sstevel@tonic-gate 		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
1597c478bd9Sstevel@tonic-gate 			continue;
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate 		/*
1627c478bd9Sstevel@tonic-gate 		 * If this is a compat we check for what can't succeed.
1637c478bd9Sstevel@tonic-gate 		 */
1647c478bd9Sstevel@tonic-gate 		if (shr->s_deny & F_COMPAT) {
1657c478bd9Sstevel@tonic-gate 			/*
1667c478bd9Sstevel@tonic-gate 			 * If we want write access or
1677c478bd9Sstevel@tonic-gate 			 * if anyone is denying read or
1687c478bd9Sstevel@tonic-gate 			 * if anyone has write access we fail
1697c478bd9Sstevel@tonic-gate 			 */
1707c478bd9Sstevel@tonic-gate 			if ((shr->s_access & F_WRACC) ||
1717c478bd9Sstevel@tonic-gate 			    (shrl->shr->s_deny & F_RDDNY) ||
1727c478bd9Sstevel@tonic-gate 			    (shrl->shr->s_access & F_WRACC)) {
1737c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
1747c478bd9Sstevel@tonic-gate 				return (EAGAIN);
1757c478bd9Sstevel@tonic-gate 			}
1767c478bd9Sstevel@tonic-gate 			/*
1777c478bd9Sstevel@tonic-gate 			 * If the first was opened with only read access
1787c478bd9Sstevel@tonic-gate 			 * and is a read only file we allow.
1797c478bd9Sstevel@tonic-gate 			 */
1807c478bd9Sstevel@tonic-gate 			if (shrl->next == NULL) {
1817c478bd9Sstevel@tonic-gate 				if ((shrl->shr->s_access == F_RDACC) &&
1827c478bd9Sstevel@tonic-gate 				    isreadonly(vp)) {
1837c478bd9Sstevel@tonic-gate 					break;
1847c478bd9Sstevel@tonic-gate 				}
1857c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
1867c478bd9Sstevel@tonic-gate 				return (EAGAIN);
1877c478bd9Sstevel@tonic-gate 			}
1887c478bd9Sstevel@tonic-gate 			/*
1897c478bd9Sstevel@tonic-gate 			 * We still can't determine our fate so continue
1907c478bd9Sstevel@tonic-gate 			 */
1917c478bd9Sstevel@tonic-gate 			continue;
1927c478bd9Sstevel@tonic-gate 		}
1937c478bd9Sstevel@tonic-gate 
1947c478bd9Sstevel@tonic-gate 		/*
1957c478bd9Sstevel@tonic-gate 		 * Simple bitwise test, if we are trying to access what
1967c478bd9Sstevel@tonic-gate 		 * someone else is denying or we are trying to deny
1977c478bd9Sstevel@tonic-gate 		 * what someone else is accessing we fail.
1987c478bd9Sstevel@tonic-gate 		 */
1997c478bd9Sstevel@tonic-gate 		if ((shr->s_access & shrl->shr->s_deny) ||
2007c478bd9Sstevel@tonic-gate 		    (shr->s_deny & shrl->shr->s_access)) {
2017c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
2027c478bd9Sstevel@tonic-gate 			return (EAGAIN);
2037c478bd9Sstevel@tonic-gate 		}
2047c478bd9Sstevel@tonic-gate 	}
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate 	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
2077c478bd9Sstevel@tonic-gate 	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
2087c478bd9Sstevel@tonic-gate 	shrl->shr->s_access = shr->s_access;
2097c478bd9Sstevel@tonic-gate 	shrl->shr->s_deny = shr->s_deny;
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 	/*
2127c478bd9Sstevel@tonic-gate 	 * Make sure no other deny modes are also set with F_COMPAT
2137c478bd9Sstevel@tonic-gate 	 */
2147c478bd9Sstevel@tonic-gate 	if (shrl->shr->s_deny & F_COMPAT)
2157c478bd9Sstevel@tonic-gate 		shrl->shr->s_deny = F_COMPAT;
2167c478bd9Sstevel@tonic-gate 	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
2177c478bd9Sstevel@tonic-gate 	shrl->shr->s_pid = shr->s_pid;
2187c478bd9Sstevel@tonic-gate 	shrl->shr->s_own_len = shr->s_own_len;
2197c478bd9Sstevel@tonic-gate 	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
2207c478bd9Sstevel@tonic-gate 	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
2217c478bd9Sstevel@tonic-gate 	shrl->next = vp->v_shrlocks;
2227c478bd9Sstevel@tonic-gate 	vp->v_shrlocks = shrl;
2237c478bd9Sstevel@tonic-gate #ifdef DEBUG
2247c478bd9Sstevel@tonic-gate 	if (share_debug)
2257c478bd9Sstevel@tonic-gate 		print_shares(vp);
2267c478bd9Sstevel@tonic-gate #endif
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	return (0);
2317c478bd9Sstevel@tonic-gate }
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate /*
2347c478bd9Sstevel@tonic-gate  *	nlmid	sysid	pid
2357c478bd9Sstevel@tonic-gate  *	=====	=====	===
2367c478bd9Sstevel@tonic-gate  *	!=0	!=0	=0	in cluster; NLM lock
2377c478bd9Sstevel@tonic-gate  *	!=0	=0	=0	in cluster; special case for NLM lock
2387c478bd9Sstevel@tonic-gate  *	!=0	=0	!=0	in cluster; PXFS local lock
2397c478bd9Sstevel@tonic-gate  *	!=0	!=0	!=0	cannot happen
2407c478bd9Sstevel@tonic-gate  *	=0	!=0	=0	not in cluster; NLM lock
2417c478bd9Sstevel@tonic-gate  *	=0	=0	!=0	not in cluster; local lock
2427c478bd9Sstevel@tonic-gate  *	=0	=0	=0	cannot happen
2437c478bd9Sstevel@tonic-gate  *	=0	!=0	!=0	cannot happen
2447c478bd9Sstevel@tonic-gate  */
2457c478bd9Sstevel@tonic-gate static int
2467c478bd9Sstevel@tonic-gate is_match_for_del(struct shrlock *shr, struct shrlock *element)
2477c478bd9Sstevel@tonic-gate {
2487c478bd9Sstevel@tonic-gate 	int nlmid1, nlmid2;
2497c478bd9Sstevel@tonic-gate 	int result = 0;
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate 	nlmid1 = GETNLMID(shr->s_sysid);
2527c478bd9Sstevel@tonic-gate 	nlmid2 = GETNLMID(element->s_sysid);
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate 	if (nlmid1 != 0) {		/* in a cluster */
2557c478bd9Sstevel@tonic-gate 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
2567c478bd9Sstevel@tonic-gate 			/*
2577c478bd9Sstevel@tonic-gate 			 * Lock obtained through nlm server.  Just need to
2587c478bd9Sstevel@tonic-gate 			 * compare whole sysids.  pid will always = 0.
2597c478bd9Sstevel@tonic-gate 			 */
2607c478bd9Sstevel@tonic-gate 			result = shr->s_sysid == element->s_sysid;
2617c478bd9Sstevel@tonic-gate 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
2627c478bd9Sstevel@tonic-gate 			/*
2637c478bd9Sstevel@tonic-gate 			 * This is a special case.  The NLM server wishes to
2647c478bd9Sstevel@tonic-gate 			 * delete all share locks obtained through nlmid1.
2657c478bd9Sstevel@tonic-gate 			 */
2667c478bd9Sstevel@tonic-gate 			result = (nlmid1 == nlmid2);
2677c478bd9Sstevel@tonic-gate 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
2687c478bd9Sstevel@tonic-gate 			/*
2697c478bd9Sstevel@tonic-gate 			 * Lock obtained locally through PXFS.  Match nlmids
2707c478bd9Sstevel@tonic-gate 			 * and pids.
2717c478bd9Sstevel@tonic-gate 			 */
2727c478bd9Sstevel@tonic-gate 			result = (nlmid1 == nlmid2 &&
2737c478bd9Sstevel@tonic-gate 			    shr->s_pid == element->s_pid);
2747c478bd9Sstevel@tonic-gate 		}
2757c478bd9Sstevel@tonic-gate 	} else {			/* not in a cluster */
2767c478bd9Sstevel@tonic-gate 		result = ((shr->s_sysid == 0 &&
2777c478bd9Sstevel@tonic-gate 		    shr->s_pid == element->s_pid) ||
2787c478bd9Sstevel@tonic-gate 		    (shr->s_sysid != 0 &&
2797c478bd9Sstevel@tonic-gate 		    shr->s_sysid == element->s_sysid));
2807c478bd9Sstevel@tonic-gate 	}
2817c478bd9Sstevel@tonic-gate 	return (result);
2827c478bd9Sstevel@tonic-gate }
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate /*
2857c478bd9Sstevel@tonic-gate  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
2867c478bd9Sstevel@tonic-gate  * share could not be found.  If the share reservation is an NBMAND share
2877c478bd9Sstevel@tonic-gate  * reservation, signal anyone waiting for the share to go away (e.g.,
2887c478bd9Sstevel@tonic-gate  * blocking lock requests).
2897c478bd9Sstevel@tonic-gate  */
2907c478bd9Sstevel@tonic-gate 
2917c478bd9Sstevel@tonic-gate int
2927c478bd9Sstevel@tonic-gate del_share(struct vnode *vp, struct shrlock *shr)
2937c478bd9Sstevel@tonic-gate {
2947c478bd9Sstevel@tonic-gate 	struct shrlocklist *shrl;
2957c478bd9Sstevel@tonic-gate 	struct shrlocklist **shrlp;
2967c478bd9Sstevel@tonic-gate 	int found = 0;
2977c478bd9Sstevel@tonic-gate 	int is_nbmand = 0;
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
3007c478bd9Sstevel@tonic-gate 	/*
3017c478bd9Sstevel@tonic-gate 	 * Delete the shares with the matching sysid and owner
3027c478bd9Sstevel@tonic-gate 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
3037c478bd9Sstevel@tonic-gate 	 * But if own_len == 0 delete all with matching sysid.
3047c478bd9Sstevel@tonic-gate 	 */
3057c478bd9Sstevel@tonic-gate 	shrlp = &vp->v_shrlocks;
3067c478bd9Sstevel@tonic-gate 	while (*shrlp) {
3077c478bd9Sstevel@tonic-gate 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
3087c478bd9Sstevel@tonic-gate 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
3097c478bd9Sstevel@tonic-gate 		    shr->s_own_len) == 0)) ||
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate 		    (shr->s_own_len == 0 &&
3127c478bd9Sstevel@tonic-gate 		    is_match_for_del(shr, (*shrlp)->shr))) {
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 			shrl = *shrlp;
3157c478bd9Sstevel@tonic-gate 			*shrlp = shrl->next;
3167c478bd9Sstevel@tonic-gate 
3177c478bd9Sstevel@tonic-gate 			if (shrl->shr->s_deny & F_MANDDNY)
3187c478bd9Sstevel@tonic-gate 				is_nbmand = 1;
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 			/* XXX deref sysid */
3217c478bd9Sstevel@tonic-gate 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
3227c478bd9Sstevel@tonic-gate 			kmem_free(shrl->shr, sizeof (struct shrlock));
3237c478bd9Sstevel@tonic-gate 			kmem_free(shrl, sizeof (struct shrlocklist));
3247c478bd9Sstevel@tonic-gate 			found++;
3257c478bd9Sstevel@tonic-gate 			continue;
3267c478bd9Sstevel@tonic-gate 		}
3277c478bd9Sstevel@tonic-gate 		shrlp = &(*shrlp)->next;
3287c478bd9Sstevel@tonic-gate 	}
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate 	if (is_nbmand)
3317c478bd9Sstevel@tonic-gate 		cv_broadcast(&vp->v_cv);
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
3347c478bd9Sstevel@tonic-gate 	return (found ? 0 : EINVAL);
3357c478bd9Sstevel@tonic-gate }
3367c478bd9Sstevel@tonic-gate 
/*
 * Remove every local share reservation that process pid holds on vp.
 * "Local" is expressed by passing a sysid of 0 to do_cleanshares().
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	const int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
346*bbaa8b60SDan Kruchinin 
/*
 * Remove every remote share reservation on vp that was made by the given
 * sysid.  A sysid of 0 does not name a remote system, so the call is a
 * no-op in that case.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
359*bbaa8b60SDan Kruchinin 
360*bbaa8b60SDan Kruchinin /*
361*bbaa8b60SDan Kruchinin  * Cleanup share reservations on given vnode made
362*bbaa8b60SDan Kruchinin  * by the either given pid or sysid.
363*bbaa8b60SDan Kruchinin  * If sysid is 0, remove all shares made by given pid,
364*bbaa8b60SDan Kruchinin  * otherwise all shares made by the given sysid will
365*bbaa8b60SDan Kruchinin  * be removed.
366*bbaa8b60SDan Kruchinin  */
367*bbaa8b60SDan Kruchinin static void
368*bbaa8b60SDan Kruchinin do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
369*bbaa8b60SDan Kruchinin {
3707c478bd9Sstevel@tonic-gate 	struct shrlock shr;
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate 	if (vp->v_shrlocks == NULL)
3737c478bd9Sstevel@tonic-gate 		return;
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 	shr.s_access = 0;
3767c478bd9Sstevel@tonic-gate 	shr.s_deny = 0;
3777c478bd9Sstevel@tonic-gate 	shr.s_pid = pid;
378*bbaa8b60SDan Kruchinin 	shr.s_sysid = sysid;
3797c478bd9Sstevel@tonic-gate 	shr.s_own_len = 0;
3807c478bd9Sstevel@tonic-gate 	shr.s_owner = NULL;
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	(void) del_share(vp, &shr);
3837c478bd9Sstevel@tonic-gate }
3847c478bd9Sstevel@tonic-gate 
/*
 * Matching rule used by shr_has_remote_shares(): sysid1 is the sysid
 * being asked about, sysid2 is the sysid recorded in a list entry.
 * Returns non-zero if the entry counts as a match.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster.  A zero query matches any entry with
		 * a non-zero sysid; a non-zero query must match exactly.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster. */
	if (GETSYSID(sysid1) != 0) {
		/*
		 * Lock obtained through the NLM server: compare the
		 * whole sysids.
		 */
		return (sysid1 == sysid2);
	}

	/*
	 * Special case: the NLM server identified by sysid1's nlmid wants
	 * to know whether it has obtained any share locks on the vnode.
	 */
	return (GETNLMID(sysid1) == GETNLMID(sysid2));
}
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate /*
4147c478bd9Sstevel@tonic-gate  * Determine whether there are any shares for the given vnode
4157c478bd9Sstevel@tonic-gate  * with a remote sysid. Returns zero if not, non-zero if there are.
4167c478bd9Sstevel@tonic-gate  * If sysid is non-zero then determine if this sysid has a share.
4177c478bd9Sstevel@tonic-gate  *
4187c478bd9Sstevel@tonic-gate  * Note that the return value from this function is potentially invalid
4197c478bd9Sstevel@tonic-gate  * once it has been returned.  The caller is responsible for providing its
4207c478bd9Sstevel@tonic-gate  * own synchronization mechanism to ensure that the return value is useful.
4217c478bd9Sstevel@tonic-gate  */
4227c478bd9Sstevel@tonic-gate int
4237c478bd9Sstevel@tonic-gate shr_has_remote_shares(vnode_t *vp, int32_t sysid)
4247c478bd9Sstevel@tonic-gate {
4257c478bd9Sstevel@tonic-gate 	struct shrlocklist *shrl;
4267c478bd9Sstevel@tonic-gate 	int result = 0;
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
4297c478bd9Sstevel@tonic-gate 	shrl = vp->v_shrlocks;
4307c478bd9Sstevel@tonic-gate 	while (shrl) {
4317c478bd9Sstevel@tonic-gate 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate 			result = 1;
4347c478bd9Sstevel@tonic-gate 			break;
4357c478bd9Sstevel@tonic-gate 		}
4367c478bd9Sstevel@tonic-gate 		shrl = shrl->next;
4377c478bd9Sstevel@tonic-gate 	}
4387c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
4397c478bd9Sstevel@tonic-gate 	return (result);
4407c478bd9Sstevel@tonic-gate }
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate static int
4437c478bd9Sstevel@tonic-gate isreadonly(struct vnode *vp)
4447c478bd9Sstevel@tonic-gate {
4457c478bd9Sstevel@tonic-gate 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
4467c478bd9Sstevel@tonic-gate 	    vp->v_type != VFIFO && vn_is_readonly(vp));
4477c478bd9Sstevel@tonic-gate }
4487c478bd9Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Debug aid: print every share reservation on vp, or "<NULL>" if the
 * list is empty.  The list is walked without taking vp->v_lock here;
 * add_share() calls this with the lock already held.
 */
static void
print_shares(struct vnode *vp)
{
	struct shrlocklist *ent;

	if (vp->v_shrlocks == NULL) {
		printf("<NULL>\n");
		return;
	}

	for (ent = vp->v_shrlocks; ent != NULL; ent = ent->next)
		print_share(ent->shr);
}

/*
 * Debug aid: print one share reservation - access bits (R/W, or N when
 * neither read nor write is set), deny bits (C/R/W, or N for F_NODNY),
 * sysid, pid, and the owner bytes in hex.  Prints "<NULL>" for a NULL
 * pointer.
 */
static void
print_share(struct shrlock *shr)
{
	int i;

	if (shr == NULL) {
		printf("<NULL>\n");
		return;
	}

	printf("    access(%d):	", shr->s_access);
	if (shr->s_access & F_RDACC)
		printf("R");
	if (shr->s_access & F_WRACC)
		printf("W");
	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
		printf("N");
	printf("\n");
	printf("    deny:	");
	if (shr->s_deny & F_COMPAT)
		printf("C");
	if (shr->s_deny & F_RDDNY)
		printf("R");
	if (shr->s_deny & F_WRDNY)
		printf("W");
	if (shr->s_deny == F_NODNY)
		printf("N");
	printf("\n");
	printf("    sysid:	%d\n", shr->s_sysid);
	printf("    pid:	%d\n", shr->s_pid);
	printf("    owner:	[%d]", shr->s_own_len);
	printf("'");
	for (i = 0; i < shr->s_own_len; i++) {
		/*
		 * Convert through unsigned char before widening.  If the
		 * owner buffer's element type is a signed char (presumably
		 * a caddr_t - confirm against the struct definition), a
		 * direct cast to unsigned would sign-extend bytes >= 0x80
		 * and print them as eight hex digits instead of two.
		 */
		printf("%02x", (unsigned)(unsigned char)shr->s_owner[i]);
	}
	printf("'\n");
}
#endif
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate /*
5067c478bd9Sstevel@tonic-gate  * Return non-zero if the given I/O request conflicts with a registered
5077c478bd9Sstevel@tonic-gate  * share reservation.
508da6c28aaSamw  *
509da6c28aaSamw  * A process is identified by the tuple (sysid, pid). When the caller
510da6c28aaSamw  * context is passed to nbl_share_conflict, the sysid and pid in the
511da6c28aaSamw  * caller context are used. Otherwise the sysid is zero, and the pid is
512da6c28aaSamw  * taken from the current process.
513da6c28aaSamw  *
514da6c28aaSamw  * Conflict Algorithm:
515da6c28aaSamw  *   1. An op request of NBL_READ will fail if a different
516da6c28aaSamw  *      process has a mandatory share reservation with deny read.
517da6c28aaSamw  *
518da6c28aaSamw  *   2. An op request of NBL_WRITE will fail if a different
519da6c28aaSamw  *      process has a mandatory share reservation with deny write.
520da6c28aaSamw  *
521da6c28aaSamw  *   3. An op request of NBL_READWRITE will fail if a different
522da6c28aaSamw  *      process has a mandatory share reservation with deny read
523da6c28aaSamw  *      or deny write.
524da6c28aaSamw  *
525da6c28aaSamw  *   4. An op request of NBL_REMOVE will fail if there is
526da6c28aaSamw  *      a mandatory share reservation with an access of read,
527da6c28aaSamw  *      write, or remove. (Anything other than meta data access).
528da6c28aaSamw  *
529da6c28aaSamw  *   5. An op request of NBL_RENAME will fail if there is
530da6c28aaSamw  *      a mandatory share reservation with:
531da6c28aaSamw  *        a) access write or access remove
532da6c28aaSamw  *      or
533da6c28aaSamw  *        b) access read and deny remove
534da6c28aaSamw  *
535da6c28aaSamw  *   Otherwise there is no conflict and the op request succeeds.
536da6c28aaSamw  *
537da6c28aaSamw  * This behavior is required for interoperability between
538da6c28aaSamw  * the nfs server, cifs server, and local access.
539da6c28aaSamw  * This behavior can result in non-posix semantics.
540da6c28aaSamw  *
541da6c28aaSamw  * When mandatory share reservations are enabled, a process
542da6c28aaSamw  * should call nbl_share_conflict to determine if the
543da6c28aaSamw  * desired operation would conflict with an existing share
544da6c28aaSamw  * reservation.
545da6c28aaSamw  *
546da6c28aaSamw  * The call to nbl_share_conflict may be skipped if the
547da6c28aaSamw  * process has an existing share reservation and the operation
548da6c28aaSamw  * is being performed in the context of that existing share
549da6c28aaSamw  * reservation.
5507c478bd9Sstevel@tonic-gate  */
5517c478bd9Sstevel@tonic-gate int
552da6c28aaSamw nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
5537c478bd9Sstevel@tonic-gate {
5547c478bd9Sstevel@tonic-gate 	struct shrlocklist *shrl;
5557c478bd9Sstevel@tonic-gate 	int conflict = 0;
556da6c28aaSamw 	pid_t pid;
557da6c28aaSamw 	int sysid;
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 	ASSERT(nbl_in_crit(vp));
5607c478bd9Sstevel@tonic-gate 
561da6c28aaSamw 	if (ct == NULL) {
562da6c28aaSamw 		pid = curproc->p_pid;
563da6c28aaSamw 		sysid = 0;
564da6c28aaSamw 	} else {
565da6c28aaSamw 		pid = ct->cc_pid;
566da6c28aaSamw 		sysid = ct->cc_sysid;
567da6c28aaSamw 	}
568da6c28aaSamw 
5697c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
5707c478bd9Sstevel@tonic-gate 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
571da6c28aaSamw 		if (!(shrl->shr->s_deny & F_MANDDNY))
572da6c28aaSamw 			continue;
573da6c28aaSamw 		/*
574da6c28aaSamw 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
575da6c28aaSamw 		 * check if the share reservation being examined
576da6c28aaSamw 		 * belongs to the current process.
577da6c28aaSamw 		 * NBL_REMOVE and NBL_RENAME do not.
578da6c28aaSamw 		 * This behavior is required by the conflict
579da6c28aaSamw 		 * algorithm described above.
580da6c28aaSamw 		 */
5817c478bd9Sstevel@tonic-gate 		switch (op) {
5827c478bd9Sstevel@tonic-gate 		case NBL_READ:
583da6c28aaSamw 			if ((shrl->shr->s_deny & F_RDDNY) &&
584da6c28aaSamw 			    (shrl->shr->s_sysid != sysid ||
585da6c28aaSamw 			    shrl->shr->s_pid != pid))
5867c478bd9Sstevel@tonic-gate 				conflict = 1;
5877c478bd9Sstevel@tonic-gate 			break;
5887c478bd9Sstevel@tonic-gate 		case NBL_WRITE:
589da6c28aaSamw 			if ((shrl->shr->s_deny & F_WRDNY) &&
590da6c28aaSamw 			    (shrl->shr->s_sysid != sysid ||
591da6c28aaSamw 			    shrl->shr->s_pid != pid))
5927c478bd9Sstevel@tonic-gate 				conflict = 1;
5937c478bd9Sstevel@tonic-gate 			break;
5947c478bd9Sstevel@tonic-gate 		case NBL_READWRITE:
595da6c28aaSamw 			if ((shrl->shr->s_deny & F_RWDNY) &&
596da6c28aaSamw 			    (shrl->shr->s_sysid != sysid ||
597da6c28aaSamw 			    shrl->shr->s_pid != pid))
598da6c28aaSamw 				conflict = 1;
599da6c28aaSamw 			break;
600da6c28aaSamw 		case NBL_REMOVE:
601da6c28aaSamw 			if (shrl->shr->s_access & (F_RWACC|F_RMACC))
6027c478bd9Sstevel@tonic-gate 				conflict = 1;
6037c478bd9Sstevel@tonic-gate 			break;
6047c478bd9Sstevel@tonic-gate 		case NBL_RENAME:
605da6c28aaSamw 			if (shrl->shr->s_access & (F_WRACC|F_RMACC))
606da6c28aaSamw 				conflict = 1;
607da6c28aaSamw 
608da6c28aaSamw 			else if ((shrl->shr->s_access & F_RDACC) &&
609da6c28aaSamw 			    (shrl->shr->s_deny & F_RMDNY))
6107c478bd9Sstevel@tonic-gate 				conflict = 1;
6117c478bd9Sstevel@tonic-gate 			break;
6127c478bd9Sstevel@tonic-gate #ifdef DEBUG
6137c478bd9Sstevel@tonic-gate 		default:
6147c478bd9Sstevel@tonic-gate 			cmn_err(CE_PANIC,
6157c478bd9Sstevel@tonic-gate 			    "nbl_share_conflict: bogus op (%d)",
6167c478bd9Sstevel@tonic-gate 			    op);
6177c478bd9Sstevel@tonic-gate 			break;
6187c478bd9Sstevel@tonic-gate #endif
6197c478bd9Sstevel@tonic-gate 		}
6207c478bd9Sstevel@tonic-gate 		if (conflict)
6217c478bd9Sstevel@tonic-gate 			break;
6227c478bd9Sstevel@tonic-gate 	}
6237c478bd9Sstevel@tonic-gate 
6247c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
6257c478bd9Sstevel@tonic-gate 	return (conflict);
6267c478bd9Sstevel@tonic-gate }
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate /*
6297c478bd9Sstevel@tonic-gate  * Determine if the given process has a NBMAND share reservation on the
6307c478bd9Sstevel@tonic-gate  * given vnode. Returns 1 if the process has such a share reservation,
6317c478bd9Sstevel@tonic-gate  * returns 0 otherwise.
6327c478bd9Sstevel@tonic-gate  */
6337c478bd9Sstevel@tonic-gate int
6347c478bd9Sstevel@tonic-gate proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
6357c478bd9Sstevel@tonic-gate {
6367c478bd9Sstevel@tonic-gate 	struct shrlocklist *shrl;
6377c478bd9Sstevel@tonic-gate 
6387c478bd9Sstevel@tonic-gate 	/*
6397c478bd9Sstevel@tonic-gate 	 * Any NBMAND share reservation on the vp for this process?
6407c478bd9Sstevel@tonic-gate 	 */
6417c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
6427c478bd9Sstevel@tonic-gate 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
6437c478bd9Sstevel@tonic-gate 		if (shrl->shr->s_sysid == 0 &&
6447c478bd9Sstevel@tonic-gate 		    (shrl->shr->s_deny & F_MANDDNY) &&
6457c478bd9Sstevel@tonic-gate 		    (shrl->shr->s_pid == pid)) {
6467c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
6477c478bd9Sstevel@tonic-gate 			return (1);
6487c478bd9Sstevel@tonic-gate 		}
6497c478bd9Sstevel@tonic-gate 	}
6507c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate 	return (0);
6537c478bd9Sstevel@tonic-gate }
654