xref: /titanic_44/usr/src/uts/common/os/flock.c (revision 7a0cc5a9d7d5732e36817701e754e703527c61cd)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5da6c28aaSamw  * Common Development and Distribution License (the "License").
6da6c28aaSamw  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate 
227c478bd9Sstevel@tonic-gate /*
23da6c28aaSamw  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
287c478bd9Sstevel@tonic-gate /*	All Rights Reserved */
297c478bd9Sstevel@tonic-gate 
30bbaa8b60SDan Kruchinin /*
31bbaa8b60SDan Kruchinin  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
32bbaa8b60SDan Kruchinin  */
33bbaa8b60SDan Kruchinin 
347c478bd9Sstevel@tonic-gate #include <sys/flock_impl.h>
357c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
367c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>		/* for <sys/callb.h> */
377c478bd9Sstevel@tonic-gate #include <sys/callb.h>
387c478bd9Sstevel@tonic-gate #include <sys/clconf.h>
397c478bd9Sstevel@tonic-gate #include <sys/cladm.h>
407c478bd9Sstevel@tonic-gate #include <sys/nbmlock.h>
417c478bd9Sstevel@tonic-gate #include <sys/cred.h>
427c478bd9Sstevel@tonic-gate #include <sys/policy.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate /*
457c478bd9Sstevel@tonic-gate  * The following four variables are for statistics purposes and they are
467c478bd9Sstevel@tonic-gate  * not protected by locks. They may not be accurate but will at least be
477c478bd9Sstevel@tonic-gate  * close to the actual value.
487c478bd9Sstevel@tonic-gate  */
497c478bd9Sstevel@tonic-gate 
int	flk_lock_allocs;	/* lock_descriptor_t structures allocated */
int	flk_lock_frees;		/* lock_descriptor_t structures freed */
int 	edge_allocs;		/* dependency-graph edge_t's allocated */
int	edge_frees;		/* dependency-graph edge_t's freed */
int 	flk_proc_vertex_allocs;	/* process-graph vertices allocated */
int 	flk_proc_edge_allocs;	/* process-graph edges allocated */
int	flk_proc_vertex_frees;	/* process-graph vertices freed */
int	flk_proc_edge_frees;	/* process-graph edges freed */

/* Protects the global flk_lockmgr_status (see block comment further down). */
static kmutex_t flock_lock;
607c478bd9Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Debug-only consistency checks over the lock graphs.  They are compiled
 * in only under DEBUG and are further gated at run time by check_debug,
 * so a DEBUG kernel pays nothing unless the tunable is turned on.
 */
int check_debug = 0;
#define	CHECK_ACTIVE_LOCKS(gp)	if (check_debug) \
					check_active_locks(gp);
#define	CHECK_SLEEPING_LOCKS(gp)	if (check_debug) \
						check_sleeping_locks(gp);
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 	\
		if (check_debug)	\
			check_owner_locks(gp, pid, sysid, vp);
/*
 * Panic on an illegal lock state transition.  Note: the message is built
 * from two adjacent string literals; the previous form continued the
 * string itself across lines with a backslash, which spliced the next
 * line's leading whitespace into the middle of the panic message.
 */
#define	CHECK_LOCK_TRANSITION(old_state, new_state) \
	{ \
		if (check_lock_transition(old_state, new_state)) { \
			cmn_err(CE_PANIC, "Illegal lock transition " \
			    "from %d to %d", old_state, new_state); \
		} \
	}
#else

#define	CHECK_ACTIVE_LOCKS(gp)
#define	CHECK_SLEEPING_LOCKS(gp)
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
#define	CHECK_LOCK_TRANSITION(old_state, new_state)

#endif /* DEBUG */
857c478bd9Sstevel@tonic-gate 
/* kmem cache from which dependency-graph edge_t structures are allocated */
struct kmem_cache	*flk_edge_cache;

/* Per-vnode lock dependency graphs, hashed into HASH_SIZE buckets. */
graph_t		*lock_graph[HASH_SIZE];
/* Single system-wide process graph, used for deadlock detection. */
proc_graph_t	pgraph;

/*
 * Clustering.
 *
 * NLM REGISTRY TYPE IMPLEMENTATION
 *
 * Assumptions:
 *  1.  Nodes in a cluster are numbered starting at 1; always non-negative
 *	integers; maximum node id is returned by clconf_maximum_nodeid().
 *  2.  We use this node id to identify the node an NLM server runs on.
 */

/*
 * NLM registry object keeps track of NLM servers via their
 * nlmids (which are the node ids of the node in the cluster they run on)
 * that have requested locks at this LLM with which this registry is
 * associated.
 *
 * Representation of abstraction:
 *    rep = record[	states: array[nlm_state],
 *			lock: mutex]
 *
 *    Representation invariants:
 *	1. index i of rep.states is between 0 and n - 1 where n is number
 *	   of elements in the array, which happen to be the maximum number
 *	   of nodes in the cluster configuration + 1.
 *	2. map nlmid to index i of rep.states
 *		0   -> 0
 *		1   -> 1
 *		2   -> 2
 *		n-1 -> clconf_maximum_nodeid()+1
 *	3.  This 1-1 mapping is quite convenient and it avoids errors resulting
 *	    from forgetting to subtract 1 from the index.
 *	4.  The reason we keep the 0th index is the following.  A legitimate
 *	    cluster configuration includes making a UFS file system NFS
 *	    exportable.  The code is structured so that if you're in a cluster
 *	    you do one thing; otherwise, you do something else.  The problem
 *	    is what to do if you think you're in a cluster with PXFS loaded,
 *	    but you're using UFS not PXFS?  The upper two bytes of the sysid
 *	    encode the node id of the node where NLM server runs; these bytes
 *	    are zero for UFS.  Since the nodeid is used to index into the
 *	    registry, we can record the NLM server state information at index
 *	    0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */
1377c478bd9Sstevel@tonic-gate 
/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected by
 * the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when a
 * new graph is created.
 */
struct flock_globals {
	/* zone-wide status; protected by flock_lock (see comment above) */
	flk_lockmgr_status_t flk_lockmgr_status;
	/* per-graph copies, one per hash bucket; protected by graph mutex */
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

/* zone key used to look up a zone's flock_globals (set by the KLM module) */
zone_key_t flock_zone_key;
1607c478bd9Sstevel@tonic-gate 
/* Forward declarations for the lock-graph implementation below. */
static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t	*flk_get_lock(void);
static void	flk_free_lock(lock_descriptor_t	*lock);
static void	flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
			lock_descriptor_t **,  int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Clustering hooks */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

/* DEBUG-only graph consistency checkers (see CHECK_* macros above). */
#ifdef DEBUG
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

/*	proc_graph function definitions */
static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);

/* Non-blocking mandatory locking */
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
			u_offset_t);
2167c478bd9Sstevel@tonic-gate 
2177c478bd9Sstevel@tonic-gate static struct flock_globals *
flk_get_globals(void)2187c478bd9Sstevel@tonic-gate flk_get_globals(void)
2197c478bd9Sstevel@tonic-gate {
2207c478bd9Sstevel@tonic-gate 	/*
2217c478bd9Sstevel@tonic-gate 	 * The KLM module had better be loaded if we're attempting to handle
2227c478bd9Sstevel@tonic-gate 	 * lockmgr requests.
2237c478bd9Sstevel@tonic-gate 	 */
2247c478bd9Sstevel@tonic-gate 	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
2257c478bd9Sstevel@tonic-gate 	return (zone_getspecific(flock_zone_key, curproc->p_zone));
2267c478bd9Sstevel@tonic-gate }
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate static flk_lockmgr_status_t
flk_get_lockmgr_status(void)2297c478bd9Sstevel@tonic-gate flk_get_lockmgr_status(void)
2307c478bd9Sstevel@tonic-gate {
2317c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
2367c478bd9Sstevel@tonic-gate 		/*
2377c478bd9Sstevel@tonic-gate 		 * KLM module not loaded; lock manager definitely not running.
2387c478bd9Sstevel@tonic-gate 		 */
2397c478bd9Sstevel@tonic-gate 		return (FLK_LOCKMGR_DOWN);
2407c478bd9Sstevel@tonic-gate 	}
2417c478bd9Sstevel@tonic-gate 	fg = flk_get_globals();
2427c478bd9Sstevel@tonic-gate 	return (fg->flk_lockmgr_status);
2437c478bd9Sstevel@tonic-gate }
2447c478bd9Sstevel@tonic-gate 
2457c478bd9Sstevel@tonic-gate /*
2467c478bd9Sstevel@tonic-gate  * Routine called from fs_frlock in fs/fs_subr.c
2477c478bd9Sstevel@tonic-gate  */
2487c478bd9Sstevel@tonic-gate 
/*
 * Common entry point for file/record locking, called from fs_frlock()
 * on behalf of VOP_FRLOCK.
 *
 *	vp	- vnode the request applies to
 *	lckdat	- lock description (type, whence, start, len, sysid, pid);
 *		  for query requests the first blocking lock (or F_UNLCK)
 *		  is copied back out through this pointer
 *	cmd	- bitmask of request flags: SETFLCK (set a lock), INOFLCK
 *		  (I/O lock, caller holds the vnode rwlock), SLPFLCK
 *		  (willing to sleep), RCMDLCK (remote/NLM request),
 *		  PCMDLCK (PXFS request), NBMLCK (non-blocking mandatory)
 *	flag	- file open flags (FREAD/FWRITE), used for the access check
 *	offset	- current file offset, used to canonicalize l_whence
 *	flk_cbp	- optional callback list invoked before/after sleeping
 *
 * Returns 0 on success, or an errno (EBADF, EINVAL, ENOLCK, EDEADLK,
 * EINTR, ...) from validation or from processing the request.
 */
int
reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset,
    flk_callback_t *flk_cbp)
{
	lock_descriptor_t	stack_lock_request;
	lock_descriptor_t	*lock_request;
	int error = 0;
	graph_t	*gp;
	int			nlmid;

	/*
	 * Check access permissions
	 */
	if ((cmd & SETFLCK) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
			return (EBADF);

	/*
	 * for query and unlock we use the stack_lock_request
	 * (such requests never stay in the graph, so no heap allocation
	 * is needed; set/IO requests get a kmem-backed descriptor)
	 */

	if ((lckdat->l_type == F_UNLCK) ||
	    !((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));

		/*
		 * following is added to make the assertions in
		 * flk_execute_request() to pass through
		 * (the edge list must be an empty circular list)
		 */

		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the request range into the canonical start and end
	 * values.  The NLM protocol supports locking over the entire
	 * 32-bit range, so there's no range checking for remote requests,
	 * but we still need to verify that local requests obey the rules.
	 */
	/* Clustering */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
		    lckdat->l_start + (lckdat->l_len - 1);
	} else {
		/* check the validity of the lock range */
		error = flk_convert_lock_data(vp, lckdat,
		    &lock_request->l_start, &lock_request->l_end,
		    offset);
		if (error) {
			goto done;
		}
		error = flk_check_lock_data(lock_request->l_start,
		    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);

	/* Translate the cmd bits into l_state flags on the descriptor. */
	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	/*
	 * Clustering: set flag for PXFS locks
	 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
	 * also be of type 'RCMDLCK'.
	 * We do not _only_ check the GETPXFSID() macro because local PXFS
	 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
	 */

	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * We are ready for processing the request
	 */
	if (IS_LOCKMGR(lock_request)) {
		/*
		 * If the lock request is an NLM server request ....
		 */
		if (nlm_status_size == 0) { /* not booted as cluster */
			mutex_enter(&flock_lock);
			/*
			 * Bail out if this is a lock manager request and the
			 * lock manager is not supposed to be running.
			 */
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

			mutex_enter(&nlm_reg_lock);
			/*
			 * If the NLM registry does not know about this
			 * NLM server making the request, add its nlmid
			 * to the registry.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
			    nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
			    nlmid)) {
				/*
				 * If the NLM server is already known (has made
				 * previous lock requests) and its state is
				 * not NLM_UP (means that NLM server is
				 * shutting down), then bail out with an
				 * error to deny the lock request.
				 */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	/* Now get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * We drop rwlock here otherwise this might end up causing a
	 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
	 */

	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			/* report the first conflicting lock via *lckdat */
			flk_get_first_blocking_lock(lock_request);
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */

		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */

		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Recovery mechanism to release lock manager locks when
		 * NFS client crashes and restart. NFS server will clear
		 * old locks and grant new locks.
		 */

		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/* Clustering: For blocked PXFS locks, return */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Now that we have seen the status of locks in the system for
	 * this vnode we acquire the rwlock if it is an IO_LOCK.
	 */

	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;

			/*
			 * This wake up is needed otherwise
			 * if IO_LOCK has slept the dependents on this
			 * will not be woken up at all. (bugid # 1185482).
			 */

			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
		/*
		 * else if error had occurred either flk_process_request()
		 * has returned EDEADLK in which case there will be no
		 * dependents for this lock or EINTR from flk_wait_execute_
		 * request() in which case flk_cancel_sleeping_lock()
		 * would have been done. same is true with EBADF.
		 */
	}

	/* Release the descriptor; stack-allocated ones just die with us. */
	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	/* Early-failure path: request never entered a lock graph. */
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}
5277c478bd9Sstevel@tonic-gate 
5287c478bd9Sstevel@tonic-gate /*
5297c478bd9Sstevel@tonic-gate  * Invoke the callbacks in the given list.  If before sleeping, invoke in
5307c478bd9Sstevel@tonic-gate  * list order.  If after sleeping, invoke in reverse order.
5317c478bd9Sstevel@tonic-gate  *
5327c478bd9Sstevel@tonic-gate  * CPR (suspend/resume) support: if one of the callbacks returns a
5337c478bd9Sstevel@tonic-gate  * callb_cpr_t, return it.   This will be used to make the thread CPR-safe
5347c478bd9Sstevel@tonic-gate  * while it is sleeping.  There should be at most one callb_cpr_t for the
5357c478bd9Sstevel@tonic-gate  * thread.
5367c478bd9Sstevel@tonic-gate  * XXX This is unnecessarily complicated.  The CPR information should just
5377c478bd9Sstevel@tonic-gate  * get passed in directly through VOP_FRLOCK and reclock, rather than
5387c478bd9Sstevel@tonic-gate  * sneaking it in via a callback.
5397c478bd9Sstevel@tonic-gate  */
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate callb_cpr_t *
flk_invoke_callbacks(flk_callback_t * cblist,flk_cb_when_t when)5427c478bd9Sstevel@tonic-gate flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
5437c478bd9Sstevel@tonic-gate {
5447c478bd9Sstevel@tonic-gate 	callb_cpr_t *cpr_callbackp = NULL;
5457c478bd9Sstevel@tonic-gate 	callb_cpr_t *one_result;
5467c478bd9Sstevel@tonic-gate 	flk_callback_t *cb;
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate 	if (cblist == NULL)
5497c478bd9Sstevel@tonic-gate 		return (NULL);
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate 	if (when == FLK_BEFORE_SLEEP) {
5527c478bd9Sstevel@tonic-gate 		cb = cblist;
5537c478bd9Sstevel@tonic-gate 		do {
5547c478bd9Sstevel@tonic-gate 			one_result = (*cb->cb_callback)(when, cb->cb_data);
5557c478bd9Sstevel@tonic-gate 			if (one_result != NULL) {
5567c478bd9Sstevel@tonic-gate 				ASSERT(cpr_callbackp == NULL);
5577c478bd9Sstevel@tonic-gate 				cpr_callbackp = one_result;
5587c478bd9Sstevel@tonic-gate 			}
5597c478bd9Sstevel@tonic-gate 			cb = cb->cb_next;
5607c478bd9Sstevel@tonic-gate 		} while (cb != cblist);
5617c478bd9Sstevel@tonic-gate 	} else {
5627c478bd9Sstevel@tonic-gate 		cb = cblist->cb_prev;
5637c478bd9Sstevel@tonic-gate 		do {
5647c478bd9Sstevel@tonic-gate 			one_result = (*cb->cb_callback)(when, cb->cb_data);
5657c478bd9Sstevel@tonic-gate 			if (one_result != NULL) {
5667c478bd9Sstevel@tonic-gate 				cpr_callbackp = one_result;
5677c478bd9Sstevel@tonic-gate 			}
5687c478bd9Sstevel@tonic-gate 			cb = cb->cb_prev;
5697c478bd9Sstevel@tonic-gate 		} while (cb != cblist->cb_prev);
5707c478bd9Sstevel@tonic-gate 	}
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 	return (cpr_callbackp);
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate /*
5767c478bd9Sstevel@tonic-gate  * Initialize a flk_callback_t to hold the given callback.
5777c478bd9Sstevel@tonic-gate  */
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate void
flk_init_callback(flk_callback_t * flk_cb,callb_cpr_t * (* cb_fcn)(flk_cb_when_t,void *),void * cbdata)5807c478bd9Sstevel@tonic-gate flk_init_callback(flk_callback_t *flk_cb,
5817c478bd9Sstevel@tonic-gate     callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate 	flk_cb->cb_next = flk_cb;
5847c478bd9Sstevel@tonic-gate 	flk_cb->cb_prev = flk_cb;
5857c478bd9Sstevel@tonic-gate 	flk_cb->cb_callback = cb_fcn;
5867c478bd9Sstevel@tonic-gate 	flk_cb->cb_data = cbdata;
5877c478bd9Sstevel@tonic-gate }
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate /*
5907c478bd9Sstevel@tonic-gate  * Initialize an flk_callback_t and then link it into the head of an
5917c478bd9Sstevel@tonic-gate  * existing list (which may be NULL).
5927c478bd9Sstevel@tonic-gate  */
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate void
flk_add_callback(flk_callback_t * newcb,callb_cpr_t * (* cb_fcn)(flk_cb_when_t,void *),void * cbdata,flk_callback_t * cblist)5957c478bd9Sstevel@tonic-gate flk_add_callback(flk_callback_t *newcb,
5967c478bd9Sstevel@tonic-gate     callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
5977c478bd9Sstevel@tonic-gate     void *cbdata, flk_callback_t *cblist)
5987c478bd9Sstevel@tonic-gate {
5997c478bd9Sstevel@tonic-gate 	flk_init_callback(newcb, cb_fcn, cbdata);
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 	if (cblist == NULL)
6027c478bd9Sstevel@tonic-gate 		return;
6037c478bd9Sstevel@tonic-gate 
6047c478bd9Sstevel@tonic-gate 	newcb->cb_prev = cblist->cb_prev;
6057c478bd9Sstevel@tonic-gate 	newcb->cb_next = cblist;
6067c478bd9Sstevel@tonic-gate 	cblist->cb_prev->cb_next = newcb;
6077c478bd9Sstevel@tonic-gate 	cblist->cb_prev = newcb;
6087c478bd9Sstevel@tonic-gate }
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate /*
611*7a0cc5a9SMarcel Telka  * Remove the callback from a list.
612*7a0cc5a9SMarcel Telka  */
613*7a0cc5a9SMarcel Telka 
614*7a0cc5a9SMarcel Telka void
flk_del_callback(flk_callback_t * flk_cb)615*7a0cc5a9SMarcel Telka flk_del_callback(flk_callback_t *flk_cb)
616*7a0cc5a9SMarcel Telka {
617*7a0cc5a9SMarcel Telka 	flk_cb->cb_next->cb_prev = flk_cb->cb_prev;
618*7a0cc5a9SMarcel Telka 	flk_cb->cb_prev->cb_next = flk_cb->cb_next;
619*7a0cc5a9SMarcel Telka 
620*7a0cc5a9SMarcel Telka 	flk_cb->cb_prev = flk_cb;
621*7a0cc5a9SMarcel Telka 	flk_cb->cb_next = flk_cb;
622*7a0cc5a9SMarcel Telka }
623*7a0cc5a9SMarcel Telka 
624*7a0cc5a9SMarcel Telka /*
6257c478bd9Sstevel@tonic-gate  * Initialize the flk_edge_cache data structure and create the
6267c478bd9Sstevel@tonic-gate  * nlm_reg_status array.
6277c478bd9Sstevel@tonic-gate  */
6287c478bd9Sstevel@tonic-gate 
6297c478bd9Sstevel@tonic-gate void
flk_init(void)6307c478bd9Sstevel@tonic-gate flk_init(void)
6317c478bd9Sstevel@tonic-gate {
6327c478bd9Sstevel@tonic-gate 	uint_t	i;
6337c478bd9Sstevel@tonic-gate 
6347c478bd9Sstevel@tonic-gate 	flk_edge_cache = kmem_cache_create("flk_edges",
6357c478bd9Sstevel@tonic-gate 	    sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
6367c478bd9Sstevel@tonic-gate 	if (flk_edge_cache == NULL) {
6377c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
6387c478bd9Sstevel@tonic-gate 	}
6397c478bd9Sstevel@tonic-gate 	/*
6407c478bd9Sstevel@tonic-gate 	 * Create the NLM registry object.
6417c478bd9Sstevel@tonic-gate 	 */
6427c478bd9Sstevel@tonic-gate 
6437c478bd9Sstevel@tonic-gate 	if (cluster_bootflags & CLUSTER_BOOTED) {
6447c478bd9Sstevel@tonic-gate 		/*
6457c478bd9Sstevel@tonic-gate 		 * This routine tells you the maximum node id that will be used
6467c478bd9Sstevel@tonic-gate 		 * in the cluster.  This number will be the size of the nlm
6477c478bd9Sstevel@tonic-gate 		 * registry status array.  We add 1 because we will be using
6487c478bd9Sstevel@tonic-gate 		 * all entries indexed from 0 to maxnodeid; e.g., from 0
6497c478bd9Sstevel@tonic-gate 		 * to 64, for a total of 65 entries.
6507c478bd9Sstevel@tonic-gate 		 */
6517c478bd9Sstevel@tonic-gate 		nlm_status_size = clconf_maximum_nodeid() + 1;
6527c478bd9Sstevel@tonic-gate 	} else {
6537c478bd9Sstevel@tonic-gate 		nlm_status_size = 0;
6547c478bd9Sstevel@tonic-gate 	}
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 	if (nlm_status_size != 0) {	/* booted as a cluster */
6577c478bd9Sstevel@tonic-gate 		nlm_reg_status = (flk_nlm_status_t *)
6587c478bd9Sstevel@tonic-gate 		    kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
6597c478bd9Sstevel@tonic-gate 		    KM_SLEEP);
6607c478bd9Sstevel@tonic-gate 
6617c478bd9Sstevel@tonic-gate 		/* initialize all NLM states in array to NLM_UNKNOWN */
6627c478bd9Sstevel@tonic-gate 		for (i = 0; i < nlm_status_size; i++) {
6637c478bd9Sstevel@tonic-gate 			nlm_reg_status[i] = FLK_NLM_UNKNOWN;
6647c478bd9Sstevel@tonic-gate 		}
6657c478bd9Sstevel@tonic-gate 	}
6667c478bd9Sstevel@tonic-gate }
6677c478bd9Sstevel@tonic-gate 
6687c478bd9Sstevel@tonic-gate /*
6697c478bd9Sstevel@tonic-gate  * Zone constructor/destructor callbacks to be executed when a zone is
6707c478bd9Sstevel@tonic-gate  * created/destroyed.
6717c478bd9Sstevel@tonic-gate  */
6727c478bd9Sstevel@tonic-gate /* ARGSUSED */
6737c478bd9Sstevel@tonic-gate void *
flk_zone_init(zoneid_t zoneid)6747c478bd9Sstevel@tonic-gate flk_zone_init(zoneid_t zoneid)
6757c478bd9Sstevel@tonic-gate {
6767c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
6777c478bd9Sstevel@tonic-gate 	uint_t i;
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate 	fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
6807c478bd9Sstevel@tonic-gate 	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
6817c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++)
6827c478bd9Sstevel@tonic-gate 		fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
6837c478bd9Sstevel@tonic-gate 	return (fg);
6847c478bd9Sstevel@tonic-gate }
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate /* ARGSUSED */
6877c478bd9Sstevel@tonic-gate void
flk_zone_fini(zoneid_t zoneid,void * data)6887c478bd9Sstevel@tonic-gate flk_zone_fini(zoneid_t zoneid, void *data)
6897c478bd9Sstevel@tonic-gate {
6907c478bd9Sstevel@tonic-gate 	struct flock_globals *fg = data;
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate 	kmem_free(fg, sizeof (*fg));
6937c478bd9Sstevel@tonic-gate }
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate /*
696da6c28aaSamw  * Get a lock_descriptor structure with initialization of edge lists.
6977c478bd9Sstevel@tonic-gate  */
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate static lock_descriptor_t *
flk_get_lock(void)7007c478bd9Sstevel@tonic-gate flk_get_lock(void)
7017c478bd9Sstevel@tonic-gate {
7027c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*l;
7037c478bd9Sstevel@tonic-gate 
7047c478bd9Sstevel@tonic-gate 	l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
7077c478bd9Sstevel@tonic-gate 	l->l_edge.edge_in_next = &l->l_edge;
7087c478bd9Sstevel@tonic-gate 	l->l_edge.edge_in_prev = &l->l_edge;
7097c478bd9Sstevel@tonic-gate 	l->l_edge.edge_adj_next = &l->l_edge;
7107c478bd9Sstevel@tonic-gate 	l->l_edge.edge_adj_prev = &l->l_edge;
7117c478bd9Sstevel@tonic-gate 	l->pvertex = -1;
7127c478bd9Sstevel@tonic-gate 	l->l_status = FLK_INITIAL_STATE;
7137c478bd9Sstevel@tonic-gate 	flk_lock_allocs++;
7147c478bd9Sstevel@tonic-gate 	return (l);
7157c478bd9Sstevel@tonic-gate }
7167c478bd9Sstevel@tonic-gate 
7177c478bd9Sstevel@tonic-gate /*
7187c478bd9Sstevel@tonic-gate  * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
7197c478bd9Sstevel@tonic-gate  * when some thread has a reference to it as in reclock().
7207c478bd9Sstevel@tonic-gate  */
7217c478bd9Sstevel@tonic-gate 
7227c478bd9Sstevel@tonic-gate void
flk_free_lock(lock_descriptor_t * lock)7237c478bd9Sstevel@tonic-gate flk_free_lock(lock_descriptor_t	*lock)
7247c478bd9Sstevel@tonic-gate {
7257c478bd9Sstevel@tonic-gate 	ASSERT(IS_DEAD(lock));
7267c478bd9Sstevel@tonic-gate 	if (IS_REFERENCED(lock)) {
7277c478bd9Sstevel@tonic-gate 		lock->l_state |= DELETED_LOCK;
7287c478bd9Sstevel@tonic-gate 		return;
7297c478bd9Sstevel@tonic-gate 	}
7307c478bd9Sstevel@tonic-gate 	flk_lock_frees++;
7317c478bd9Sstevel@tonic-gate 	kmem_free((void *)lock, sizeof (lock_descriptor_t));
7327c478bd9Sstevel@tonic-gate }
7337c478bd9Sstevel@tonic-gate 
/*
 * Transition 'lock' to 'new_state', enforcing the wakeup ordering
 * described below, and notifying PXFS of the transition for
 * clustered locks.
 */
void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
	/*
	 * Locks in the sleeping list may be woken up in a number of ways,
	 * and more than once.  If a sleeping lock is signaled awake more
	 * than once, then it may or may not change state depending on its
	 * current state.
	 * Also note that NLM locks that are sleeping could be moved to an
	 * interrupted state more than once if the unlock request is
	 * retransmitted by the NLM client - the second time around, this is
	 * just a nop.
	 * The ordering of being signaled awake is:
	 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
	 * The checks below implement this ordering.
	 */
	if (IS_INTERRUPTED(lock)) {
		/* interrupted wins over cancel/grant; repeats are no-ops */
		if ((new_state == FLK_CANCELLED_STATE) ||
		    (new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE)) {
			return;
		}
	}
	if (IS_CANCELLED(lock)) {
		/* cancelled wins over grant; repeated cancels are no-ops */
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return;
		}
	}
	/* validate that this transition is legal */
	CHECK_LOCK_TRANSITION(lock->l_status, new_state);
	if (IS_PXFS(lock)) {
		/* Clustering: notify PXFS of the state transition */
		cl_flk_state_transition_notify(lock, lock->l_status, new_state);
	}
	lock->l_status = new_state;
}
7697c478bd9Sstevel@tonic-gate 
7707c478bd9Sstevel@tonic-gate /*
7717c478bd9Sstevel@tonic-gate  * Routine that checks whether there are any blocking locks in the system.
7727c478bd9Sstevel@tonic-gate  *
7737c478bd9Sstevel@tonic-gate  * The policy followed is if a write lock is sleeping we don't allow read
7747c478bd9Sstevel@tonic-gate  * locks before this write lock even though there may not be any active
7757c478bd9Sstevel@tonic-gate  * locks corresponding to the read locks' region.
7767c478bd9Sstevel@tonic-gate  *
7777c478bd9Sstevel@tonic-gate  * flk_add_edge() function adds an edge between l1 and l2 iff there
7787c478bd9Sstevel@tonic-gate  * is no path between l1 and l2. This is done to have a "minimum
7797c478bd9Sstevel@tonic-gate  * storage representation" of the dependency graph.
7807c478bd9Sstevel@tonic-gate  *
7817c478bd9Sstevel@tonic-gate  * Another property of the graph is since only the new request throws
7827c478bd9Sstevel@tonic-gate  * edges to the existing locks in the graph, the graph is always topologically
7837c478bd9Sstevel@tonic-gate  * ordered.
7847c478bd9Sstevel@tonic-gate  */
7857c478bd9Sstevel@tonic-gate 
/*
 * Process 'request' against the active and sleeping locks of its vnode.
 * Returns 0 when the request is granted (executed), EAGAIN when it is
 * blocked and unwilling to wait, EDEADLK when sleeping would create a
 * deadlock, an error from flk_add_edge(), or the result of
 * flk_wait_execute_request() when the request goes to sleep.
 * Caller must hold the graph mutex.
 */
static int
flk_process_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	lock_descriptor_t *lock;
	int request_blocked_by_active = 0;
	int request_blocked_by_granted = 0;
	int request_blocked_by_sleeping = 0;
	vnode_t	*vp = request->l_vnode;
	int	error = 0;
	int request_will_wait = 0;
	int found_covering_lock = 0;
	lock_descriptor_t *covered_by = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	request_will_wait = IS_WILLING_TO_SLEEP(request);

	/*
	 * check active locks
	 */

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);


	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				/* a non-waiting blocked request fails now */
				if (!request_will_wait)
					return (EAGAIN);
				request_blocked_by_active = 1;
				break;
			}
			/*
			 * Grant lock if it is for the same owner holding active
			 * lock that covers the request.
			 */

			if (SAME_OWNER(lock, request) &&
			    COVERS(lock, request) &&
			    (request->l_type == F_RDLCK))
				return (flk_execute_request(request));
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (!request_blocked_by_active) {
		lock_descriptor_t *lk[1];
		lock_descriptor_t *first_glock = NULL;
		/*
		 * Shall we grant this?! NO!!
		 * What about those locks that were just granted and still
		 * in sleep queue. Those threads are woken up and so locks
		 * are almost active.
		 */
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					/* distinguish granted from sleeping */
					if (IS_GRANTED(lock)) {
						request_blocked_by_granted = 1;
					} else {
						request_blocked_by_sleeping = 1;
					}
				}

				lock = lock->l_next;
			} while ((lock->l_vnode == vp));
			/* last sleeping-list entry for this vnode */
			first_glock = lock->l_prev;
			ASSERT(first_glock->l_vnode == vp);
		}

		if (request_blocked_by_granted)
			goto block;

		if (!request_blocked_by_sleeping) {
			/*
			 * If the request isn't going to be blocked by a
			 * sleeping request, we know that it isn't going to
			 * be blocked; we can just execute the request --
			 * without performing costly deadlock detection.
			 */
			ASSERT(!request_blocked_by_active);
			return (flk_execute_request(request));
		} else if (request->l_type == F_RDLCK) {
			/*
			 * If we have a sleeping writer in the requested
			 * lock's range, block.
			 */
			goto block;
		}

		/*
		 * A write request blocked only by sleeping locks: recompute
		 * the dependencies of all active and granted locks on this
		 * vnode against the request before deciding.
		 */
		lk[0] = request;
		request->l_state |= RECOMPUTE_LOCK;
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		if (lock) {
			do {
				flk_recompute_dependencies(lock, lk, 1, 0);
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
		/* walk the sleeping list backwards, granted entries only */
		lock = first_glock;
		if (lock) {
			do {
				if (IS_GRANTED(lock)) {
				flk_recompute_dependencies(lock, lk, 1, 0);
				}
				lock = lock->l_prev;
			} while ((lock->l_vnode == vp));
		}
		request->l_state &= ~RECOMPUTE_LOCK;
		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_execute_request(request));
	}

block:
	if (request_will_wait)
		flk_graph_uncolor(gp);

	/* check sleeping locks */

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	/*
	 * If we find a sleeping write lock that is a superset of the
	 * region wanted by request we can be assured that by adding an
	 * edge to this write lock we have paths to all locks in the
	 * graph that blocks the request except in one case and that is why
	 * another check for SAME_OWNER in the loop below. The exception
	 * case is when this process that owns the sleeping write lock 'l1'
	 * has other locks l2, l3, l4 that are in the system and arrived
	 * before l1. l1 does not have path to these locks as they are from
	 * same process. We break when we find a second covering sleeping
	 * lock l5 owned by a process different from that owning l1, because
	 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
	 * it has l1 would have produced a deadlock already.
	 */

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				if (COVERS(lock, request) &&
				    lock->l_type == F_WRLCK) {
					if (found_covering_lock &&
					    !SAME_OWNER(lock, covered_by)) {
						/*
						 * second covering writer with
						 * a different owner: done
						 * (found_covering_lock == 2)
						 */
						found_covering_lock++;
						break;
					}
					found_covering_lock = 1;
					covered_by = lock;
				}
				/* skip locks owned by the covering owner */
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    !found_covering_lock, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

/*
 * found_covering_lock == 2 iff at this point 'request' has paths
 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
 * point 'request' has paths to all locks that blocks 'request' whose owners
 * are not same as the one that covers 'request' (covered_by above) and
 * we can have locks whose owner is same as covered_by in the active list.
 */

	if (request_blocked_by_active && found_covering_lock != 2) {
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		ASSERT(lock != NULL);
		do {
			if (BLOCKS(lock, request)) {
				/* skip locks owned by the covering owner */
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    CHECK_CYCLE, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (NOT_BLOCKED(request)) {
		/*
		 * request not dependent on any other locks
		 * so execute this request
		 */
		return (flk_execute_request(request));
	} else {
		/*
		 * check for deadlock
		 */
		if (flk_check_deadlock(request))
			return (EDEADLK);
		/*
		 * this thread has to sleep
		 */
		return (flk_wait_execute_request(request));
	}
}
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate /*
9987c478bd9Sstevel@tonic-gate  * The actual execution of the request in the simple case is only to
9997c478bd9Sstevel@tonic-gate  * insert the 'request' in the list of active locks if it is not an
10007c478bd9Sstevel@tonic-gate  * UNLOCK.
10017c478bd9Sstevel@tonic-gate  * We have to consider the existing active locks' relation to
10027c478bd9Sstevel@tonic-gate  * this 'request' if they are owned by same process. flk_relation() does
10037c478bd9Sstevel@tonic-gate  * this job and sees to that the dependency graph information is maintained
10047c478bd9Sstevel@tonic-gate  * properly.
10057c478bd9Sstevel@tonic-gate  */
10067c478bd9Sstevel@tonic-gate 
/*
 * Execute 'request': merge it with the caller's existing active locks
 * via flk_relation() and, unless it is an unlock, insert it into the
 * active list.  Always returns 0.  Caller must hold the graph mutex.
 */
int
flk_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	vnode_t	*vp = request->l_vnode;
	lock_descriptor_t	*lock, *lock1;
	int done_searching = 0;

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	flk_set_state(request, FLK_START_STATE);

	ASSERT(NOT_BLOCKED(request));

	/* IO_LOCK requests are only to check status */

	if (IS_IO_LOCK(request))
		return (0);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	/* unlocking with no active locks on this vnode is a no-op */
	if (lock == NULL && request->l_type == F_UNLCK)
		return (0);
	if (lock == NULL) {
		/* nothing to reconcile with; just make it active */
		flk_insert_active_lock(request);
		return (0);
	}

	/*
	 * Reconcile 'request' with each same-owner active lock.
	 * flk_relation() reports when the search may stop.
	 */
	do {
		/*
		 * Fetch the successor first; flk_relation() presumably may
		 * alter the active list -- NOTE(review): inferred from the
		 * saved-next pattern here, confirm against flk_relation().
		 */
		lock1 = lock->l_next;
		if (SAME_OWNER(request, lock)) {
			done_searching = flk_relation(lock, request);
		}
		lock = lock1;
	} while (lock->l_vnode == vp && !done_searching);

	/*
	 * insert in active queue
	 */

	if (request->l_type != F_UNLCK)
		flk_insert_active_lock(request);

	return (0);
}
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate /*
 * 'request' is blocked by some one therefore we put it into sleep queue.
 *
 * Returns 0 once the lock has been granted and executed, EINTR if the
 * sleep was interrupted by a signal, EBADF if another thread closed the
 * file while we slept, ENOLCK if the lock manager is shutting down, or
 * PXFS_LOCK_BLOCKED to tell a PXFS client to sleep on its own side.
 */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	callb_cpr_t	*cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	/*
	 * NOTE: fg and index are only initialized for lock manager (NLM)
	 * requests; the post-sleep check further down is guarded by the
	 * same IS_LOCKMGR() test, so they are never read uninitialized.
	 */
	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				/*
				 * Lock manager is going down; cancel and
				 * return ENOLCK so the client retransmits.
				 */
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			/*
			 * If the request is an NLM server lock request,
			 * and the NLM state of the lock request is not
			 * NLM_UP (because the NLM server is shutting
			 * down), then cancel the sleeping lock and
			 * return error ENOLCK that will encourage the
			 * client to retransmit.
			 */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* Clustering: For blocking PXFS locks, return */
	if (IS_PXFS(request)) {
		/*
		 * PXFS locks sleep on the client side.
		 * The callback argument is used to wake up the sleeper
		 * when the lock is granted.
		 * We return -1 (rather than an errno value) to indicate
		 * the client side should sleep
		 */
		return (PXFS_LOCK_BLOCKED);
	}

	if (request->l_callbacks != NULL) {
		/*
		 * To make sure the shutdown code works correctly, either
		 * the callback must happen after putting the lock on the
		 * sleep list, or we must check the shutdown status after
		 * returning from the callback (and before sleeping).  At
		 * least for now, we'll use the first option.  If a
		 * shutdown or signal or whatever happened while the graph
		 * mutex was dropped, that will be detected by
		 * wait_for_lock().
		 */
		mutex_exit(&gp->gp_mutex);

		cprp = flk_invoke_callbacks(request->l_callbacks,
		    FLK_BEFORE_SLEEP);

		mutex_enter(&gp->gp_mutex);

		if (cprp == NULL) {
			wait_for_lock(request);
		} else {
			/*
			 * The callback supplied CPR info: bracket the
			 * sleep with CALLB_CPR_SAFE_BEGIN/END (under the
			 * CPR lock) so this thread does not block a
			 * checkpoint/resume while waiting for the lock.
			 */
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_BEGIN(cprp);
			mutex_exit(cprp->cc_lockp);
			wait_for_lock(request);
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
			mutex_exit(cprp->cc_lockp);
		}

		/* Drop the graph mutex around the FLK_AFTER_SLEEP callback. */
		mutex_exit(&gp->gp_mutex);
		(void) flk_invoke_callbacks(request->l_callbacks,
		    FLK_AFTER_SLEEP);
		mutex_enter(&gp->gp_mutex);
	} else {
		wait_for_lock(request);
	}

	if (IS_LOCKMGR(request)) {
		/*
		 * If the lock manager is shutting down, return an
		 * error that will encourage the client to retransmit.
		 */
		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
		    !IS_GRANTED(request)) {
			flk_cancel_sleeping_lock(request, 1);
			return (ENOLCK);
		}
	}

	if (IS_INTERRUPTED(request)) {
		/* we got a signal, or act like we did */
		flk_cancel_sleeping_lock(request, 1);
		return (EINTR);
	}

	/* Cancelled if some other thread has closed the file */

	if (IS_CANCELLED(request)) {
		flk_cancel_sleeping_lock(request, 1);
		return (EBADF);
	}

	/*
	 * The lock was granted while we slept: pull it off the sleep
	 * queue and install it as an active lock.
	 */
	request->l_state &= ~GRANTED_LOCK;
	REMOVE_SLEEP_QUEUE(request);
	return (flk_execute_request(request));
}
11767c478bd9Sstevel@tonic-gate 
/*
 * This routine adds an edge between from and to because from depends
 * on to. If asked to check for deadlock it checks whether there are any
 * reachable locks from "from_lock" that is owned by the same process
 * as "from_lock".
 * NOTE: It is the caller's responsibility to make sure that the color
 * of the graph is consistent between the calls to flk_add_edge as done
 * in flk_process_request. This routine does not color and check for
 * deadlock explicitly.
 *
 * Returns 0 on success, or EDEADLK when cycle checking finds a
 * reachable lock with the same owner (in which case every adjacent
 * edge of "from_lock" added so far is removed again).
 */

static int
flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
    int check_cycle, int update_graph)
{
	edge_t	*edge;
	edge_t	*ep;
	lock_descriptor_t	*vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	/*
	 * if to vertex already has mark_color just return
	 * don't add an edge as it is reachable from from vertex
	 * before itself.
	 */

	if (COLORED(to_lock))
		return (0);

	edge = flk_get_edge();

	/*
	 * set the from and to vertex
	 */

	edge->from_vertex = from_lock;
	edge->to_vertex = to_lock;

	/*
	 * put in adjacency list of from vertex
	 */

	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
	edge->edge_adj_prev = &from_lock->l_edge;
	from_lock->l_edge.edge_adj_next = edge;

	/*
	 * put in in list of to vertex
	 */

	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
	edge->edge_in_next = to_lock->l_edge.edge_in_next;
	to_lock->l_edge.edge_in_next = edge;
	edge->edge_in_prev = &to_lock->l_edge;


	if (update_graph) {
		/* Mirror the new edge into the process graph and return. */
		flk_update_proc_graph(edge, 0);
		return (0);
	}
	if (!check_cycle) {
		return (0);
	}

	/*
	 * Iterative depth-first search from "from_lock" over the
	 * adjacency lists, coloring each vertex as it is visited.
	 * Reaching a vertex with the same owner as "from_lock" means
	 * this edge would close a deadlock cycle.
	 */
	STACK_PUSH(vertex_stack, from_lock, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex);
		    ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (SAME_OWNER(ep->to_vertex, from_lock))
				goto dead_lock;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
		}
	}
	return (0);

dead_lock:

	/*
	 * remove all edges
	 */

	ep = FIRST_ADJ(from_lock);

	while (ep != HEAD(from_lock)) {
		/* Save the successor before the edge is freed. */
		IN_LIST_REMOVE(ep);
		from_lock->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = from_lock->l_sedge;
	}
	return (EDEADLK);
}
12807c478bd9Sstevel@tonic-gate 
12817c478bd9Sstevel@tonic-gate /*
12827c478bd9Sstevel@tonic-gate  * Get an edge structure for representing the dependency between two locks.
12837c478bd9Sstevel@tonic-gate  */
12847c478bd9Sstevel@tonic-gate 
12857c478bd9Sstevel@tonic-gate static edge_t *
flk_get_edge()12867c478bd9Sstevel@tonic-gate flk_get_edge()
12877c478bd9Sstevel@tonic-gate {
12887c478bd9Sstevel@tonic-gate 	edge_t	*ep;
12897c478bd9Sstevel@tonic-gate 
12907c478bd9Sstevel@tonic-gate 	ASSERT(flk_edge_cache != NULL);
12917c478bd9Sstevel@tonic-gate 
12927c478bd9Sstevel@tonic-gate 	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
12937c478bd9Sstevel@tonic-gate 	edge_allocs++;
12947c478bd9Sstevel@tonic-gate 	return (ep);
12957c478bd9Sstevel@tonic-gate }
12967c478bd9Sstevel@tonic-gate 
12977c478bd9Sstevel@tonic-gate /*
12987c478bd9Sstevel@tonic-gate  * Free the edge structure.
12997c478bd9Sstevel@tonic-gate  */
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate static void
flk_free_edge(edge_t * ep)13027c478bd9Sstevel@tonic-gate flk_free_edge(edge_t *ep)
13037c478bd9Sstevel@tonic-gate {
13047c478bd9Sstevel@tonic-gate 	edge_frees++;
13057c478bd9Sstevel@tonic-gate 	kmem_cache_free(flk_edge_cache, (void *)ep);
13067c478bd9Sstevel@tonic-gate }
13077c478bd9Sstevel@tonic-gate 
/*
 * Check the relationship of request with lock and perform the
 * recomputation of dependencies, break lock if required, and return
 * 1 if request cannot have any more relationship with the next
 * active locks.
 * The 'lock' and 'request' are compared and in case of overlap we
 * delete the 'lock' and form new locks to represent the non-overlapped
 * portion of original 'lock'. This function has side effects such as
 * 'lock' will be freed, new locks will be added to the active list.
 */

static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
	int lock_effect;
	lock_descriptor_t *lock1, *lock2;
	lock_descriptor_t *topology[3];	/* replacement pieces, in order */
	int nvertex = 0;
	int i;
	edge_t	*ep;
	graph_t	*gp = (lock->l_graph);


	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	topology[0] = topology[1] = topology[2] = NULL;

	/* Classify what 'request' does to the access mode of 'lock'. */
	if (request->l_type == F_UNLCK)
		lock_effect = FLK_UNLOCK;
	else if (request->l_type == F_RDLCK &&
	    lock->l_type == F_WRLCK)
		lock_effect = FLK_DOWNGRADE;
	else if (request->l_type == F_WRLCK &&
	    lock->l_type == F_RDLCK)
		lock_effect = FLK_UPGRADE;
	else
		lock_effect = FLK_STAY_SAME;

	/* Case 1: 'lock' lies entirely before 'request'. */
	if (lock->l_end < request->l_start) {
		if (lock->l_end == request->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			/* Adjacent and same type: coalesce into request. */
			topology[0] = request;
			request->l_start = lock->l_start;
			nvertex = 1;
			goto recompute;
		} else {
			/* Disjoint; a later lock may still overlap. */
			return (0);
		}
	}

	/* Case 2: 'lock' lies entirely after 'request'. */
	if (lock->l_start > request->l_end) {
		if (request->l_end == lock->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			/* Adjacent and same type: coalesce into request. */
			topology[0] = request;
			request->l_end = lock->l_end;
			nvertex = 1;
			goto recompute;
		} else {
			/* No further lock on the list can overlap either. */
			return (1);
		}
	}

	/* Case 3: overlap, 'lock' extends past the end of 'request'. */
	if (request->l_end < lock->l_end) {
		if (request->l_start > lock->l_start) {
			/* 'request' is strictly inside 'lock'. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* Keep both leftover pieces of 'lock'. */
				lock1 = flk_get_lock();
				lock2 = flk_get_lock();
				COPY(lock1, lock);
				COPY(lock2, lock);
				lock1->l_start = lock->l_start;
				lock1->l_end = request->l_start - 1;
				lock2->l_start = request->l_end + 1;
				lock2->l_end = lock->l_end;
				topology[0] = lock1;
				topology[1] = lock2;
				topology[2] = request;
				nvertex = 3;
			}
		} else if (request->l_start < lock->l_start) {
			/* 'request' covers the front part of 'lock'. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* Keep the tail of 'lock' past request. */
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else  {
			/* Same start; 'request' ends inside 'lock'. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		}
	} else if (request->l_end > lock->l_end) {
		/* Case 4: overlap, 'request' extends past 'lock'. */
		if (request->l_start > lock->l_start)  {
			/* Keep the head of 'lock' before request. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start)  {
			/* 'request' fully covers 'lock'. */
			topology[0] = request;
			nvertex = 1;
		} else {
			/* Same start; 'request' fully covers 'lock'. */
			topology[0] = request;
			nvertex = 1;
		}
	} else {
		/* Case 5: 'request' and 'lock' end at the same offset. */
		if (request->l_start > lock->l_start) {
			/* Keep the head of 'lock' before request. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			/* 'request' fully covers 'lock'. */
			topology[0] = request;
			nvertex = 1;
		} else {
			/* Exact range match. */
			if (lock_effect !=  FLK_UNLOCK) {
				topology[0] = request;
				nvertex = 1;
			} else {
				/*
				 * Exact unlock: just drop 'lock', wake
				 * its sleepers, and we are done with
				 * this request.
				 */
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				CHECK_SLEEPING_LOCKS(gp);
				CHECK_ACTIVE_LOCKS(gp);
				return (1);
			}
		}
	}

recompute:

	/*
	 * For unlock we don't send the 'request' to for recomputing
	 * dependencies because no lock will add an edge to this.
	 */

	if (lock_effect == FLK_UNLOCK) {
		topology[nvertex-1] = NULL;
		nvertex--;
	}
	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state |= RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	ASSERT(FIRST_ADJ(lock) == HEAD(lock));

	/*
	 * we remove the adjacent edges for all vertices' to this vertex
	 * 'lock'.
	 */

	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		ADJ_LIST_REMOVE(ep);
		ep = NEXT_IN(ep);
	}

	flk_delete_active_lock(lock, 0);

	/* We are ready for recomputing the dependencies now */

	flk_recompute_dependencies(lock, topology, nvertex, 1);

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}


	/*
	 * Restore nvertex (dropped above for unlock) so the loop below
	 * inserts exactly the leftover pieces topology[0..nvertex-2];
	 * 'request' itself is never inserted here.
	 */
	if (lock_effect == FLK_UNLOCK) {
		nvertex++;
	}
	for (i = 0; i < nvertex - 1; i++) {
		flk_insert_active_lock(topology[i]);
	}


	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
		/* Weaker access: sleepers blocked by 'lock' may now run. */
		flk_wakeup(lock, 0);
	} else {
		/*
		 * Same or stronger access: move each incoming edge of
		 * 'lock' into the process graph instead of waking the
		 * sleeper, then free the edge.
		 */
		ep = FIRST_IN(lock);
		while (ep != HEAD(lock)) {
			lock->l_sedge = NEXT_IN(ep);
			IN_LIST_REMOVE(ep);
			flk_update_proc_graph(ep, 1);
			flk_free_edge(ep);
			ep = lock->l_sedge;
		}
	}
	flk_free_lock(lock);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	return (0);
}
15427c478bd9Sstevel@tonic-gate 
15437c478bd9Sstevel@tonic-gate /*
15447c478bd9Sstevel@tonic-gate  * Insert a lock into the active queue.
15457c478bd9Sstevel@tonic-gate  */
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate static void
flk_insert_active_lock(lock_descriptor_t * new_lock)15487c478bd9Sstevel@tonic-gate flk_insert_active_lock(lock_descriptor_t *new_lock)
15497c478bd9Sstevel@tonic-gate {
15507c478bd9Sstevel@tonic-gate 	graph_t	*gp = new_lock->l_graph;
15517c478bd9Sstevel@tonic-gate 	vnode_t	*vp = new_lock->l_vnode;
15527c478bd9Sstevel@tonic-gate 	lock_descriptor_t *first_lock, *lock;
15537c478bd9Sstevel@tonic-gate 
15547c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
15557c478bd9Sstevel@tonic-gate 
15567c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
15577c478bd9Sstevel@tonic-gate 	first_lock = lock;
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate 	if (first_lock != NULL) {
15607c478bd9Sstevel@tonic-gate 		for (; (lock->l_vnode == vp &&
15617c478bd9Sstevel@tonic-gate 		    lock->l_start < new_lock->l_start); lock = lock->l_next)
15627c478bd9Sstevel@tonic-gate 			;
15637c478bd9Sstevel@tonic-gate 	} else {
15647c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp);
15657c478bd9Sstevel@tonic-gate 	}
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = new_lock;
15687c478bd9Sstevel@tonic-gate 	new_lock->l_next = lock;
15697c478bd9Sstevel@tonic-gate 	new_lock->l_prev = lock->l_prev;
15707c478bd9Sstevel@tonic-gate 	lock->l_prev = new_lock;
15717c478bd9Sstevel@tonic-gate 
15727c478bd9Sstevel@tonic-gate 	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
15737c478bd9Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)new_lock;
15747c478bd9Sstevel@tonic-gate 	}
15757c478bd9Sstevel@tonic-gate 	flk_set_state(new_lock, FLK_ACTIVE_STATE);
15767c478bd9Sstevel@tonic-gate 	new_lock->l_state |= ACTIVE_LOCK;
15777c478bd9Sstevel@tonic-gate 
15787c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
15797c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
15807c478bd9Sstevel@tonic-gate }
15817c478bd9Sstevel@tonic-gate 
15827c478bd9Sstevel@tonic-gate /*
15837c478bd9Sstevel@tonic-gate  * Delete the active lock : Performs two functions depending on the
15847c478bd9Sstevel@tonic-gate  * value of second parameter. One is to remove from the active lists
15857c478bd9Sstevel@tonic-gate  * only and other is to both remove and free the lock.
15867c478bd9Sstevel@tonic-gate  */
15877c478bd9Sstevel@tonic-gate 
15887c478bd9Sstevel@tonic-gate static void
flk_delete_active_lock(lock_descriptor_t * lock,int free_lock)15897c478bd9Sstevel@tonic-gate flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
15907c478bd9Sstevel@tonic-gate {
15917c478bd9Sstevel@tonic-gate 	vnode_t *vp = lock->l_vnode;
15927c478bd9Sstevel@tonic-gate 	graph_t	*gp = lock->l_graph;
15937c478bd9Sstevel@tonic-gate 
15947c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
15957c478bd9Sstevel@tonic-gate 	if (free_lock)
15967c478bd9Sstevel@tonic-gate 		ASSERT(NO_DEPENDENTS(lock));
15977c478bd9Sstevel@tonic-gate 	ASSERT(NOT_BLOCKED(lock));
15987c478bd9Sstevel@tonic-gate 	ASSERT(IS_ACTIVE(lock));
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate 	ASSERT((vp->v_filocks != NULL));
16017c478bd9Sstevel@tonic-gate 
16027c478bd9Sstevel@tonic-gate 	if (vp->v_filocks == (struct filock *)lock) {
16037c478bd9Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)
16047c478bd9Sstevel@tonic-gate 		    ((lock->l_next->l_vnode == vp) ? lock->l_next :
16057c478bd9Sstevel@tonic-gate 		    NULL);
16067c478bd9Sstevel@tonic-gate 	}
16077c478bd9Sstevel@tonic-gate 	lock->l_next->l_prev = lock->l_prev;
16087c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = lock->l_next;
16097c478bd9Sstevel@tonic-gate 	lock->l_next = lock->l_prev = NULL;
16107c478bd9Sstevel@tonic-gate 	flk_set_state(lock, FLK_DEAD_STATE);
16117c478bd9Sstevel@tonic-gate 	lock->l_state &= ~ACTIVE_LOCK;
16127c478bd9Sstevel@tonic-gate 
16137c478bd9Sstevel@tonic-gate 	if (free_lock)
16147c478bd9Sstevel@tonic-gate 		flk_free_lock(lock);
16157c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
16167c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
16177c478bd9Sstevel@tonic-gate }
16187c478bd9Sstevel@tonic-gate 
16197c478bd9Sstevel@tonic-gate /*
16207c478bd9Sstevel@tonic-gate  * Insert into the sleep queue.
16217c478bd9Sstevel@tonic-gate  */
16227c478bd9Sstevel@tonic-gate 
16237c478bd9Sstevel@tonic-gate static void
flk_insert_sleeping_lock(lock_descriptor_t * request)16247c478bd9Sstevel@tonic-gate flk_insert_sleeping_lock(lock_descriptor_t *request)
16257c478bd9Sstevel@tonic-gate {
16267c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
16277c478bd9Sstevel@tonic-gate 	vnode_t	*vp = request->l_vnode;
16287c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
16297c478bd9Sstevel@tonic-gate 
16307c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
16317c478bd9Sstevel@tonic-gate 	ASSERT(IS_INITIAL(request));
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate 	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
16347c478bd9Sstevel@tonic-gate 	    lock->l_vnode < vp); lock = lock->l_next)
16357c478bd9Sstevel@tonic-gate 		;
16367c478bd9Sstevel@tonic-gate 
16377c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = request;
16387c478bd9Sstevel@tonic-gate 	request->l_prev = lock->l_prev;
16397c478bd9Sstevel@tonic-gate 	lock->l_prev = request;
16407c478bd9Sstevel@tonic-gate 	request->l_next = lock;
16417c478bd9Sstevel@tonic-gate 	flk_set_state(request, FLK_SLEEPING_STATE);
16427c478bd9Sstevel@tonic-gate 	request->l_state |= SLEEPING_LOCK;
16437c478bd9Sstevel@tonic-gate }
16447c478bd9Sstevel@tonic-gate 
/*
 * Cancelling a sleeping lock implies removing a vertex from the
 * dependency graph and therefore we should recompute the dependencies
 * of all vertices that have a path  to this vertex, w.r.t. all
 * vertices reachable from this vertex.
 *
 * request:           the sleeping lock being cancelled; it is freed
 *                    before return (FLK_DEAD_STATE, flk_free_lock()).
 * remove_from_queue: non-zero if 'request' must also be unlinked from
 *                    the sleeping-lock queue.
 *
 * Caller must hold the graph mutex (asserted below).
 */

void
flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
{
	graph_t	*gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t **topology = NULL;
	edge_t	*ep;
	lock_descriptor_t *vertex, *lock;
	int nvertex = 0;
	int i;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/*
	 * count number of vertex pointers that has to be allocated
	 * All vertices that are reachable from request.
	 */

	STACK_PUSH(vertex_stack, request, l_stack);

	/*
	 * Iterative DFS along the out-edges; each newly reached vertex is
	 * tagged RECOMPUTE_LOCK so it is counted exactly once.
	 */
	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (IS_RECOMPUTE(ep->to_vertex))
				continue;
			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
			nvertex++;
		}
	}

	/*
	 * allocate memory for holding the vertex pointers
	 */

	if (nvertex) {
		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
		    KM_SLEEP);
	}

	/*
	 * one more pass to actually store the vertices in the
	 * allocated array.
	 * We first check sleeping locks and then active locks
	 * so that topology array will be in a topological
	 * order.
	 */

	nvertex = 0;
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			/* clear any stale color while walking the list */
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * remove in and out edges of request
	 * They are freed after updating proc_graph below.
	 */

	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
		ADJ_LIST_REMOVE(ep);
	}


	if (remove_from_queue)
		REMOVE_SLEEP_QUEUE(request);

	/* we are ready to recompute */

	flk_recompute_dependencies(request, topology, nvertex, 1);

	/*
	 * Tear down the out-edges of request.  l_sedge caches the
	 * successor because the edge is freed inside the loop.
	 */
	ep = FIRST_ADJ(request);
	while (ep != HEAD(request)) {
		IN_LIST_REMOVE(ep);
		request->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		flk_free_edge(ep);
		ep = request->l_sedge;
	}


	/*
	 * unset the RECOMPUTE flag in those vertices
	 */

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
	}

	/*
	 * free the topology
	 */
	if (nvertex)
		kmem_free((void *)topology,
		    (nvertex * sizeof (lock_descriptor_t *)));
	/*
	 * Possibility of some locks unblocked now
	 */

	/*
	 * Pass adj_list_remove == 0: the in-edges were already unlinked
	 * from the adjacency lists in the ADJ_LIST_REMOVE loop above.
	 */
	flk_wakeup(request, 0);

	/*
	 * we expect to have a correctly recomputed graph  now.
	 */
	flk_set_state(request, FLK_DEAD_STATE);
	flk_free_lock(request);
	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

}
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate /*
17897c478bd9Sstevel@tonic-gate  * Uncoloring the graph is simply to increment the mark value of the graph
17907c478bd9Sstevel@tonic-gate  * And only when wrap round takes place will we color all vertices in
17917c478bd9Sstevel@tonic-gate  * the graph explicitly.
17927c478bd9Sstevel@tonic-gate  */
17937c478bd9Sstevel@tonic-gate 
17947c478bd9Sstevel@tonic-gate static void
flk_graph_uncolor(graph_t * gp)17957c478bd9Sstevel@tonic-gate flk_graph_uncolor(graph_t *gp)
17967c478bd9Sstevel@tonic-gate {
17977c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 	if (gp->mark == UINT_MAX) {
18007c478bd9Sstevel@tonic-gate 		gp->mark = 1;
18017c478bd9Sstevel@tonic-gate 	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
18027c478bd9Sstevel@tonic-gate 	    lock = lock->l_next)
18037c478bd9Sstevel@tonic-gate 			lock->l_color  = 0;
18047c478bd9Sstevel@tonic-gate 
18057c478bd9Sstevel@tonic-gate 	for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
18067c478bd9Sstevel@tonic-gate 	    lock = lock->l_next)
18077c478bd9Sstevel@tonic-gate 			lock->l_color  = 0;
18087c478bd9Sstevel@tonic-gate 	} else {
18097c478bd9Sstevel@tonic-gate 		gp->mark++;
18107c478bd9Sstevel@tonic-gate 	}
18117c478bd9Sstevel@tonic-gate }
18127c478bd9Sstevel@tonic-gate 
/*
 * Wake up locks that are blocked on the given lock.
 *
 * Every in-edge of 'lock' represents a lock blocked on it.  Each edge
 * is removed from the process graph and freed; when the blocking lock's
 * dependent ends up with no remaining adjacent edges it is granted and
 * woken.  'adj_list_remove' is non-zero when the edges must also be
 * unlinked from the dependents' adjacency lists here (callers that have
 * already done that unlinking pass 0, e.g. flk_cancel_sleeping_lock()).
 *
 * Caller must hold the graph mutex.
 */

static void
flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
{
	edge_t	*ep;
	graph_t	*gp = lock->l_graph;
	lock_descriptor_t	*lck;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/* nothing is blocked on this lock */
	if (NO_DEPENDENTS(lock))
		return;
	ep = FIRST_IN(lock);
	do {
		/*
		 * delete the edge from the adjacency list
		 * of from vertex. if no more adjacent edges
		 * for this vertex wake this process.
		 */
		lck = ep->from_vertex;
		if (adj_list_remove)
			ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		if (NOT_BLOCKED(lck)) {
			GRANT_WAKEUP(lck);
		}
		/* cache the successor before the edge is freed */
		lock->l_sedge = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = lock->l_sedge;
	} while (ep != HEAD(lock));
	ASSERT(NO_DEPENDENTS(lock));
}
18487c478bd9Sstevel@tonic-gate 
/*
 * The dependents of request, is checked for its dependency against the
 * locks in topology (called topology because the array is and should be in
 * topological order for this algorithm, if not in topological order the
 * inner loop below might add more edges than necessary. Topological ordering
 * of vertices satisfies the property that all edges will be from left to
 * right i.e., topology[i] can have an edge to  topology[j], iff i<j)
 * If lock l1 in the dependent set of request is dependent (blocked by)
 * on lock l2 in topology but does not have a path to it, we add an edge
 * in the inner loop below.
 *
 * We don't want to add an edge between l1 and l2 if there exists
 * already a path from l1 to l2, so care has to be taken for those vertices
 * that  have two paths to 'request'. These vertices are referred to here
 * as barrier locks.
 *
 * The barriers has to be found (those vertex that originally had two paths
 * to request) because otherwise we may end up adding edges unnecessarily
 * to vertices in topology, and thus barrier vertices can have an edge
 * to a vertex in topology as well a path to it.
 *
 * request:      vertex being removed from the graph.
 * topology:     vertices reachable from request, in topological order.
 * nvertex:      number of entries in topology (0 means nothing to do).
 * update_graph: passed through to flk_add_edge() for proc-graph updates.
 *
 * Caller must hold the graph mutex.
 */

static void
flk_recompute_dependencies(lock_descriptor_t *request,
    lock_descriptor_t **topology, int nvertex, int update_graph)
{
	lock_descriptor_t *vertex, *lock;
	graph_t	*gp = request->l_graph;
	int i, count;
	int barrier_found = 0;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (nvertex == 0)
		return;
	flk_graph_uncolor(request->l_graph);
	barrier_found = flk_find_barriers(request);
	request->l_state |= RECOMPUTE_DONE;

	STACK_PUSH(vertex_stack, request, l_stack);
	/* l_sedge is the per-vertex cursor over its in-edge list */
	request->l_sedge = FIRST_IN(request);


	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		if (vertex->l_state & RECOMPUTE_DONE) {
			/* already recomputed; just advance its cursor */
			count = 0;
			goto next_in_edge;
		}
		if (IS_BARRIER(vertex)) {
			/* decrement the barrier count */
			if (vertex->l_index) {
				vertex->l_index--;
				/* this guy will be pushed again anyway ? */
				STACK_POP(vertex_stack, l_stack);
				if (vertex->l_index == 0)  {
				/*
				 * barrier is over we can recompute
				 * dependencies for this lock in the
				 * next stack pop
				 */
					vertex->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
		}
		vertex->l_state |= RECOMPUTE_DONE;
		flk_graph_uncolor(gp);
		count = flk_color_reachables(vertex);
		/*
		 * Add an edge to each topology member that blocks 'vertex'
		 * and is not already reachable from it (i.e. uncolored).
		 */
		for (i = 0; i < nvertex; i++) {
			lock = topology[i];
			if (COLORED(lock))
				continue;
			if (BLOCKS(lock, vertex)) {
				(void) flk_add_edge(vertex, lock,
				    NO_CHECK_CYCLE, update_graph);
				COLOR(lock);
				count++;
				count += flk_color_reachables(lock);
			}

		}

next_in_edge:
		if (count == nvertex ||
		    vertex->l_sedge == HEAD(vertex)) {
			/* prune the tree below this */
			STACK_POP(vertex_stack, l_stack);
			vertex->l_state &= ~RECOMPUTE_DONE;
			/* update the barrier locks below this! */
			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
				flk_graph_uncolor(gp);
				flk_update_barriers(vertex);
			}
			continue;
		}

		/* descend into the next dependent of 'vertex' */
		ep = vertex->l_sedge;
		lock = ep->from_vertex;
		STACK_PUSH(vertex_stack, lock, l_stack);
		lock->l_sedge = FIRST_IN(lock);
		vertex->l_sedge = NEXT_IN(ep);
	}

}
19567c478bd9Sstevel@tonic-gate 
19577c478bd9Sstevel@tonic-gate /*
19587c478bd9Sstevel@tonic-gate  * Color all reachable vertices from vertex that belongs to topology (here
19597c478bd9Sstevel@tonic-gate  * those that have RECOMPUTE_LOCK set in their state) and yet uncolored.
19607c478bd9Sstevel@tonic-gate  *
19617c478bd9Sstevel@tonic-gate  * Note: we need to use a different stack_link l_stack1 because this is
19627c478bd9Sstevel@tonic-gate  * called from flk_recompute_dependencies() that already uses a stack with
19637c478bd9Sstevel@tonic-gate  * l_stack as stack_link.
19647c478bd9Sstevel@tonic-gate  */
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate static int
flk_color_reachables(lock_descriptor_t * vertex)19677c478bd9Sstevel@tonic-gate flk_color_reachables(lock_descriptor_t *vertex)
19687c478bd9Sstevel@tonic-gate {
19697c478bd9Sstevel@tonic-gate 	lock_descriptor_t *ver, *lock;
19707c478bd9Sstevel@tonic-gate 	int count;
19717c478bd9Sstevel@tonic-gate 	edge_t	*ep;
19727c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
19737c478bd9Sstevel@tonic-gate 
19747c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
19757c478bd9Sstevel@tonic-gate 
19767c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, vertex, l_stack1);
19777c478bd9Sstevel@tonic-gate 	count = 0;
19787c478bd9Sstevel@tonic-gate 	while ((ver = STACK_TOP(vertex_stack)) != NULL) {
19797c478bd9Sstevel@tonic-gate 
19807c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
19817c478bd9Sstevel@tonic-gate 		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
19827c478bd9Sstevel@tonic-gate 		    ep = NEXT_ADJ(ep)) {
19837c478bd9Sstevel@tonic-gate 			lock = ep->to_vertex;
19847c478bd9Sstevel@tonic-gate 			if (COLORED(lock))
19857c478bd9Sstevel@tonic-gate 				continue;
19867c478bd9Sstevel@tonic-gate 			COLOR(lock);
19877c478bd9Sstevel@tonic-gate 			if (IS_RECOMPUTE(lock))
19887c478bd9Sstevel@tonic-gate 				count++;
19897c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lock, l_stack1);
19907c478bd9Sstevel@tonic-gate 		}
19917c478bd9Sstevel@tonic-gate 
19927c478bd9Sstevel@tonic-gate 	}
19937c478bd9Sstevel@tonic-gate 	return (count);
19947c478bd9Sstevel@tonic-gate }
19957c478bd9Sstevel@tonic-gate 
19967c478bd9Sstevel@tonic-gate /*
19977c478bd9Sstevel@tonic-gate  * Called from flk_recompute_dependencies() this routine decrements
19987c478bd9Sstevel@tonic-gate  * the barrier count of barrier vertices that are reachable from lock.
19997c478bd9Sstevel@tonic-gate  */
20007c478bd9Sstevel@tonic-gate 
20017c478bd9Sstevel@tonic-gate static void
flk_update_barriers(lock_descriptor_t * lock)20027c478bd9Sstevel@tonic-gate flk_update_barriers(lock_descriptor_t *lock)
20037c478bd9Sstevel@tonic-gate {
20047c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
20057c478bd9Sstevel@tonic-gate 	edge_t	*ep;
20067c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
20097c478bd9Sstevel@tonic-gate 
20107c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
20117c478bd9Sstevel@tonic-gate 
20127c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
20137c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
20147c478bd9Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
20157c478bd9Sstevel@tonic-gate 		    ep = NEXT_IN(ep)) {
20167c478bd9Sstevel@tonic-gate 			lck = ep->from_vertex;
20177c478bd9Sstevel@tonic-gate 			if (COLORED(lck)) {
20187c478bd9Sstevel@tonic-gate 				if (IS_BARRIER(lck)) {
20197c478bd9Sstevel@tonic-gate 					ASSERT(lck->l_index > 0);
20207c478bd9Sstevel@tonic-gate 					lck->l_index--;
20217c478bd9Sstevel@tonic-gate 					if (lck->l_index == 0)
20227c478bd9Sstevel@tonic-gate 						lck->l_state &= ~BARRIER_LOCK;
20237c478bd9Sstevel@tonic-gate 				}
20247c478bd9Sstevel@tonic-gate 				continue;
20257c478bd9Sstevel@tonic-gate 			}
20267c478bd9Sstevel@tonic-gate 			COLOR(lck);
20277c478bd9Sstevel@tonic-gate 			if (IS_BARRIER(lck)) {
20287c478bd9Sstevel@tonic-gate 				ASSERT(lck->l_index > 0);
20297c478bd9Sstevel@tonic-gate 				lck->l_index--;
20307c478bd9Sstevel@tonic-gate 				if (lck->l_index == 0)
20317c478bd9Sstevel@tonic-gate 					lck->l_state &= ~BARRIER_LOCK;
20327c478bd9Sstevel@tonic-gate 			}
20337c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
20347c478bd9Sstevel@tonic-gate 		}
20357c478bd9Sstevel@tonic-gate 	}
20367c478bd9Sstevel@tonic-gate }
20377c478bd9Sstevel@tonic-gate 
20387c478bd9Sstevel@tonic-gate /*
20397c478bd9Sstevel@tonic-gate  * Finds all vertices that are reachable from 'lock' more than once and
20407c478bd9Sstevel@tonic-gate  * mark them as barrier vertices and increment their barrier count.
20417c478bd9Sstevel@tonic-gate  * The barrier count is one minus the total number of paths from lock
20427c478bd9Sstevel@tonic-gate  * to that vertex.
20437c478bd9Sstevel@tonic-gate  */
20447c478bd9Sstevel@tonic-gate 
20457c478bd9Sstevel@tonic-gate static int
flk_find_barriers(lock_descriptor_t * lock)20467c478bd9Sstevel@tonic-gate flk_find_barriers(lock_descriptor_t *lock)
20477c478bd9Sstevel@tonic-gate {
20487c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
20497c478bd9Sstevel@tonic-gate 	int found = 0;
20507c478bd9Sstevel@tonic-gate 	edge_t	*ep;
20517c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
20527c478bd9Sstevel@tonic-gate 
20537c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
20547c478bd9Sstevel@tonic-gate 
20557c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
20567c478bd9Sstevel@tonic-gate 
20577c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
20587c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
20597c478bd9Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
20607c478bd9Sstevel@tonic-gate 		    ep = NEXT_IN(ep)) {
20617c478bd9Sstevel@tonic-gate 			lck = ep->from_vertex;
20627c478bd9Sstevel@tonic-gate 			if (COLORED(lck)) {
20637c478bd9Sstevel@tonic-gate 				/* this is a barrier */
20647c478bd9Sstevel@tonic-gate 				lck->l_state |= BARRIER_LOCK;
20657c478bd9Sstevel@tonic-gate 				/* index will have barrier count */
20667c478bd9Sstevel@tonic-gate 				lck->l_index++;
20677c478bd9Sstevel@tonic-gate 				if (!found)
20687c478bd9Sstevel@tonic-gate 					found = 1;
20697c478bd9Sstevel@tonic-gate 				continue;
20707c478bd9Sstevel@tonic-gate 			}
20717c478bd9Sstevel@tonic-gate 			COLOR(lck);
20727c478bd9Sstevel@tonic-gate 			lck->l_index = 0;
20737c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
20747c478bd9Sstevel@tonic-gate 		}
20757c478bd9Sstevel@tonic-gate 	}
20767c478bd9Sstevel@tonic-gate 	return (found);
20777c478bd9Sstevel@tonic-gate }
20787c478bd9Sstevel@tonic-gate 
20797c478bd9Sstevel@tonic-gate /*
20807c478bd9Sstevel@tonic-gate  * Finds the first lock that is mainly responsible for blocking this
20817c478bd9Sstevel@tonic-gate  * request.  If there is no such lock, request->l_flock.l_type is set to
20827c478bd9Sstevel@tonic-gate  * F_UNLCK.  Otherwise, request->l_flock is filled in with the particulars
20837c478bd9Sstevel@tonic-gate  * of the blocking lock.
20847c478bd9Sstevel@tonic-gate  *
20857c478bd9Sstevel@tonic-gate  * Note: It is possible a request is blocked by a sleeping lock because
20867c478bd9Sstevel@tonic-gate  * of the fairness policy used in flk_process_request() to construct the
20877c478bd9Sstevel@tonic-gate  * dependencies. (see comments before flk_process_request()).
20887c478bd9Sstevel@tonic-gate  */
20897c478bd9Sstevel@tonic-gate 
20907c478bd9Sstevel@tonic-gate static void
flk_get_first_blocking_lock(lock_descriptor_t * request)20917c478bd9Sstevel@tonic-gate flk_get_first_blocking_lock(lock_descriptor_t *request)
20927c478bd9Sstevel@tonic-gate {
20937c478bd9Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
20947c478bd9Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
20957c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *blocker;
20967c478bd9Sstevel@tonic-gate 
20977c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
20987c478bd9Sstevel@tonic-gate 	blocker = NULL;
20997c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
21007c478bd9Sstevel@tonic-gate 
21017c478bd9Sstevel@tonic-gate 	if (lock) {
21027c478bd9Sstevel@tonic-gate 		do {
21037c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
21047c478bd9Sstevel@tonic-gate 				blocker = lock;
21057c478bd9Sstevel@tonic-gate 				break;
21067c478bd9Sstevel@tonic-gate 			}
21077c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
21087c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
21097c478bd9Sstevel@tonic-gate 	}
21107c478bd9Sstevel@tonic-gate 
21116609f642SArne Jansen 	if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
21126609f642SArne Jansen 		/*
21136609f642SArne Jansen 		 * No active lock is blocking this request, but if a read
21146609f642SArne Jansen 		 * lock is requested, it may also get blocked by a waiting
21156609f642SArne Jansen 		 * writer. So search all sleeping locks and see if there is
21166609f642SArne Jansen 		 * a writer waiting.
21176609f642SArne Jansen 		 */
21186609f642SArne Jansen 		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
21196609f642SArne Jansen 		if (lock) {
21206609f642SArne Jansen 			do {
21216609f642SArne Jansen 				if (BLOCKS(lock, request)) {
21226609f642SArne Jansen 					blocker = lock;
21236609f642SArne Jansen 					break;
21246609f642SArne Jansen 				}
21256609f642SArne Jansen 				lock = lock->l_next;
21266609f642SArne Jansen 			} while (lock->l_vnode == vp);
21276609f642SArne Jansen 		}
21286609f642SArne Jansen 	}
21296609f642SArne Jansen 
21307c478bd9Sstevel@tonic-gate 	if (blocker) {
21317c478bd9Sstevel@tonic-gate 		report_blocker(blocker, request);
21327c478bd9Sstevel@tonic-gate 	} else
21337c478bd9Sstevel@tonic-gate 		request->l_flock.l_type = F_UNLCK;
21347c478bd9Sstevel@tonic-gate }
21357c478bd9Sstevel@tonic-gate 
/*
 * Get the graph_t structure associated with a vnode.
 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
 * not yet been initialized, then a new element is allocated and returned.
 *
 * initialize == FLK_USE_GRAPH: pure lookup; may return NULL.
 * initialize == FLK_INIT_GRAPH: allocate on demand; never returns NULL.
 */
graph_t *
flk_get_lock_graph(vnode_t *vp, int initialize)
{
	graph_t *gp;
	graph_t *gp_alloc = NULL;
	int index = HASH_INDEX(vp);

	if (initialize == FLK_USE_GRAPH) {
		/* lookup only; NULL if this bucket was never initialized */
		mutex_enter(&flock_lock);
		gp = lock_graph[index];
		mutex_exit(&flock_lock);
		return (gp);
	}

	ASSERT(initialize == FLK_INIT_GRAPH);

	/*
	 * Optimistic allocation outside flock_lock (KM_SLEEP can block);
	 * the unlocked read of lock_graph[index] may race with another
	 * initializer — that race is resolved under flock_lock below.
	 */
	if (lock_graph[index] == NULL) {

		gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);

		/* Initialize the graph */

		/* both lock lists start out as empty circular lists */
		gp_alloc->active_locks.l_next =
		    gp_alloc->active_locks.l_prev =
		    (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
		gp_alloc->sleeping_locks.l_next =
		    gp_alloc->sleeping_locks.l_prev =
		    (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
		gp_alloc->index = index;
		mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
	}

	mutex_enter(&flock_lock);

	gp = lock_graph[index];

	/* Recheck the value within flock_lock */
	if (gp == NULL) {
		struct flock_globals *fg;

		/* We must have previously allocated the graph_t structure */
		ASSERT(gp_alloc != NULL);
		lock_graph[index] = gp = gp_alloc;
		/*
		 * The lockmgr status is only needed if KLM is loaded.
		 */
		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
			fg = flk_get_globals();
			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
		}
	}

	mutex_exit(&flock_lock);

	if ((gp_alloc != NULL) && (gp != gp_alloc)) {
		/* There was a race to allocate the graph_t and we lost */
		mutex_destroy(&gp_alloc->gp_mutex);
		kmem_free(gp_alloc, sizeof (graph_t));
	}

	return (gp);
}
22037c478bd9Sstevel@tonic-gate 
22047c478bd9Sstevel@tonic-gate /*
22057c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
22067c478bd9Sstevel@tonic-gate  */
22077c478bd9Sstevel@tonic-gate int
cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
{
	lock_descriptor_t *lock;
	graph_t *gp;
	int found = 0;

	/*
	 * This interface is only meaningful when the node is booted as
	 * a cluster member; otherwise there are no cluster NLM locks.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return (0);
	}

	/* No lock graph for this vnode means no locks at all. */
	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL) {
		return (0);
	}

	mutex_enter(&gp->gp_mutex);

	/*
	 * Walk the active locks for this vnode, looking for a lock
	 * manager lock whose sysid encodes the given NLM id.
	 */
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (IS_LOCKMGR(lock) &&
		    GETNLMID(lock->l_flock.l_sysid) == nlmid) {
			found = 1;
			break;
		}
	}

	/* Nothing active matched; check the sleeping requests too. */
	if (!found) {
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		for (; lock != NULL && lock->l_vnode == vp;
		    lock = lock->l_next) {
			if (IS_LOCKMGR(lock) &&
			    GETNLMID(lock->l_flock.l_sysid) == nlmid) {
				found = 1;
				break;
			}
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (found);
}
22717c478bd9Sstevel@tonic-gate 
22727c478bd9Sstevel@tonic-gate /*
22737c478bd9Sstevel@tonic-gate  * Determine whether there are any locks for the given vnode with a remote
22747c478bd9Sstevel@tonic-gate  * sysid.  Returns zero if not, non-zero if there are.
22757c478bd9Sstevel@tonic-gate  *
22767c478bd9Sstevel@tonic-gate  * Note that the return value from this function is potentially invalid
22777c478bd9Sstevel@tonic-gate  * once it has been returned.  The caller is responsible for providing its
22787c478bd9Sstevel@tonic-gate  * own synchronization mechanism to ensure that the return value is useful
22797c478bd9Sstevel@tonic-gate  * (e.g., see nfs_lockcompletion()).
22807c478bd9Sstevel@tonic-gate  */
22817c478bd9Sstevel@tonic-gate int
flk_has_remote_locks(vnode_t * vp)22827c478bd9Sstevel@tonic-gate flk_has_remote_locks(vnode_t *vp)
22837c478bd9Sstevel@tonic-gate {
22847c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
22857c478bd9Sstevel@tonic-gate 	int result = 0;
22867c478bd9Sstevel@tonic-gate 	graph_t *gp;
22877c478bd9Sstevel@tonic-gate 
22887c478bd9Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
22897c478bd9Sstevel@tonic-gate 	if (gp == NULL) {
22907c478bd9Sstevel@tonic-gate 		return (0);
22917c478bd9Sstevel@tonic-gate 	}
22927c478bd9Sstevel@tonic-gate 
22937c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
22947c478bd9Sstevel@tonic-gate 
22957c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
22967c478bd9Sstevel@tonic-gate 
22977c478bd9Sstevel@tonic-gate 	if (lock) {
22987c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
22997c478bd9Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
23007c478bd9Sstevel@tonic-gate 				result = 1;
23017c478bd9Sstevel@tonic-gate 				goto done;
23027c478bd9Sstevel@tonic-gate 			}
23037c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
23047c478bd9Sstevel@tonic-gate 		}
23057c478bd9Sstevel@tonic-gate 	}
23067c478bd9Sstevel@tonic-gate 
23077c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
23087c478bd9Sstevel@tonic-gate 
23097c478bd9Sstevel@tonic-gate 	if (lock) {
23107c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
23117c478bd9Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
23127c478bd9Sstevel@tonic-gate 				result = 1;
23137c478bd9Sstevel@tonic-gate 				goto done;
23147c478bd9Sstevel@tonic-gate 			}
23157c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
23167c478bd9Sstevel@tonic-gate 		}
23177c478bd9Sstevel@tonic-gate 	}
23187c478bd9Sstevel@tonic-gate 
23197c478bd9Sstevel@tonic-gate done:
23207c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
23217c478bd9Sstevel@tonic-gate 	return (result);
23227c478bd9Sstevel@tonic-gate }
23237c478bd9Sstevel@tonic-gate 
23247c478bd9Sstevel@tonic-gate /*
2325bbaa8b60SDan Kruchinin  * Determine whether there are any locks for the given vnode with a remote
2326bbaa8b60SDan Kruchinin  * sysid matching given sysid.
2327bbaa8b60SDan Kruchinin  * Used by the new (open source) NFS Lock Manager (NLM)
2328bbaa8b60SDan Kruchinin  */
2329bbaa8b60SDan Kruchinin int
flk_has_remote_locks_for_sysid(vnode_t * vp,int sysid)2330bbaa8b60SDan Kruchinin flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2331bbaa8b60SDan Kruchinin {
2332bbaa8b60SDan Kruchinin 	lock_descriptor_t *lock;
2333bbaa8b60SDan Kruchinin 	int result = 0;
2334bbaa8b60SDan Kruchinin 	graph_t *gp;
2335bbaa8b60SDan Kruchinin 
2336bbaa8b60SDan Kruchinin 	if (sysid == 0)
2337bbaa8b60SDan Kruchinin 		return (0);
2338bbaa8b60SDan Kruchinin 
2339bbaa8b60SDan Kruchinin 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2340bbaa8b60SDan Kruchinin 	if (gp == NULL) {
2341bbaa8b60SDan Kruchinin 		return (0);
2342bbaa8b60SDan Kruchinin 	}
2343bbaa8b60SDan Kruchinin 
2344bbaa8b60SDan Kruchinin 	mutex_enter(&gp->gp_mutex);
2345bbaa8b60SDan Kruchinin 
2346bbaa8b60SDan Kruchinin 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2347bbaa8b60SDan Kruchinin 
2348bbaa8b60SDan Kruchinin 	if (lock) {
2349bbaa8b60SDan Kruchinin 		while (lock->l_vnode == vp) {
2350bbaa8b60SDan Kruchinin 			if (lock->l_flock.l_sysid == sysid) {
2351bbaa8b60SDan Kruchinin 				result = 1;
2352bbaa8b60SDan Kruchinin 				goto done;
2353bbaa8b60SDan Kruchinin 			}
2354bbaa8b60SDan Kruchinin 			lock = lock->l_next;
2355bbaa8b60SDan Kruchinin 		}
2356bbaa8b60SDan Kruchinin 	}
2357bbaa8b60SDan Kruchinin 
2358bbaa8b60SDan Kruchinin 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2359bbaa8b60SDan Kruchinin 
2360bbaa8b60SDan Kruchinin 	if (lock) {
2361bbaa8b60SDan Kruchinin 		while (lock->l_vnode == vp) {
2362bbaa8b60SDan Kruchinin 			if (lock->l_flock.l_sysid == sysid) {
2363bbaa8b60SDan Kruchinin 				result = 1;
2364bbaa8b60SDan Kruchinin 				goto done;
2365bbaa8b60SDan Kruchinin 			}
2366bbaa8b60SDan Kruchinin 			lock = lock->l_next;
2367bbaa8b60SDan Kruchinin 		}
2368bbaa8b60SDan Kruchinin 	}
2369bbaa8b60SDan Kruchinin 
2370bbaa8b60SDan Kruchinin done:
2371bbaa8b60SDan Kruchinin 	mutex_exit(&gp->gp_mutex);
2372bbaa8b60SDan Kruchinin 	return (result);
2373bbaa8b60SDan Kruchinin }
2374bbaa8b60SDan Kruchinin 
2375bbaa8b60SDan Kruchinin /*
23767c478bd9Sstevel@tonic-gate  * Determine if there are any locks owned by the given sysid.
23777c478bd9Sstevel@tonic-gate  * Returns zero if not, non-zero if there are.  Note that this return code
23787c478bd9Sstevel@tonic-gate  * could be derived from flk_get_{sleeping,active}_locks, but this routine
23797c478bd9Sstevel@tonic-gate  * avoids all the memory allocations of those routines.
23807c478bd9Sstevel@tonic-gate  *
23817c478bd9Sstevel@tonic-gate  * This routine has the same synchronization issues as
23827c478bd9Sstevel@tonic-gate  * flk_has_remote_locks.
23837c478bd9Sstevel@tonic-gate  */
23847c478bd9Sstevel@tonic-gate 
23857c478bd9Sstevel@tonic-gate int
flk_sysid_has_locks(int sysid,int lck_type)23867c478bd9Sstevel@tonic-gate flk_sysid_has_locks(int sysid, int lck_type)
23877c478bd9Sstevel@tonic-gate {
23887c478bd9Sstevel@tonic-gate 	int		has_locks = 0;
23897c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
23907c478bd9Sstevel@tonic-gate 	graph_t 	*gp;
23917c478bd9Sstevel@tonic-gate 	int		i;
23927c478bd9Sstevel@tonic-gate 
23937c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE && !has_locks; i++) {
23947c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
23957c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
23967c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
23977c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
23987c478bd9Sstevel@tonic-gate 			continue;
23997c478bd9Sstevel@tonic-gate 		}
24007c478bd9Sstevel@tonic-gate 
24017c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
24027c478bd9Sstevel@tonic-gate 
24037c478bd9Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_ACTIVE) {
24047c478bd9Sstevel@tonic-gate 			for (lock = ACTIVE_HEAD(gp)->l_next;
24057c478bd9Sstevel@tonic-gate 			    lock != ACTIVE_HEAD(gp) && !has_locks;
24067c478bd9Sstevel@tonic-gate 			    lock = lock->l_next) {
24077c478bd9Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
24087c478bd9Sstevel@tonic-gate 					has_locks = 1;
24097c478bd9Sstevel@tonic-gate 			}
24107c478bd9Sstevel@tonic-gate 		}
24117c478bd9Sstevel@tonic-gate 
24127c478bd9Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_SLEEPING) {
24137c478bd9Sstevel@tonic-gate 			for (lock = SLEEPING_HEAD(gp)->l_next;
24147c478bd9Sstevel@tonic-gate 			    lock != SLEEPING_HEAD(gp) && !has_locks;
24157c478bd9Sstevel@tonic-gate 			    lock = lock->l_next) {
24167c478bd9Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
24177c478bd9Sstevel@tonic-gate 					has_locks = 1;
24187c478bd9Sstevel@tonic-gate 			}
24197c478bd9Sstevel@tonic-gate 		}
24207c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
24217c478bd9Sstevel@tonic-gate 	}
24227c478bd9Sstevel@tonic-gate 
24237c478bd9Sstevel@tonic-gate 	return (has_locks);
24247c478bd9Sstevel@tonic-gate }
24257c478bd9Sstevel@tonic-gate 
24267c478bd9Sstevel@tonic-gate 
24277c478bd9Sstevel@tonic-gate /*
24287c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
24297c478bd9Sstevel@tonic-gate  *
24307c478bd9Sstevel@tonic-gate  * Requires: "sysid" is a pair [nlmid, sysid].  The lower half is 16-bit
24317c478bd9Sstevel@tonic-gate  *  quantity, the real sysid generated by the NLM server; the upper half
24327c478bd9Sstevel@tonic-gate  *  identifies the node of the cluster where the NLM server ran.
24337c478bd9Sstevel@tonic-gate  *  This routine is only called by an NLM server running in a cluster.
24347c478bd9Sstevel@tonic-gate  * Effects: Remove all locks held on behalf of the client identified
24357c478bd9Sstevel@tonic-gate  *  by "sysid."
24367c478bd9Sstevel@tonic-gate  */
24377c478bd9Sstevel@tonic-gate void
cl_flk_remove_locks_by_sysid(int sysid)24387c478bd9Sstevel@tonic-gate cl_flk_remove_locks_by_sysid(int sysid)
24397c478bd9Sstevel@tonic-gate {
24407c478bd9Sstevel@tonic-gate 	graph_t	*gp;
24417c478bd9Sstevel@tonic-gate 	int i;
24427c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
24437c478bd9Sstevel@tonic-gate 
24447c478bd9Sstevel@tonic-gate 	/*
24457c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
24467c478bd9Sstevel@tonic-gate 	 */
24477c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
24487c478bd9Sstevel@tonic-gate 		return;
24497c478bd9Sstevel@tonic-gate 	}
24507c478bd9Sstevel@tonic-gate 
24517c478bd9Sstevel@tonic-gate 	ASSERT(sysid != 0);
24527c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
24537c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
24547c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
24557c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 		if (gp == NULL)
24587c478bd9Sstevel@tonic-gate 			continue;
24597c478bd9Sstevel@tonic-gate 
24607c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);	/*  get mutex on lock graph */
24617c478bd9Sstevel@tonic-gate 
24627c478bd9Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
24637c478bd9Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
24647c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
24657c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
24667c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
24677c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
24687c478bd9Sstevel@tonic-gate 			}
24697c478bd9Sstevel@tonic-gate 			lock = nlock;
24707c478bd9Sstevel@tonic-gate 		}
24717c478bd9Sstevel@tonic-gate 
24727c478bd9Sstevel@tonic-gate 		/* delete active locks */
24737c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
24747c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
24757c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
24767c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
24777c478bd9Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
24787c478bd9Sstevel@tonic-gate 				flk_wakeup(lock, 1);
24797c478bd9Sstevel@tonic-gate 				flk_free_lock(lock);
24807c478bd9Sstevel@tonic-gate 			}
24817c478bd9Sstevel@tonic-gate 			lock = nlock;
24827c478bd9Sstevel@tonic-gate 		}
24837c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);    /* release mutex on lock graph */
24847c478bd9Sstevel@tonic-gate 	}
24857c478bd9Sstevel@tonic-gate }
24867c478bd9Sstevel@tonic-gate 
24877c478bd9Sstevel@tonic-gate /*
24887c478bd9Sstevel@tonic-gate  * Delete all locks in the system that belongs to the sysid of the request.
24897c478bd9Sstevel@tonic-gate  */
24907c478bd9Sstevel@tonic-gate 
24917c478bd9Sstevel@tonic-gate static void
flk_delete_locks_by_sysid(lock_descriptor_t * request)24927c478bd9Sstevel@tonic-gate flk_delete_locks_by_sysid(lock_descriptor_t *request)
24937c478bd9Sstevel@tonic-gate {
24947c478bd9Sstevel@tonic-gate 	int	sysid  = request->l_flock.l_sysid;
24957c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
24967c478bd9Sstevel@tonic-gate 	graph_t	*gp;
24977c478bd9Sstevel@tonic-gate 	int i;
24987c478bd9Sstevel@tonic-gate 
24997c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
25007c478bd9Sstevel@tonic-gate 	ASSERT(sysid != 0);
25017c478bd9Sstevel@tonic-gate 
25027c478bd9Sstevel@tonic-gate 	mutex_exit(&request->l_graph->gp_mutex);
25037c478bd9Sstevel@tonic-gate 
25047c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
25057c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
25067c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
25077c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
25087c478bd9Sstevel@tonic-gate 
25097c478bd9Sstevel@tonic-gate 		if (gp == NULL)
25107c478bd9Sstevel@tonic-gate 			continue;
25117c478bd9Sstevel@tonic-gate 
25127c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
25137c478bd9Sstevel@tonic-gate 
25147c478bd9Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
25157c478bd9Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
25167c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
25177c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
25187c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
25197c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
25207c478bd9Sstevel@tonic-gate 			}
25217c478bd9Sstevel@tonic-gate 			lock = nlock;
25227c478bd9Sstevel@tonic-gate 		}
25237c478bd9Sstevel@tonic-gate 
25247c478bd9Sstevel@tonic-gate 		/* delete active locks */
25257c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
25267c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
25277c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
25287c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
25297c478bd9Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
25307c478bd9Sstevel@tonic-gate 				flk_wakeup(lock, 1);
25317c478bd9Sstevel@tonic-gate 				flk_free_lock(lock);
25327c478bd9Sstevel@tonic-gate 			}
25337c478bd9Sstevel@tonic-gate 			lock = nlock;
25347c478bd9Sstevel@tonic-gate 		}
25357c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
25367c478bd9Sstevel@tonic-gate 	}
25377c478bd9Sstevel@tonic-gate 
25387c478bd9Sstevel@tonic-gate 	mutex_enter(&request->l_graph->gp_mutex);
25397c478bd9Sstevel@tonic-gate }
25407c478bd9Sstevel@tonic-gate 
25417c478bd9Sstevel@tonic-gate /*
25427c478bd9Sstevel@tonic-gate  * Clustering: Deletes PXFS locks
25437c478bd9Sstevel@tonic-gate  * Effects: Delete all locks on files in the given file system and with the
25447c478bd9Sstevel@tonic-gate  *  given PXFS id.
25457c478bd9Sstevel@tonic-gate  */
25467c478bd9Sstevel@tonic-gate void
cl_flk_delete_pxfs_locks(struct vfs * vfsp,int pxfsid)25477c478bd9Sstevel@tonic-gate cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
25487c478bd9Sstevel@tonic-gate {
25497c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
25507c478bd9Sstevel@tonic-gate 	graph_t	*gp;
25517c478bd9Sstevel@tonic-gate 	int i;
25527c478bd9Sstevel@tonic-gate 
25537c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
25547c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
25557c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
25567c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
25577c478bd9Sstevel@tonic-gate 
25587c478bd9Sstevel@tonic-gate 		if (gp == NULL)
25597c478bd9Sstevel@tonic-gate 			continue;
25607c478bd9Sstevel@tonic-gate 
25617c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
25627c478bd9Sstevel@tonic-gate 
25637c478bd9Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
25647c478bd9Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
25657c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
25667c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
25677c478bd9Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
25687c478bd9Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
25697c478bd9Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
25707c478bd9Sstevel@tonic-gate 				    pxfsid) {
25717c478bd9Sstevel@tonic-gate 					flk_set_state(lock,
25727c478bd9Sstevel@tonic-gate 					    FLK_CANCELLED_STATE);
25737c478bd9Sstevel@tonic-gate 					flk_cancel_sleeping_lock(lock, 1);
25747c478bd9Sstevel@tonic-gate 				}
25757c478bd9Sstevel@tonic-gate 			}
25767c478bd9Sstevel@tonic-gate 			lock = nlock;
25777c478bd9Sstevel@tonic-gate 		}
25787c478bd9Sstevel@tonic-gate 
25797c478bd9Sstevel@tonic-gate 		/* delete active locks */
25807c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
25817c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
25827c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
25837c478bd9Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
25847c478bd9Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
25857c478bd9Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
25867c478bd9Sstevel@tonic-gate 				    pxfsid) {
25877c478bd9Sstevel@tonic-gate 					flk_delete_active_lock(lock, 0);
25887c478bd9Sstevel@tonic-gate 					flk_wakeup(lock, 1);
25897c478bd9Sstevel@tonic-gate 					flk_free_lock(lock);
25907c478bd9Sstevel@tonic-gate 				}
25917c478bd9Sstevel@tonic-gate 			}
25927c478bd9Sstevel@tonic-gate 			lock = nlock;
25937c478bd9Sstevel@tonic-gate 		}
25947c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
25957c478bd9Sstevel@tonic-gate 	}
25967c478bd9Sstevel@tonic-gate }
25977c478bd9Sstevel@tonic-gate 
25987c478bd9Sstevel@tonic-gate /*
25997c478bd9Sstevel@tonic-gate  * Search for a sleeping lock manager lock which matches exactly this lock
26007c478bd9Sstevel@tonic-gate  * request; if one is found, fake a signal to cancel it.
26017c478bd9Sstevel@tonic-gate  *
26027c478bd9Sstevel@tonic-gate  * Return 1 if a matching lock was found, 0 otherwise.
26037c478bd9Sstevel@tonic-gate  */
26047c478bd9Sstevel@tonic-gate 
26057c478bd9Sstevel@tonic-gate static int
flk_canceled(lock_descriptor_t * request)26067c478bd9Sstevel@tonic-gate flk_canceled(lock_descriptor_t *request)
26077c478bd9Sstevel@tonic-gate {
26087c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
26097c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
26107c478bd9Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
26117c478bd9Sstevel@tonic-gate 
26127c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
26137c478bd9Sstevel@tonic-gate 	ASSERT(IS_LOCKMGR(request));
26147c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
26157c478bd9Sstevel@tonic-gate 
26167c478bd9Sstevel@tonic-gate 	if (lock) {
26177c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
26187c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
26197c478bd9Sstevel@tonic-gate 			if (SAME_OWNER(lock, request) &&
26207c478bd9Sstevel@tonic-gate 			    lock->l_start == request->l_start &&
26217c478bd9Sstevel@tonic-gate 			    lock->l_end == request->l_end) {
26227c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
26237c478bd9Sstevel@tonic-gate 				return (1);
26247c478bd9Sstevel@tonic-gate 			}
26257c478bd9Sstevel@tonic-gate 			lock = nlock;
26267c478bd9Sstevel@tonic-gate 		}
26277c478bd9Sstevel@tonic-gate 	}
26287c478bd9Sstevel@tonic-gate 	return (0);
26297c478bd9Sstevel@tonic-gate }
26307c478bd9Sstevel@tonic-gate 
26317c478bd9Sstevel@tonic-gate /*
26327c478bd9Sstevel@tonic-gate  * Remove all the locks for the vnode belonging to the given pid and sysid.
26337c478bd9Sstevel@tonic-gate  */
26347c478bd9Sstevel@tonic-gate 
26357c478bd9Sstevel@tonic-gate void
cleanlocks(vnode_t * vp,pid_t pid,int sysid)26367c478bd9Sstevel@tonic-gate cleanlocks(vnode_t *vp, pid_t pid, int sysid)
26377c478bd9Sstevel@tonic-gate {
26387c478bd9Sstevel@tonic-gate 	graph_t	*gp;
26397c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
26407c478bd9Sstevel@tonic-gate 	lock_descriptor_t *link_stack;
26417c478bd9Sstevel@tonic-gate 
26427c478bd9Sstevel@tonic-gate 	STACK_INIT(link_stack);
26437c478bd9Sstevel@tonic-gate 
26447c478bd9Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
26457c478bd9Sstevel@tonic-gate 
26467c478bd9Sstevel@tonic-gate 	if (gp == NULL)
26477c478bd9Sstevel@tonic-gate 		return;
26487c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
26497c478bd9Sstevel@tonic-gate 
26507c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
26517c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
26527c478bd9Sstevel@tonic-gate 
26537c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
26547c478bd9Sstevel@tonic-gate 
26557c478bd9Sstevel@tonic-gate 	if (lock) {
26567c478bd9Sstevel@tonic-gate 		do {
26577c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
26587c478bd9Sstevel@tonic-gate 			if ((lock->l_flock.l_pid == pid ||
26597c478bd9Sstevel@tonic-gate 			    pid == IGN_PID) &&
26607c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid) {
26617c478bd9Sstevel@tonic-gate 				CANCEL_WAKEUP(lock);
26627c478bd9Sstevel@tonic-gate 			}
26637c478bd9Sstevel@tonic-gate 			lock = nlock;
26647c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
26657c478bd9Sstevel@tonic-gate 	}
26667c478bd9Sstevel@tonic-gate 
26677c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
26687c478bd9Sstevel@tonic-gate 
26697c478bd9Sstevel@tonic-gate 	if (lock) {
26707c478bd9Sstevel@tonic-gate 		do {
26717c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
26727c478bd9Sstevel@tonic-gate 			if ((lock->l_flock.l_pid == pid ||
26737c478bd9Sstevel@tonic-gate 			    pid == IGN_PID) &&
26747c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid) {
26757c478bd9Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
26767c478bd9Sstevel@tonic-gate 				STACK_PUSH(link_stack, lock, l_stack);
26777c478bd9Sstevel@tonic-gate 			}
26787c478bd9Sstevel@tonic-gate 			lock = nlock;
26797c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
26807c478bd9Sstevel@tonic-gate 	}
26817c478bd9Sstevel@tonic-gate 
26827c478bd9Sstevel@tonic-gate 	while ((lock = STACK_TOP(link_stack)) != NULL) {
26837c478bd9Sstevel@tonic-gate 		STACK_POP(link_stack, l_stack);
26847c478bd9Sstevel@tonic-gate 		flk_wakeup(lock, 1);
26857c478bd9Sstevel@tonic-gate 		flk_free_lock(lock);
26867c478bd9Sstevel@tonic-gate 	}
26877c478bd9Sstevel@tonic-gate 
26887c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
26897c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
26907c478bd9Sstevel@tonic-gate 	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
26917c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
26927c478bd9Sstevel@tonic-gate }
26937c478bd9Sstevel@tonic-gate 
26947c478bd9Sstevel@tonic-gate 
26957c478bd9Sstevel@tonic-gate /*
26967c478bd9Sstevel@tonic-gate  * Called from 'fs' read and write routines for files that have mandatory
26977c478bd9Sstevel@tonic-gate  * locking enabled.
26987c478bd9Sstevel@tonic-gate  */
26997c478bd9Sstevel@tonic-gate 
27007c478bd9Sstevel@tonic-gate int
chklock(struct vnode * vp,int iomode,u_offset_t offset,ssize_t len,int fmode,caller_context_t * ct)2701*7a0cc5a9SMarcel Telka chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len, int fmode,
27027c478bd9Sstevel@tonic-gate     caller_context_t *ct)
27037c478bd9Sstevel@tonic-gate {
27047c478bd9Sstevel@tonic-gate 	register int	i;
27057c478bd9Sstevel@tonic-gate 	struct flock64 	bf;
27067c478bd9Sstevel@tonic-gate 	int 		error = 0;
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate 	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
27097c478bd9Sstevel@tonic-gate 	bf.l_whence = 0;
27107c478bd9Sstevel@tonic-gate 	bf.l_start = offset;
27117c478bd9Sstevel@tonic-gate 	bf.l_len = len;
27127c478bd9Sstevel@tonic-gate 	if (ct == NULL) {
27137c478bd9Sstevel@tonic-gate 		bf.l_pid = curproc->p_pid;
27147c478bd9Sstevel@tonic-gate 		bf.l_sysid = 0;
27157c478bd9Sstevel@tonic-gate 	} else {
27167c478bd9Sstevel@tonic-gate 		bf.l_pid = ct->cc_pid;
27177c478bd9Sstevel@tonic-gate 		bf.l_sysid = ct->cc_sysid;
27187c478bd9Sstevel@tonic-gate 	}
27197c478bd9Sstevel@tonic-gate 	i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
27207c478bd9Sstevel@tonic-gate 	if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
27217c478bd9Sstevel@tonic-gate 	    bf.l_type != F_UNLCK)
27227c478bd9Sstevel@tonic-gate 		error = i ? i : EAGAIN;
27237c478bd9Sstevel@tonic-gate 	return (error);
27247c478bd9Sstevel@tonic-gate }
27257c478bd9Sstevel@tonic-gate 
27267c478bd9Sstevel@tonic-gate /*
27277c478bd9Sstevel@tonic-gate  * convoff - converts the given data (start, whence) to the
27287c478bd9Sstevel@tonic-gate  * given whence.
27297c478bd9Sstevel@tonic-gate  */
27307c478bd9Sstevel@tonic-gate int
convoff(struct vnode * vp,struct flock64 * lckdat,int whence,offset_t offset)2731*7a0cc5a9SMarcel Telka convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
27327c478bd9Sstevel@tonic-gate {
27337c478bd9Sstevel@tonic-gate 	int 		error;
27347c478bd9Sstevel@tonic-gate 	struct vattr 	vattr;
27357c478bd9Sstevel@tonic-gate 
27367c478bd9Sstevel@tonic-gate 	if ((lckdat->l_whence == 2) || (whence == 2)) {
27377c478bd9Sstevel@tonic-gate 		vattr.va_mask = AT_SIZE;
2738da6c28aaSamw 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
27397c478bd9Sstevel@tonic-gate 			return (error);
27407c478bd9Sstevel@tonic-gate 	}
27417c478bd9Sstevel@tonic-gate 
27427c478bd9Sstevel@tonic-gate 	switch (lckdat->l_whence) {
27437c478bd9Sstevel@tonic-gate 	case 1:
27447c478bd9Sstevel@tonic-gate 		lckdat->l_start += offset;
27457c478bd9Sstevel@tonic-gate 		break;
27467c478bd9Sstevel@tonic-gate 	case 2:
27477c478bd9Sstevel@tonic-gate 		lckdat->l_start += vattr.va_size;
27487c478bd9Sstevel@tonic-gate 		/* FALLTHRU */
27497c478bd9Sstevel@tonic-gate 	case 0:
27507c478bd9Sstevel@tonic-gate 		break;
27517c478bd9Sstevel@tonic-gate 	default:
27527c478bd9Sstevel@tonic-gate 		return (EINVAL);
27537c478bd9Sstevel@tonic-gate 	}
27547c478bd9Sstevel@tonic-gate 
27557c478bd9Sstevel@tonic-gate 	if (lckdat->l_start < 0)
27567c478bd9Sstevel@tonic-gate 		return (EINVAL);
27577c478bd9Sstevel@tonic-gate 
27587c478bd9Sstevel@tonic-gate 	switch (whence) {
27597c478bd9Sstevel@tonic-gate 	case 1:
27607c478bd9Sstevel@tonic-gate 		lckdat->l_start -= offset;
27617c478bd9Sstevel@tonic-gate 		break;
27627c478bd9Sstevel@tonic-gate 	case 2:
27637c478bd9Sstevel@tonic-gate 		lckdat->l_start -= vattr.va_size;
27647c478bd9Sstevel@tonic-gate 		/* FALLTHRU */
27657c478bd9Sstevel@tonic-gate 	case 0:
27667c478bd9Sstevel@tonic-gate 		break;
27677c478bd9Sstevel@tonic-gate 	default:
27687c478bd9Sstevel@tonic-gate 		return (EINVAL);
27697c478bd9Sstevel@tonic-gate 	}
27707c478bd9Sstevel@tonic-gate 
27717c478bd9Sstevel@tonic-gate 	lckdat->l_whence = (short)whence;
27727c478bd9Sstevel@tonic-gate 	return (0);
27737c478bd9Sstevel@tonic-gate }
27747c478bd9Sstevel@tonic-gate 
27757c478bd9Sstevel@tonic-gate 
27767c478bd9Sstevel@tonic-gate /* 	proc_graph function definitions */
27777c478bd9Sstevel@tonic-gate 
27787c478bd9Sstevel@tonic-gate /*
27797c478bd9Sstevel@tonic-gate  * Function checks for deadlock due to the new 'lock'. If deadlock found
27807c478bd9Sstevel@tonic-gate  * edges of this lock are freed and returned.
27817c478bd9Sstevel@tonic-gate  */
27827c478bd9Sstevel@tonic-gate 
27837c478bd9Sstevel@tonic-gate static int
flk_check_deadlock(lock_descriptor_t * lock)27847c478bd9Sstevel@tonic-gate flk_check_deadlock(lock_descriptor_t *lock)
27857c478bd9Sstevel@tonic-gate {
27867c478bd9Sstevel@tonic-gate 	proc_vertex_t	*start_vertex, *pvertex;
27877c478bd9Sstevel@tonic-gate 	proc_vertex_t *dvertex;
27887c478bd9Sstevel@tonic-gate 	proc_edge_t *pep, *ppep;
27897c478bd9Sstevel@tonic-gate 	edge_t	*ep, *nep;
27907c478bd9Sstevel@tonic-gate 	proc_vertex_t *process_stack;
27917c478bd9Sstevel@tonic-gate 
27927c478bd9Sstevel@tonic-gate 	STACK_INIT(process_stack);
27937c478bd9Sstevel@tonic-gate 
27947c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
27957c478bd9Sstevel@tonic-gate 	start_vertex = flk_get_proc_vertex(lock);
27967c478bd9Sstevel@tonic-gate 	ASSERT(start_vertex != NULL);
27977c478bd9Sstevel@tonic-gate 
27987c478bd9Sstevel@tonic-gate 	/* construct the edges from this process to other processes */
27997c478bd9Sstevel@tonic-gate 
28007c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
28017c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
28027c478bd9Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
28037c478bd9Sstevel@tonic-gate 
28047c478bd9Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
28057c478bd9Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
28067c478bd9Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
28077c478bd9Sstevel@tonic-gate 				ASSERT(pep->refcount);
28087c478bd9Sstevel@tonic-gate 				pep->refcount++;
28097c478bd9Sstevel@tonic-gate 				break;
28107c478bd9Sstevel@tonic-gate 			}
28117c478bd9Sstevel@tonic-gate 		}
28127c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
28137c478bd9Sstevel@tonic-gate 			pep = flk_get_proc_edge();
28147c478bd9Sstevel@tonic-gate 			pep->to_proc = adj_proc;
28157c478bd9Sstevel@tonic-gate 			pep->refcount = 1;
28167c478bd9Sstevel@tonic-gate 			adj_proc->incount++;
28177c478bd9Sstevel@tonic-gate 			pep->next = start_vertex->edge;
28187c478bd9Sstevel@tonic-gate 			start_vertex->edge = pep;
28197c478bd9Sstevel@tonic-gate 		}
28207c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
28217c478bd9Sstevel@tonic-gate 	}
28227c478bd9Sstevel@tonic-gate 
28237c478bd9Sstevel@tonic-gate 	ep = FIRST_IN(lock);
28247c478bd9Sstevel@tonic-gate 
28257c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
28267c478bd9Sstevel@tonic-gate 		proc_vertex_t *in_proc;
28277c478bd9Sstevel@tonic-gate 
28287c478bd9Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
28297c478bd9Sstevel@tonic-gate 
28307c478bd9Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
28317c478bd9Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
28327c478bd9Sstevel@tonic-gate 				ASSERT(pep->refcount);
28337c478bd9Sstevel@tonic-gate 				pep->refcount++;
28347c478bd9Sstevel@tonic-gate 				break;
28357c478bd9Sstevel@tonic-gate 			}
28367c478bd9Sstevel@tonic-gate 		}
28377c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
28387c478bd9Sstevel@tonic-gate 			pep = flk_get_proc_edge();
28397c478bd9Sstevel@tonic-gate 			pep->to_proc = start_vertex;
28407c478bd9Sstevel@tonic-gate 			pep->refcount = 1;
28417c478bd9Sstevel@tonic-gate 			start_vertex->incount++;
28427c478bd9Sstevel@tonic-gate 			pep->next = in_proc->edge;
28437c478bd9Sstevel@tonic-gate 			in_proc->edge = pep;
28447c478bd9Sstevel@tonic-gate 		}
28457c478bd9Sstevel@tonic-gate 		ep = NEXT_IN(ep);
28467c478bd9Sstevel@tonic-gate 	}
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate 	if (start_vertex->incount == 0) {
28497c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
28507c478bd9Sstevel@tonic-gate 		return (0);
28517c478bd9Sstevel@tonic-gate 	}
28527c478bd9Sstevel@tonic-gate 
28537c478bd9Sstevel@tonic-gate 	flk_proc_graph_uncolor();
28547c478bd9Sstevel@tonic-gate 
28557c478bd9Sstevel@tonic-gate 	start_vertex->p_sedge = start_vertex->edge;
28567c478bd9Sstevel@tonic-gate 
28577c478bd9Sstevel@tonic-gate 	STACK_PUSH(process_stack, start_vertex, p_stack);
28587c478bd9Sstevel@tonic-gate 
28597c478bd9Sstevel@tonic-gate 	while ((pvertex = STACK_TOP(process_stack)) != NULL) {
28607c478bd9Sstevel@tonic-gate 		for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
28617c478bd9Sstevel@tonic-gate 			dvertex = pep->to_proc;
28627c478bd9Sstevel@tonic-gate 			if (!PROC_ARRIVED(dvertex)) {
28637c478bd9Sstevel@tonic-gate 				STACK_PUSH(process_stack, dvertex, p_stack);
28647c478bd9Sstevel@tonic-gate 				dvertex->p_sedge = dvertex->edge;
28657c478bd9Sstevel@tonic-gate 				PROC_ARRIVE(pvertex);
28667c478bd9Sstevel@tonic-gate 				pvertex->p_sedge = pep->next;
28677c478bd9Sstevel@tonic-gate 				break;
28687c478bd9Sstevel@tonic-gate 			}
28697c478bd9Sstevel@tonic-gate 			if (!PROC_DEPARTED(dvertex))
28707c478bd9Sstevel@tonic-gate 				goto deadlock;
28717c478bd9Sstevel@tonic-gate 		}
28727c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
28737c478bd9Sstevel@tonic-gate 			PROC_DEPART(pvertex);
28747c478bd9Sstevel@tonic-gate 			STACK_POP(process_stack, p_stack);
28757c478bd9Sstevel@tonic-gate 		}
28767c478bd9Sstevel@tonic-gate 	}
28777c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
28787c478bd9Sstevel@tonic-gate 	return (0);
28797c478bd9Sstevel@tonic-gate 
28807c478bd9Sstevel@tonic-gate deadlock:
28817c478bd9Sstevel@tonic-gate 
28827c478bd9Sstevel@tonic-gate 	/* we remove all lock edges and proc edges */
28837c478bd9Sstevel@tonic-gate 
28847c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
28857c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
28867c478bd9Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
28877c478bd9Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
28887c478bd9Sstevel@tonic-gate 		nep = NEXT_ADJ(ep);
28897c478bd9Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
28907c478bd9Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
28917c478bd9Sstevel@tonic-gate 		flk_free_edge(ep);
28927c478bd9Sstevel@tonic-gate 		ppep = start_vertex->edge;
28937c478bd9Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; ppep = pep,
28947c478bd9Sstevel@tonic-gate 		    pep = ppep->next) {
28957c478bd9Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
28967c478bd9Sstevel@tonic-gate 				pep->refcount--;
28977c478bd9Sstevel@tonic-gate 				if (pep->refcount == 0) {
28987c478bd9Sstevel@tonic-gate 					if (pep == ppep) {
28997c478bd9Sstevel@tonic-gate 						start_vertex->edge = pep->next;
29007c478bd9Sstevel@tonic-gate 					} else {
29017c478bd9Sstevel@tonic-gate 						ppep->next = pep->next;
29027c478bd9Sstevel@tonic-gate 					}
29037c478bd9Sstevel@tonic-gate 					adj_proc->incount--;
29047c478bd9Sstevel@tonic-gate 					flk_proc_release(adj_proc);
29057c478bd9Sstevel@tonic-gate 					flk_free_proc_edge(pep);
29067c478bd9Sstevel@tonic-gate 				}
29077c478bd9Sstevel@tonic-gate 				break;
29087c478bd9Sstevel@tonic-gate 			}
29097c478bd9Sstevel@tonic-gate 		}
29107c478bd9Sstevel@tonic-gate 		ep = nep;
29117c478bd9Sstevel@tonic-gate 	}
29127c478bd9Sstevel@tonic-gate 	ep = FIRST_IN(lock);
29137c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
29147c478bd9Sstevel@tonic-gate 		proc_vertex_t *in_proc;
29157c478bd9Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
29167c478bd9Sstevel@tonic-gate 		nep = NEXT_IN(ep);
29177c478bd9Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
29187c478bd9Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
29197c478bd9Sstevel@tonic-gate 		flk_free_edge(ep);
29207c478bd9Sstevel@tonic-gate 		ppep = in_proc->edge;
29217c478bd9Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; ppep = pep,
29227c478bd9Sstevel@tonic-gate 		    pep = ppep->next) {
29237c478bd9Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
29247c478bd9Sstevel@tonic-gate 				pep->refcount--;
29257c478bd9Sstevel@tonic-gate 				if (pep->refcount == 0) {
29267c478bd9Sstevel@tonic-gate 					if (pep == ppep) {
29277c478bd9Sstevel@tonic-gate 						in_proc->edge = pep->next;
29287c478bd9Sstevel@tonic-gate 					} else {
29297c478bd9Sstevel@tonic-gate 						ppep->next = pep->next;
29307c478bd9Sstevel@tonic-gate 					}
29317c478bd9Sstevel@tonic-gate 					start_vertex->incount--;
29327c478bd9Sstevel@tonic-gate 					flk_proc_release(in_proc);
29337c478bd9Sstevel@tonic-gate 					flk_free_proc_edge(pep);
29347c478bd9Sstevel@tonic-gate 				}
29357c478bd9Sstevel@tonic-gate 				break;
29367c478bd9Sstevel@tonic-gate 			}
29377c478bd9Sstevel@tonic-gate 		}
29387c478bd9Sstevel@tonic-gate 		ep = nep;
29397c478bd9Sstevel@tonic-gate 	}
29407c478bd9Sstevel@tonic-gate 	flk_proc_release(start_vertex);
29417c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
29427c478bd9Sstevel@tonic-gate 	return (1);
29437c478bd9Sstevel@tonic-gate }
29447c478bd9Sstevel@tonic-gate 
/*
 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex
 * from the list we return that, otherwise we allocate one. If necessary,
 * we grow the list of vertices also.
 */

static proc_vertex_t *
flk_get_proc_vertex(lock_descriptor_t *lock)
{
	int i;
	proc_vertex_t	*pv;
	proc_vertex_t	**palloc;

	ASSERT(MUTEX_HELD(&flock_lock));
	/*
	 * Fast path: the lock caches the index of its owner's vertex in
	 * lock->pvertex (-1 means "no cached index").  The cached slot may
	 * have been freed and reused for a different owner, so the owner
	 * must be re-verified before the cached vertex is trusted.
	 */
	if (lock->pvertex != -1) {
		ASSERT(lock->pvertex >= 0);
		pv = pgraph.proc[lock->pvertex];
		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
			return (pv);
		}
	}
	/* Slow path: linear scan of the whole vertex table for the owner. */
	for (i = 0; i < pgraph.gcount; i++) {
		pv = pgraph.proc[i];
		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
			/* Refresh the cache for the next lookup. */
			lock->pvertex = pv->index = i;
			return (pv);
		}
	}
	/* No vertex for this owner yet: allocate and register a new one. */
	pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
	pv->pid = lock->l_flock.l_pid;
	pv->sysid = lock->l_flock.l_sysid;
	flk_proc_vertex_allocs++;
	/* Prefer reusing a free slot in the existing table. */
	if (pgraph.free != 0) {
		for (i = 0; i < pgraph.gcount; i++) {
			if (pgraph.proc[i] == NULL) {
				pgraph.proc[i] = pv;
				lock->pvertex = pv->index = i;
				pgraph.free--;
				return (pv);
			}
		}
	}
	/*
	 * Table is full: grow it by PROC_CHUNK slots, copying the old
	 * contents into the new (zeroed) array and freeing the old one.
	 */
	palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
	    sizeof (proc_vertex_t *), KM_SLEEP);

	if (pgraph.proc) {
		bcopy(pgraph.proc, palloc,
		    pgraph.gcount * sizeof (proc_vertex_t *));

		kmem_free(pgraph.proc,
		    pgraph.gcount * sizeof (proc_vertex_t *));
	}
	pgraph.proc = palloc;
	/* One of the PROC_CHUNK new slots is consumed by pv right away. */
	pgraph.free += (PROC_CHUNK - 1);
	pv->index = lock->pvertex = pgraph.gcount;
	pgraph.gcount += PROC_CHUNK;
	pgraph.proc[pv->index] = pv;
	return (pv);
}
30047c478bd9Sstevel@tonic-gate 
30057c478bd9Sstevel@tonic-gate /*
30067c478bd9Sstevel@tonic-gate  * Allocate a proc edge.
30077c478bd9Sstevel@tonic-gate  */
30087c478bd9Sstevel@tonic-gate 
30097c478bd9Sstevel@tonic-gate static proc_edge_t *
flk_get_proc_edge()30107c478bd9Sstevel@tonic-gate flk_get_proc_edge()
30117c478bd9Sstevel@tonic-gate {
30127c478bd9Sstevel@tonic-gate 	proc_edge_t *pep;
30137c478bd9Sstevel@tonic-gate 
30147c478bd9Sstevel@tonic-gate 	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
30157c478bd9Sstevel@tonic-gate 	flk_proc_edge_allocs++;
30167c478bd9Sstevel@tonic-gate 	return (pep);
30177c478bd9Sstevel@tonic-gate }
30187c478bd9Sstevel@tonic-gate 
30197c478bd9Sstevel@tonic-gate /*
30207c478bd9Sstevel@tonic-gate  * Free the proc edge. Called whenever its reference count goes to zero.
30217c478bd9Sstevel@tonic-gate  */
30227c478bd9Sstevel@tonic-gate 
30237c478bd9Sstevel@tonic-gate static void
flk_free_proc_edge(proc_edge_t * pep)30247c478bd9Sstevel@tonic-gate flk_free_proc_edge(proc_edge_t *pep)
30257c478bd9Sstevel@tonic-gate {
30267c478bd9Sstevel@tonic-gate 	ASSERT(pep->refcount == 0);
30277c478bd9Sstevel@tonic-gate 	kmem_free((void *)pep, sizeof (proc_edge_t));
30287c478bd9Sstevel@tonic-gate 	flk_proc_edge_frees++;
30297c478bd9Sstevel@tonic-gate }
30307c478bd9Sstevel@tonic-gate 
30317c478bd9Sstevel@tonic-gate /*
30327c478bd9Sstevel@tonic-gate  * Color the graph explicitly done only when the mark value hits max value.
30337c478bd9Sstevel@tonic-gate  */
30347c478bd9Sstevel@tonic-gate 
30357c478bd9Sstevel@tonic-gate static void
flk_proc_graph_uncolor()30367c478bd9Sstevel@tonic-gate flk_proc_graph_uncolor()
30377c478bd9Sstevel@tonic-gate {
30387c478bd9Sstevel@tonic-gate 	int i;
30397c478bd9Sstevel@tonic-gate 
30407c478bd9Sstevel@tonic-gate 	if (pgraph.mark == UINT_MAX) {
30417c478bd9Sstevel@tonic-gate 		for (i = 0; i < pgraph.gcount; i++)
30427c478bd9Sstevel@tonic-gate 			if (pgraph.proc[i] != NULL) {
30437c478bd9Sstevel@tonic-gate 				pgraph.proc[i]->atime = 0;
30447c478bd9Sstevel@tonic-gate 				pgraph.proc[i]->dtime = 0;
30457c478bd9Sstevel@tonic-gate 			}
30467c478bd9Sstevel@tonic-gate 		pgraph.mark = 1;
30477c478bd9Sstevel@tonic-gate 	} else {
30487c478bd9Sstevel@tonic-gate 		pgraph.mark++;
30497c478bd9Sstevel@tonic-gate 	}
30507c478bd9Sstevel@tonic-gate }
30517c478bd9Sstevel@tonic-gate 
30527c478bd9Sstevel@tonic-gate /*
30537c478bd9Sstevel@tonic-gate  * Release the proc vertex iff both there are no in edges and out edges
30547c478bd9Sstevel@tonic-gate  */
30557c478bd9Sstevel@tonic-gate 
30567c478bd9Sstevel@tonic-gate static void
flk_proc_release(proc_vertex_t * proc)30577c478bd9Sstevel@tonic-gate flk_proc_release(proc_vertex_t *proc)
30587c478bd9Sstevel@tonic-gate {
30597c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
30607c478bd9Sstevel@tonic-gate 	if (proc->edge == NULL && proc->incount == 0) {
30617c478bd9Sstevel@tonic-gate 		pgraph.proc[proc->index] = NULL;
30627c478bd9Sstevel@tonic-gate 		pgraph.free++;
30637c478bd9Sstevel@tonic-gate 		kmem_free(proc, sizeof (proc_vertex_t));
30647c478bd9Sstevel@tonic-gate 		flk_proc_vertex_frees++;
30657c478bd9Sstevel@tonic-gate 	}
30667c478bd9Sstevel@tonic-gate }
30677c478bd9Sstevel@tonic-gate 
/*
 * Updates process graph to reflect change in a lock_graph.
 * Note: We should call this function only after we have a correctly
 * recomputed lock graph. Otherwise we might miss a deadlock detection.
 * eg: in function flk_relation() we call this function after flk_recompute_
 * dependencies() otherwise if a process tries to lock a vnode hashed
 * into another graph it might sleep for ever.
 *
 * "delete" non-zero removes one reference to the proc edge mirroring the
 * lock edge "ep"; zero adds one.
 */

static void
flk_update_proc_graph(edge_t *ep, int delete)
{
	proc_vertex_t *toproc, *fromproc;
	proc_edge_t *pep, *prevpep;

	mutex_enter(&flock_lock);
	/* Look up (or create) the proc vertices for both lock endpoints. */
	toproc = flk_get_proc_vertex(ep->to_vertex);
	fromproc = flk_get_proc_vertex(ep->from_vertex);

	if (!delete)
		goto add;
	pep = prevpep = fromproc->edge;

	ASSERT(pep != NULL);
	/*
	 * Delete: walk fromproc's adjacency list for the edge to toproc
	 * and drop one reference; unlink and free the edge when its last
	 * reference goes away.
	 */
	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount--;
			if (pep->refcount == 0) {
				if (pep == prevpep) {
					/* Edge is the list head. */
					fromproc->edge = pep->next;
				} else {
					prevpep->next = pep->next;
				}
				toproc->incount--;
				flk_proc_release(toproc);
				flk_free_proc_edge(pep);
			}
			break;
		}
		prevpep = pep;
		pep = pep->next;
	}
	/* fromproc may now be fully disconnected; release it if so. */
	flk_proc_release(fromproc);
	mutex_exit(&flock_lock);
	return;
add:

	/*
	 * Add: if an edge fromproc->toproc already exists just bump its
	 * reference count; otherwise allocate one and link it at the head
	 * of fromproc's adjacency list.
	 */
	pep = fromproc->edge;

	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount++;
			break;
		}
		pep = pep->next;
	}
	if (pep == NULL) {
		pep = flk_get_proc_edge();
		pep->to_proc = toproc;
		pep->refcount = 1;
		toproc->incount++;
		pep->next = fromproc->edge;
		fromproc->edge = pep;
	}
	mutex_exit(&flock_lock);
}
31367c478bd9Sstevel@tonic-gate 
31377c478bd9Sstevel@tonic-gate /*
31387c478bd9Sstevel@tonic-gate  * Set the control status for lock manager requests.
31397c478bd9Sstevel@tonic-gate  *
31407c478bd9Sstevel@tonic-gate  */
31417c478bd9Sstevel@tonic-gate 
/*
 * PSARC case 1997/292
 *
 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
 * Effects: Set the state of the NLM server identified by "nlmid"
 *   in the NLM registry to state "nlm_state."
 *   Raises exception no_such_nlm if "nlmid" doesn't identify a known
 *   NLM server to this LLM.
 *   Note that when this routine is called with NLM_SHUTTING_DOWN there
 *   may be locks requests that have gotten started but not finished.  In
 *   particular, there may be blocking requests that are in the callback code
 *   before sleeping (so they're not holding the lock for the graph).  If
 *   such a thread reacquires the graph's lock (to go to sleep) after
 *   NLM state in the NLM registry  is set to a non-up value,
 *   it will notice the status and bail out.  If the request gets
 *   granted before the thread can check the NLM registry, let it
 *   continue normally.  It will get flushed when we are called with NLM_DOWN.
 *
 * Modifies: nlm_reg_obj (global)
 * Arguments:
 *    nlmid	(IN):    id uniquely identifying an NLM server
 *    nlm_state (IN):    NLM server state to change "nlmid" to
 */
void
cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}

	/*
	 * Check for development/debugging.  It is possible to boot a node
	 * in non-cluster mode, and then run a special script, currently
	 * available only to developers, to bring up the node as part of a
	 * cluster.  The problem is that running such a script does not
	 * result in the routine flk_init() being called and hence global array
	 * nlm_reg_status is NULL.  The NLM thinks it's in cluster mode,
	 * but the LLM needs to do an additional check to see if the global
	 * array has been created or not. If nlm_reg_status is NULL, then
	 * return, else continue.
	 */
	if (nlm_reg_status == NULL) {
		return;
	}

	/*
	 * NOTE(review): the contract above says nlmid >= 1, but the
	 * assertion admits 0 and nlm_status_size inclusively — confirm
	 * the registry's valid index range before tightening this.
	 */
	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
	mutex_enter(&nlm_reg_lock);

	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
		/*
		 * If the NLM server "nlmid" is unknown in the NLM registry,
		 * add it to the registry in the nlm shutting down state.
		 */
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    FLK_NLM_SHUTTING_DOWN);
	} else {
		/*
		 * Change the state of the NLM server identified by "nlmid"
		 * in the NLM registry to the argument "nlm_state."
		 */
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    nlm_state);
	}

	/*
	 *  The reason we must register the NLM server that is shutting down
	 *  with an LLM that doesn't already know about it (never sent a lock
	 *  request) is to handle correctly a race between shutdown and a new
	 *  lock request.  Suppose that a shutdown request from the NLM server
	 *  invokes this routine at the LLM, and a thread is spawned to
	 *  service the request. Now suppose a new lock request is in
	 *  progress and has already passed the first line of defense in
	 *  reclock(), which denies new locks requests from NLM servers
	 *  that are not in the NLM_UP state.  After the current routine
	 *  is invoked for both phases of shutdown, the routine will return,
	 *  having done nothing, and the lock request will proceed and
	 *  probably be granted.  The problem is that the shutdown was ignored
	 *  by the lock request because there was no record of that NLM server
	 *  shutting down.   We will be in the peculiar position of thinking
	 *  that we've shutdown the NLM server and all locks at all LLMs have
	 *  been discarded, but in fact there's still one lock held.
	 *  The solution is to record the existence of NLM server and change
	 *  its state immediately to NLM_SHUTTING_DOWN.  The lock request in
	 *  progress may proceed because the next phase NLM_DOWN will catch
	 *  this lock and discard it.
	 */
	mutex_exit(&nlm_reg_lock);

	switch (nlm_state) {
	case FLK_NLM_UP:
		/*
		 * Change the NLM state of all locks still held on behalf of
		 * the NLM server identified by "nlmid" to NLM_UP.
		 */
		cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
		break;

	case FLK_NLM_SHUTTING_DOWN:
		/*
		 * Wake up all sleeping locks for the NLM server identified
		 * by "nlmid." Note that eventually all woken threads will
		 * have their lock requests cancelled and descriptors
		 * removed from the sleeping lock list.  Note that the NLM
		 * server state associated with each lock descriptor is
		 * changed to FLK_NLM_SHUTTING_DOWN.
		 */
		cl_flk_wakeup_sleeping_nlm_locks(nlmid);
		break;

	case FLK_NLM_DOWN:
		/*
		 * Discard all active, granted locks for this NLM server
		 * identified by "nlmid."
		 */
		cl_flk_unlock_nlm_granted(nlmid);
		break;

	default:
		panic("cl_set_nlm_status: bad status (%d)", nlm_state);
	}
}
32667c478bd9Sstevel@tonic-gate 
32677c478bd9Sstevel@tonic-gate /*
32687c478bd9Sstevel@tonic-gate  * Set the control status for lock manager requests.
32697c478bd9Sstevel@tonic-gate  *
32707c478bd9Sstevel@tonic-gate  * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
32717c478bd9Sstevel@tonic-gate  * may be locks requests that have gotten started but not finished.  In
32727c478bd9Sstevel@tonic-gate  * particular, there may be blocking requests that are in the callback code
32737c478bd9Sstevel@tonic-gate  * before sleeping (so they're not holding the lock for the graph).  If
32747c478bd9Sstevel@tonic-gate  * such a thread reacquires the graph's lock (to go to sleep) after
32757c478bd9Sstevel@tonic-gate  * flk_lockmgr_status is set to a non-up value, it will notice the status
32767c478bd9Sstevel@tonic-gate  * and bail out.  If the request gets granted before the thread can check
32777c478bd9Sstevel@tonic-gate  * flk_lockmgr_status, let it continue normally.  It will get flushed when
32787c478bd9Sstevel@tonic-gate  * we are called with FLK_LOCKMGR_DOWN.
32797c478bd9Sstevel@tonic-gate  */
32807c478bd9Sstevel@tonic-gate 
32817c478bd9Sstevel@tonic-gate void
flk_set_lockmgr_status(flk_lockmgr_status_t status)32827c478bd9Sstevel@tonic-gate flk_set_lockmgr_status(flk_lockmgr_status_t status)
32837c478bd9Sstevel@tonic-gate {
32847c478bd9Sstevel@tonic-gate 	int i;
32857c478bd9Sstevel@tonic-gate 	graph_t *gp;
32867c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
32877c478bd9Sstevel@tonic-gate 
32887c478bd9Sstevel@tonic-gate 	fg = flk_get_globals();
32897c478bd9Sstevel@tonic-gate 	ASSERT(fg != NULL);
32907c478bd9Sstevel@tonic-gate 
32917c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
32927c478bd9Sstevel@tonic-gate 	fg->flk_lockmgr_status = status;
32937c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
32947c478bd9Sstevel@tonic-gate 
32957c478bd9Sstevel@tonic-gate 	/*
32967c478bd9Sstevel@tonic-gate 	 * If the lock manager is coming back up, all that's needed is to
32977c478bd9Sstevel@tonic-gate 	 * propagate this information to the graphs.  If the lock manager
32987c478bd9Sstevel@tonic-gate 	 * is going down, additional action is required, and each graph's
32997c478bd9Sstevel@tonic-gate 	 * copy of the state is updated atomically with this other action.
33007c478bd9Sstevel@tonic-gate 	 */
33017c478bd9Sstevel@tonic-gate 	switch (status) {
33027c478bd9Sstevel@tonic-gate 	case FLK_LOCKMGR_UP:
33037c478bd9Sstevel@tonic-gate 		for (i = 0; i < HASH_SIZE; i++) {
33047c478bd9Sstevel@tonic-gate 			mutex_enter(&flock_lock);
33057c478bd9Sstevel@tonic-gate 			gp = lock_graph[i];
33067c478bd9Sstevel@tonic-gate 			mutex_exit(&flock_lock);
33077c478bd9Sstevel@tonic-gate 			if (gp == NULL)
33087c478bd9Sstevel@tonic-gate 				continue;
33097c478bd9Sstevel@tonic-gate 			mutex_enter(&gp->gp_mutex);
33107c478bd9Sstevel@tonic-gate 			fg->lockmgr_status[i] = status;
33117c478bd9Sstevel@tonic-gate 			mutex_exit(&gp->gp_mutex);
33127c478bd9Sstevel@tonic-gate 		}
33137c478bd9Sstevel@tonic-gate 		break;
33147c478bd9Sstevel@tonic-gate 	case FLK_WAKEUP_SLEEPERS:
33157c478bd9Sstevel@tonic-gate 		wakeup_sleeping_lockmgr_locks(fg);
33167c478bd9Sstevel@tonic-gate 		break;
33177c478bd9Sstevel@tonic-gate 	case FLK_LOCKMGR_DOWN:
33187c478bd9Sstevel@tonic-gate 		unlock_lockmgr_granted(fg);
33197c478bd9Sstevel@tonic-gate 		break;
33207c478bd9Sstevel@tonic-gate 	default:
33217c478bd9Sstevel@tonic-gate 		panic("flk_set_lockmgr_status: bad status (%d)", status);
33227c478bd9Sstevel@tonic-gate 		break;
33237c478bd9Sstevel@tonic-gate 	}
33247c478bd9Sstevel@tonic-gate }
33257c478bd9Sstevel@tonic-gate 
33267c478bd9Sstevel@tonic-gate /*
33277c478bd9Sstevel@tonic-gate  * This routine returns all the locks that are active or sleeping and are
33287c478bd9Sstevel@tonic-gate  * associated with a particular set of identifiers.  If lock_state != 0, then
33297c478bd9Sstevel@tonic-gate  * only locks that match the lock_state are returned. If lock_state == 0, then
33307c478bd9Sstevel@tonic-gate  * all locks are returned. If pid == NOPID, the pid is ignored.  If
33317c478bd9Sstevel@tonic-gate  * use_sysid is FALSE, then the sysid is ignored.  If vp is NULL, then the
33327c478bd9Sstevel@tonic-gate  * vnode pointer is ignored.
33337c478bd9Sstevel@tonic-gate  *
33347c478bd9Sstevel@tonic-gate  * A list containing the vnode pointer and an flock structure
33357c478bd9Sstevel@tonic-gate  * describing the lock is returned.  Each element in the list is
3336da6c28aaSamw  * dynamically allocated and must be freed by the caller.  The
33377c478bd9Sstevel@tonic-gate  * last item in the list is denoted by a NULL value in the ll_next
33387c478bd9Sstevel@tonic-gate  * field.
33397c478bd9Sstevel@tonic-gate  *
33407c478bd9Sstevel@tonic-gate  * The vnode pointers returned are held.  The caller is responsible
33417c478bd9Sstevel@tonic-gate  * for releasing these.  Note that the returned list is only a snapshot of
33427c478bd9Sstevel@tonic-gate  * the current lock information, and that it is a snapshot of a moving
33437c478bd9Sstevel@tonic-gate  * target (only one graph is locked at a time).
33447c478bd9Sstevel@tonic-gate  */
33457c478bd9Sstevel@tonic-gate 
33467c478bd9Sstevel@tonic-gate locklist_t *
get_lock_list(int list_type,int lock_state,int sysid,boolean_t use_sysid,pid_t pid,const vnode_t * vp,zoneid_t zoneid)33477c478bd9Sstevel@tonic-gate get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
33487c478bd9Sstevel@tonic-gate     pid_t pid, const vnode_t *vp, zoneid_t zoneid)
33497c478bd9Sstevel@tonic-gate {
33507c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
33517c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*graph_head;
33527c478bd9Sstevel@tonic-gate 	locklist_t		listhead;
33537c478bd9Sstevel@tonic-gate 	locklist_t		*llheadp;
33547c478bd9Sstevel@tonic-gate 	locklist_t		*llp;
33557c478bd9Sstevel@tonic-gate 	locklist_t		*lltp;
33567c478bd9Sstevel@tonic-gate 	graph_t			*gp;
33577c478bd9Sstevel@tonic-gate 	int			i;
33587c478bd9Sstevel@tonic-gate 	int			first_index; /* graph index */
33597c478bd9Sstevel@tonic-gate 	int			num_indexes; /* graph index */
33607c478bd9Sstevel@tonic-gate 
33617c478bd9Sstevel@tonic-gate 	ASSERT((list_type == FLK_ACTIVE_STATE) ||
33627c478bd9Sstevel@tonic-gate 	    (list_type == FLK_SLEEPING_STATE));
33637c478bd9Sstevel@tonic-gate 
33647c478bd9Sstevel@tonic-gate 	/*
33657c478bd9Sstevel@tonic-gate 	 * Get a pointer to something to use as a list head while building
33667c478bd9Sstevel@tonic-gate 	 * the rest of the list.
33677c478bd9Sstevel@tonic-gate 	 */
33687c478bd9Sstevel@tonic-gate 	llheadp = &listhead;
33697c478bd9Sstevel@tonic-gate 	lltp = llheadp;
33707c478bd9Sstevel@tonic-gate 	llheadp->ll_next = (locklist_t *)NULL;
33717c478bd9Sstevel@tonic-gate 
33727c478bd9Sstevel@tonic-gate 	/* Figure out which graphs we want to look at. */
33737c478bd9Sstevel@tonic-gate 	if (vp == NULL) {
33747c478bd9Sstevel@tonic-gate 		first_index = 0;
33757c478bd9Sstevel@tonic-gate 		num_indexes = HASH_SIZE;
33767c478bd9Sstevel@tonic-gate 	} else {
33777c478bd9Sstevel@tonic-gate 		first_index = HASH_INDEX(vp);
33787c478bd9Sstevel@tonic-gate 		num_indexes = 1;
33797c478bd9Sstevel@tonic-gate 	}
33807c478bd9Sstevel@tonic-gate 
33817c478bd9Sstevel@tonic-gate 	for (i = first_index; i < first_index + num_indexes; i++) {
33827c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
33837c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
33847c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
33857c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
33867c478bd9Sstevel@tonic-gate 			continue;
33877c478bd9Sstevel@tonic-gate 		}
33887c478bd9Sstevel@tonic-gate 
33897c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
33907c478bd9Sstevel@tonic-gate 		graph_head = (list_type == FLK_ACTIVE_STATE) ?
33917c478bd9Sstevel@tonic-gate 		    ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
33927c478bd9Sstevel@tonic-gate 		for (lock = graph_head->l_next;
33937c478bd9Sstevel@tonic-gate 		    lock != graph_head;
33947c478bd9Sstevel@tonic-gate 		    lock = lock->l_next) {
33957c478bd9Sstevel@tonic-gate 			if (use_sysid && lock->l_flock.l_sysid != sysid)
33967c478bd9Sstevel@tonic-gate 				continue;
33977c478bd9Sstevel@tonic-gate 			if (pid != NOPID && lock->l_flock.l_pid != pid)
33987c478bd9Sstevel@tonic-gate 				continue;
33997c478bd9Sstevel@tonic-gate 			if (vp != NULL && lock->l_vnode != vp)
34007c478bd9Sstevel@tonic-gate 				continue;
34017c478bd9Sstevel@tonic-gate 			if (lock_state && !(lock_state & lock->l_state))
34027c478bd9Sstevel@tonic-gate 				continue;
34037c478bd9Sstevel@tonic-gate 			if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
34047c478bd9Sstevel@tonic-gate 				continue;
34057c478bd9Sstevel@tonic-gate 			/*
34067c478bd9Sstevel@tonic-gate 			 * A matching lock was found.  Allocate
34077c478bd9Sstevel@tonic-gate 			 * space for a new locklist entry and fill
34087c478bd9Sstevel@tonic-gate 			 * it in.
34097c478bd9Sstevel@tonic-gate 			 */
34107c478bd9Sstevel@tonic-gate 			llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
34117c478bd9Sstevel@tonic-gate 			lltp->ll_next = llp;
34127c478bd9Sstevel@tonic-gate 			VN_HOLD(lock->l_vnode);
34137c478bd9Sstevel@tonic-gate 			llp->ll_vp = lock->l_vnode;
34147c478bd9Sstevel@tonic-gate 			create_flock(lock, &(llp->ll_flock));
34157c478bd9Sstevel@tonic-gate 			llp->ll_next = (locklist_t *)NULL;
34167c478bd9Sstevel@tonic-gate 			lltp = llp;
34177c478bd9Sstevel@tonic-gate 		}
34187c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
34197c478bd9Sstevel@tonic-gate 	}
34207c478bd9Sstevel@tonic-gate 
34217c478bd9Sstevel@tonic-gate 	llp = llheadp->ll_next;
34227c478bd9Sstevel@tonic-gate 	return (llp);
34237c478bd9Sstevel@tonic-gate }
34247c478bd9Sstevel@tonic-gate 
34257c478bd9Sstevel@tonic-gate /*
34267c478bd9Sstevel@tonic-gate  * These two functions are simply interfaces to get_lock_list.  They return
34277c478bd9Sstevel@tonic-gate  * a list of sleeping or active locks for the given sysid and pid.  See
34287c478bd9Sstevel@tonic-gate  * get_lock_list for details.
34297c478bd9Sstevel@tonic-gate  *
34307c478bd9Sstevel@tonic-gate  * In either case we don't particularly care to specify the zone of interest;
34317c478bd9Sstevel@tonic-gate  * the sysid-space is global across zones, so the sysid will map to exactly one
34327c478bd9Sstevel@tonic-gate  * zone, and we'll return information for that zone.
34337c478bd9Sstevel@tonic-gate  */
34347c478bd9Sstevel@tonic-gate 
34357c478bd9Sstevel@tonic-gate locklist_t *
flk_get_sleeping_locks(int sysid,pid_t pid)34367c478bd9Sstevel@tonic-gate flk_get_sleeping_locks(int sysid, pid_t pid)
34377c478bd9Sstevel@tonic-gate {
34387c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
34397c478bd9Sstevel@tonic-gate 	    ALL_ZONES));
34407c478bd9Sstevel@tonic-gate }
34417c478bd9Sstevel@tonic-gate 
34427c478bd9Sstevel@tonic-gate locklist_t *
flk_get_active_locks(int sysid,pid_t pid)34437c478bd9Sstevel@tonic-gate flk_get_active_locks(int sysid, pid_t pid)
34447c478bd9Sstevel@tonic-gate {
34457c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
34467c478bd9Sstevel@tonic-gate 	    ALL_ZONES));
34477c478bd9Sstevel@tonic-gate }
34487c478bd9Sstevel@tonic-gate 
34497c478bd9Sstevel@tonic-gate /*
34507c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
34517c478bd9Sstevel@tonic-gate  * locks for a given vnode.  Again, see get_lock_list for details.
34527c478bd9Sstevel@tonic-gate  *
34537c478bd9Sstevel@tonic-gate  * We don't need to specify which zone's locks we're interested in.  The matter
34547c478bd9Sstevel@tonic-gate  * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
34557c478bd9Sstevel@tonic-gate  * be used by multiple zones, so the list of locks will all be from the right
34567c478bd9Sstevel@tonic-gate  * zone.
34577c478bd9Sstevel@tonic-gate  */
34587c478bd9Sstevel@tonic-gate 
34597c478bd9Sstevel@tonic-gate locklist_t *
flk_active_locks_for_vp(const vnode_t * vp)34607c478bd9Sstevel@tonic-gate flk_active_locks_for_vp(const vnode_t *vp)
34617c478bd9Sstevel@tonic-gate {
34627c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
34637c478bd9Sstevel@tonic-gate 	    ALL_ZONES));
34647c478bd9Sstevel@tonic-gate }
34657c478bd9Sstevel@tonic-gate 
34667c478bd9Sstevel@tonic-gate /*
34677c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
34687c478bd9Sstevel@tonic-gate  * nbmand locks for a given vnode.  Again, see get_lock_list for details.
34697c478bd9Sstevel@tonic-gate  *
34707c478bd9Sstevel@tonic-gate  * See the comment for flk_active_locks_for_vp() for why we don't care to
34717c478bd9Sstevel@tonic-gate  * specify the particular zone of interest.
34727c478bd9Sstevel@tonic-gate  */
34737c478bd9Sstevel@tonic-gate locklist_t *
flk_active_nbmand_locks_for_vp(const vnode_t * vp)34747c478bd9Sstevel@tonic-gate flk_active_nbmand_locks_for_vp(const vnode_t *vp)
34757c478bd9Sstevel@tonic-gate {
34767c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
34777c478bd9Sstevel@tonic-gate 	    NOPID, vp, ALL_ZONES));
34787c478bd9Sstevel@tonic-gate }
34797c478bd9Sstevel@tonic-gate 
34807c478bd9Sstevel@tonic-gate /*
34817c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
34827c478bd9Sstevel@tonic-gate  * nbmand locks for a given pid.  Again, see get_lock_list for details.
34837c478bd9Sstevel@tonic-gate  *
34847c478bd9Sstevel@tonic-gate  * The zone doesn't need to be specified here; the locks held by a
34857c478bd9Sstevel@tonic-gate  * particular process will either be local (ie, non-NFS) or from the zone
34867c478bd9Sstevel@tonic-gate  * the process is executing in.  This is because other parts of the system
34877c478bd9Sstevel@tonic-gate  * ensure that an NFS vnode can't be used in a zone other than that in
34887c478bd9Sstevel@tonic-gate  * which it was opened.
34897c478bd9Sstevel@tonic-gate  */
34907c478bd9Sstevel@tonic-gate locklist_t *
flk_active_nbmand_locks(pid_t pid)34917c478bd9Sstevel@tonic-gate flk_active_nbmand_locks(pid_t pid)
34927c478bd9Sstevel@tonic-gate {
34937c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
34947c478bd9Sstevel@tonic-gate 	    pid, NULL, ALL_ZONES));
34957c478bd9Sstevel@tonic-gate }
34967c478bd9Sstevel@tonic-gate 
34977c478bd9Sstevel@tonic-gate /*
34987c478bd9Sstevel@tonic-gate  * Free up all entries in the locklist.
34997c478bd9Sstevel@tonic-gate  */
35007c478bd9Sstevel@tonic-gate void
flk_free_locklist(locklist_t * llp)35017c478bd9Sstevel@tonic-gate flk_free_locklist(locklist_t *llp)
35027c478bd9Sstevel@tonic-gate {
35037c478bd9Sstevel@tonic-gate 	locklist_t *next_llp;
35047c478bd9Sstevel@tonic-gate 
35057c478bd9Sstevel@tonic-gate 	while (llp) {
35067c478bd9Sstevel@tonic-gate 		next_llp = llp->ll_next;
35077c478bd9Sstevel@tonic-gate 		VN_RELE(llp->ll_vp);
35087c478bd9Sstevel@tonic-gate 		kmem_free(llp, sizeof (*llp));
35097c478bd9Sstevel@tonic-gate 		llp = next_llp;
35107c478bd9Sstevel@tonic-gate 	}
35117c478bd9Sstevel@tonic-gate }
35127c478bd9Sstevel@tonic-gate 
35137c478bd9Sstevel@tonic-gate static void
cl_flk_change_nlm_state_all_locks(int nlmid,flk_nlm_status_t nlm_state)35147c478bd9Sstevel@tonic-gate cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
35157c478bd9Sstevel@tonic-gate {
35167c478bd9Sstevel@tonic-gate 	/*
35177c478bd9Sstevel@tonic-gate 	 * For each graph "lg" in the hash table lock_graph do
35187c478bd9Sstevel@tonic-gate 	 * a.  Get the list of sleeping locks
35197c478bd9Sstevel@tonic-gate 	 * b.  For each lock descriptor in the list do
35207c478bd9Sstevel@tonic-gate 	 *	i.   If the requested lock is an NLM server request AND
35217c478bd9Sstevel@tonic-gate 	 *		the nlmid is the same as the routine argument then
35227c478bd9Sstevel@tonic-gate 	 *		change the lock descriptor's state field to
35237c478bd9Sstevel@tonic-gate 	 *		"nlm_state."
35247c478bd9Sstevel@tonic-gate 	 * c.  Get the list of active locks
35257c478bd9Sstevel@tonic-gate 	 * d.  For each lock descriptor in the list do
35267c478bd9Sstevel@tonic-gate 	 *	i.   If the requested lock is an NLM server request AND
35277c478bd9Sstevel@tonic-gate 	 *		the nlmid is the same as the routine argument then
35287c478bd9Sstevel@tonic-gate 	 *		change the lock descriptor's state field to
35297c478bd9Sstevel@tonic-gate 	 *		"nlm_state."
35307c478bd9Sstevel@tonic-gate 	 */
35317c478bd9Sstevel@tonic-gate 
35327c478bd9Sstevel@tonic-gate 	int			i;
35337c478bd9Sstevel@tonic-gate 	graph_t			*gp;			/* lock graph */
35347c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;			/* lock */
35357c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*nlock = NULL;		/* next lock */
35367c478bd9Sstevel@tonic-gate 	int			lock_nlmid;
35377c478bd9Sstevel@tonic-gate 
35387c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
35397c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
35407c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
35417c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
35427c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
35437c478bd9Sstevel@tonic-gate 			continue;
35447c478bd9Sstevel@tonic-gate 		}
35457c478bd9Sstevel@tonic-gate 
35467c478bd9Sstevel@tonic-gate 		/* Get list of sleeping locks in current lock graph. */
35477c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
35487c478bd9Sstevel@tonic-gate 		for (lock = SLEEPING_HEAD(gp)->l_next;
35497c478bd9Sstevel@tonic-gate 		    lock != SLEEPING_HEAD(gp);
35507c478bd9Sstevel@tonic-gate 		    lock = nlock) {
35517c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
35527c478bd9Sstevel@tonic-gate 			/* get NLM id */
35537c478bd9Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
35547c478bd9Sstevel@tonic-gate 
35557c478bd9Sstevel@tonic-gate 			/*
35567c478bd9Sstevel@tonic-gate 			 * If NLM server request AND nlmid of lock matches
35577c478bd9Sstevel@tonic-gate 			 * nlmid of argument, then set the NLM state of the
35587c478bd9Sstevel@tonic-gate 			 * lock to "nlm_state."
35597c478bd9Sstevel@tonic-gate 			 */
35607c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
35617c478bd9Sstevel@tonic-gate 				SET_NLM_STATE(lock, nlm_state);
35627c478bd9Sstevel@tonic-gate 			}
35637c478bd9Sstevel@tonic-gate 		}
35647c478bd9Sstevel@tonic-gate 
35657c478bd9Sstevel@tonic-gate 		/* Get list of active locks in current lock graph. */
35667c478bd9Sstevel@tonic-gate 		for (lock = ACTIVE_HEAD(gp)->l_next;
35677c478bd9Sstevel@tonic-gate 		    lock != ACTIVE_HEAD(gp);
35687c478bd9Sstevel@tonic-gate 		    lock = nlock) {
35697c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
35707c478bd9Sstevel@tonic-gate 			/* get NLM id */
35717c478bd9Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
35727c478bd9Sstevel@tonic-gate 
35737c478bd9Sstevel@tonic-gate 			/*
35747c478bd9Sstevel@tonic-gate 			 * If NLM server request AND nlmid of lock matches
35757c478bd9Sstevel@tonic-gate 			 * nlmid of argument, then set the NLM state of the
35767c478bd9Sstevel@tonic-gate 			 * lock to "nlm_state."
35777c478bd9Sstevel@tonic-gate 			 */
35787c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
35797c478bd9Sstevel@tonic-gate 				ASSERT(IS_ACTIVE(lock));
35807c478bd9Sstevel@tonic-gate 				SET_NLM_STATE(lock, nlm_state);
35817c478bd9Sstevel@tonic-gate 			}
35827c478bd9Sstevel@tonic-gate 		}
35837c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
35847c478bd9Sstevel@tonic-gate 	}
35857c478bd9Sstevel@tonic-gate }
35867c478bd9Sstevel@tonic-gate 
35877c478bd9Sstevel@tonic-gate /*
35887c478bd9Sstevel@tonic-gate  * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
35897c478bd9Sstevel@tonic-gate  * Effects: Find all sleeping lock manager requests _only_ for the NLM server
35907c478bd9Sstevel@tonic-gate  *   identified by "nlmid." Poke those lock requests.
35917c478bd9Sstevel@tonic-gate  */
35927c478bd9Sstevel@tonic-gate static void
cl_flk_wakeup_sleeping_nlm_locks(int nlmid)35937c478bd9Sstevel@tonic-gate cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
35947c478bd9Sstevel@tonic-gate {
35957c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
35967c478bd9Sstevel@tonic-gate 	lock_descriptor_t *nlock = NULL; /* next lock */
35977c478bd9Sstevel@tonic-gate 	int i;
35987c478bd9Sstevel@tonic-gate 	graph_t *gp;
35997c478bd9Sstevel@tonic-gate 	int	lock_nlmid;
36007c478bd9Sstevel@tonic-gate 
36017c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
36027c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
36037c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
36047c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
36057c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
36067c478bd9Sstevel@tonic-gate 			continue;
36077c478bd9Sstevel@tonic-gate 		}
36087c478bd9Sstevel@tonic-gate 
36097c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
36107c478bd9Sstevel@tonic-gate 		for (lock = SLEEPING_HEAD(gp)->l_next;
36117c478bd9Sstevel@tonic-gate 		    lock != SLEEPING_HEAD(gp);
36127c478bd9Sstevel@tonic-gate 		    lock = nlock) {
36137c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
36147c478bd9Sstevel@tonic-gate 			/*
36157c478bd9Sstevel@tonic-gate 			 * If NLM server request _and_ nlmid of lock matches
36167c478bd9Sstevel@tonic-gate 			 * nlmid of argument, then set the NLM state of the
36177c478bd9Sstevel@tonic-gate 			 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
36187c478bd9Sstevel@tonic-gate 			 * request.
36197c478bd9Sstevel@tonic-gate 			 */
36207c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock)) {
36217c478bd9Sstevel@tonic-gate 				/* get NLM id */
36227c478bd9Sstevel@tonic-gate 				lock_nlmid =
36237c478bd9Sstevel@tonic-gate 				    GETNLMID(lock->l_flock.l_sysid);
36247c478bd9Sstevel@tonic-gate 				if (nlmid == lock_nlmid) {
36257c478bd9Sstevel@tonic-gate 					SET_NLM_STATE(lock,
36267c478bd9Sstevel@tonic-gate 					    FLK_NLM_SHUTTING_DOWN);
36277c478bd9Sstevel@tonic-gate 					INTERRUPT_WAKEUP(lock);
36287c478bd9Sstevel@tonic-gate 				}
36297c478bd9Sstevel@tonic-gate 			}
36307c478bd9Sstevel@tonic-gate 		}
36317c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
36327c478bd9Sstevel@tonic-gate 	}
36337c478bd9Sstevel@tonic-gate }
36347c478bd9Sstevel@tonic-gate 
/*
 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
 * Effects:  Find all active (granted) lock manager locks _only_ for the
 *   NLM server identified by "nlmid" and release them.
 */
static void
cl_flk_unlock_nlm_granted(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		/* The lock_graph[] table itself is guarded by flock_lock. */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		/*
		 * The successor is saved in "nlock" before each lock is
		 * examined: the current descriptor may be deleted and
		 * freed below, after which lock->l_next is invalid.
		 */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			ASSERT(IS_ACTIVE(lock));

			/*
			 * If it's an NLM server request _and_ the nlmid of
			 * the lock matches the nlmid argument, then
			 * remove the active lock from the list, wake up
			 * blocked threads, and free the storage for the
			 * lock.  Note that there's no need to mark the NLM
			 * state of this lock to NLM_DOWN because the lock
			 * will be deleted anyway and its storage freed.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
36867c478bd9Sstevel@tonic-gate 
36877c478bd9Sstevel@tonic-gate /*
36887c478bd9Sstevel@tonic-gate  * Find all sleeping lock manager requests and poke them.
36897c478bd9Sstevel@tonic-gate  */
36907c478bd9Sstevel@tonic-gate static void
wakeup_sleeping_lockmgr_locks(struct flock_globals * fg)36917c478bd9Sstevel@tonic-gate wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
36927c478bd9Sstevel@tonic-gate {
36937c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
36947c478bd9Sstevel@tonic-gate 	lock_descriptor_t *nlock = NULL; /* next lock */
36957c478bd9Sstevel@tonic-gate 	int i;
36967c478bd9Sstevel@tonic-gate 	graph_t *gp;
36977c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
36987c478bd9Sstevel@tonic-gate 
36997c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
37007c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
37017c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
37027c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
37037c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
37047c478bd9Sstevel@tonic-gate 			continue;
37057c478bd9Sstevel@tonic-gate 		}
37067c478bd9Sstevel@tonic-gate 
37077c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
37087c478bd9Sstevel@tonic-gate 		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
37097c478bd9Sstevel@tonic-gate 		for (lock = SLEEPING_HEAD(gp)->l_next;
37107c478bd9Sstevel@tonic-gate 		    lock != SLEEPING_HEAD(gp);
37117c478bd9Sstevel@tonic-gate 		    lock = nlock) {
37127c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
37137c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
37147c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
37157c478bd9Sstevel@tonic-gate 			}
37167c478bd9Sstevel@tonic-gate 		}
37177c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
37187c478bd9Sstevel@tonic-gate 	}
37197c478bd9Sstevel@tonic-gate }
37207c478bd9Sstevel@tonic-gate 
37217c478bd9Sstevel@tonic-gate 
/*
 * Find all active (granted) lock manager locks and release them.
 *
 * Only locks belonging to the caller's zone are released; NLM state is
 * not updated because each matching lock is deleted and freed outright.
 */
static void
unlock_lockmgr_granted(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();	/* restrict to this zone's locks */

	for (i = 0; i < HASH_SIZE; i++) {
		/* The lock_graph[] table itself is guarded by flock_lock. */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		/* Record for this graph that the lock manager is down. */
		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
		/*
		 * Save the successor in "nlock" before examining each
		 * lock: the current descriptor may be deleted and freed
		 * below, after which lock->l_next is invalid.
		 */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				ASSERT(IS_ACTIVE(lock));
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
37587c478bd9Sstevel@tonic-gate 
37597c478bd9Sstevel@tonic-gate 
37607c478bd9Sstevel@tonic-gate /*
37617c478bd9Sstevel@tonic-gate  * Wait until a lock is granted, cancelled, or interrupted.
37627c478bd9Sstevel@tonic-gate  */
37637c478bd9Sstevel@tonic-gate 
37647c478bd9Sstevel@tonic-gate static void
wait_for_lock(lock_descriptor_t * request)37657c478bd9Sstevel@tonic-gate wait_for_lock(lock_descriptor_t *request)
37667c478bd9Sstevel@tonic-gate {
37677c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
37687c478bd9Sstevel@tonic-gate 
37697c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
37707c478bd9Sstevel@tonic-gate 
37717c478bd9Sstevel@tonic-gate 	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
37727c478bd9Sstevel@tonic-gate 	    !(IS_INTERRUPTED(request))) {
37737c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
37747c478bd9Sstevel@tonic-gate 			flk_set_state(request, FLK_INTERRUPTED_STATE);
37757c478bd9Sstevel@tonic-gate 			request->l_state |= INTERRUPTED_LOCK;
37767c478bd9Sstevel@tonic-gate 		}
37777c478bd9Sstevel@tonic-gate 	}
37787c478bd9Sstevel@tonic-gate }
37797c478bd9Sstevel@tonic-gate 
37807c478bd9Sstevel@tonic-gate /*
37817c478bd9Sstevel@tonic-gate  * Create an flock structure from the existing lock information
37827c478bd9Sstevel@tonic-gate  *
37837c478bd9Sstevel@tonic-gate  * This routine is used to create flock structures for the lock manager
3784da6c28aaSamw  * to use in a reclaim request.  Since the lock was originated on this
37857c478bd9Sstevel@tonic-gate  * host, it must be conforming to UNIX semantics, so no checking is
37867c478bd9Sstevel@tonic-gate  * done to make sure it falls within the lower half of the 32-bit range.
37877c478bd9Sstevel@tonic-gate  */
37887c478bd9Sstevel@tonic-gate 
37897c478bd9Sstevel@tonic-gate static void
create_flock(lock_descriptor_t * lp,flock64_t * flp)37907c478bd9Sstevel@tonic-gate create_flock(lock_descriptor_t *lp, flock64_t *flp)
37917c478bd9Sstevel@tonic-gate {
37927c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
37937c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_end >= lp->l_start);
37947c478bd9Sstevel@tonic-gate 
37957c478bd9Sstevel@tonic-gate 	flp->l_type = lp->l_type;
37967c478bd9Sstevel@tonic-gate 	flp->l_whence = 0;
37977c478bd9Sstevel@tonic-gate 	flp->l_start = lp->l_start;
37987c478bd9Sstevel@tonic-gate 	flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
37997c478bd9Sstevel@tonic-gate 	    (lp->l_end - lp->l_start + 1);
38007c478bd9Sstevel@tonic-gate 	flp->l_sysid = lp->l_flock.l_sysid;
38017c478bd9Sstevel@tonic-gate 	flp->l_pid = lp->l_flock.l_pid;
38027c478bd9Sstevel@tonic-gate }
38037c478bd9Sstevel@tonic-gate 
38047c478bd9Sstevel@tonic-gate /*
38057c478bd9Sstevel@tonic-gate  * Convert flock_t data describing a lock range into unsigned long starting
38067c478bd9Sstevel@tonic-gate  * and ending points, which are put into lock_request.  Returns 0 or an
38077c478bd9Sstevel@tonic-gate  * errno value.
38087c478bd9Sstevel@tonic-gate  */
38097c478bd9Sstevel@tonic-gate 
38107c478bd9Sstevel@tonic-gate int
flk_convert_lock_data(vnode_t * vp,flock64_t * flp,u_offset_t * start,u_offset_t * end,offset_t offset)38117c478bd9Sstevel@tonic-gate flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
38127c478bd9Sstevel@tonic-gate     u_offset_t *start, u_offset_t *end, offset_t offset)
38137c478bd9Sstevel@tonic-gate {
38147c478bd9Sstevel@tonic-gate 	struct vattr	vattr;
38157c478bd9Sstevel@tonic-gate 	int	error;
38167c478bd9Sstevel@tonic-gate 
38177c478bd9Sstevel@tonic-gate 	/*
38187c478bd9Sstevel@tonic-gate 	 * Determine the starting point of the request
38197c478bd9Sstevel@tonic-gate 	 */
38207c478bd9Sstevel@tonic-gate 	switch (flp->l_whence) {
38217c478bd9Sstevel@tonic-gate 	case 0:		/* SEEK_SET */
38227c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)flp->l_start;
38237c478bd9Sstevel@tonic-gate 		break;
38247c478bd9Sstevel@tonic-gate 	case 1:		/* SEEK_CUR */
38257c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)(flp->l_start + offset);
38267c478bd9Sstevel@tonic-gate 		break;
38277c478bd9Sstevel@tonic-gate 	case 2:		/* SEEK_END */
38287c478bd9Sstevel@tonic-gate 		vattr.va_mask = AT_SIZE;
3829da6c28aaSamw 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
38307c478bd9Sstevel@tonic-gate 			return (error);
38317c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)(flp->l_start + vattr.va_size);
38327c478bd9Sstevel@tonic-gate 		break;
38337c478bd9Sstevel@tonic-gate 	default:
38347c478bd9Sstevel@tonic-gate 		return (EINVAL);
38357c478bd9Sstevel@tonic-gate 	}
38367c478bd9Sstevel@tonic-gate 
38377c478bd9Sstevel@tonic-gate 	/*
38387c478bd9Sstevel@tonic-gate 	 * Determine the range covered by the request.
38397c478bd9Sstevel@tonic-gate 	 */
38407c478bd9Sstevel@tonic-gate 	if (flp->l_len == 0)
38417c478bd9Sstevel@tonic-gate 		*end = MAX_U_OFFSET_T;
38427c478bd9Sstevel@tonic-gate 	else if ((offset_t)flp->l_len > 0) {
38437c478bd9Sstevel@tonic-gate 		*end = (u_offset_t)(*start + (flp->l_len - 1));
38447c478bd9Sstevel@tonic-gate 	} else {
38457c478bd9Sstevel@tonic-gate 		/*
38467c478bd9Sstevel@tonic-gate 		 * Negative length; why do we even allow this ?
38477c478bd9Sstevel@tonic-gate 		 * Because this allows easy specification of
38487c478bd9Sstevel@tonic-gate 		 * the last n bytes of the file.
38497c478bd9Sstevel@tonic-gate 		 */
38507c478bd9Sstevel@tonic-gate 		*end = *start;
38517c478bd9Sstevel@tonic-gate 		*start += (u_offset_t)flp->l_len;
38527c478bd9Sstevel@tonic-gate 		(*start)++;
38537c478bd9Sstevel@tonic-gate 	}
38547c478bd9Sstevel@tonic-gate 	return (0);
38557c478bd9Sstevel@tonic-gate }
38567c478bd9Sstevel@tonic-gate 
38577c478bd9Sstevel@tonic-gate /*
38587c478bd9Sstevel@tonic-gate  * Check the validity of lock data.  This can used by the NFS
38597c478bd9Sstevel@tonic-gate  * frlock routines to check data before contacting the server.  The
38607c478bd9Sstevel@tonic-gate  * server must support semantics that aren't as restrictive as
38617c478bd9Sstevel@tonic-gate  * the UNIX API, so the NFS client is required to check.
38627c478bd9Sstevel@tonic-gate  * The maximum is now passed in by the caller.
38637c478bd9Sstevel@tonic-gate  */
38647c478bd9Sstevel@tonic-gate 
38657c478bd9Sstevel@tonic-gate int
flk_check_lock_data(u_offset_t start,u_offset_t end,offset_t max)38667c478bd9Sstevel@tonic-gate flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
38677c478bd9Sstevel@tonic-gate {
38687c478bd9Sstevel@tonic-gate 	/*
38697c478bd9Sstevel@tonic-gate 	 * The end (length) for local locking should never be greater
38707c478bd9Sstevel@tonic-gate 	 * than MAXEND. However, the representation for
38717c478bd9Sstevel@tonic-gate 	 * the entire file is MAX_U_OFFSET_T.
38727c478bd9Sstevel@tonic-gate 	 */
38737c478bd9Sstevel@tonic-gate 	if ((start > max) ||
38747c478bd9Sstevel@tonic-gate 	    ((end > max) && (end != MAX_U_OFFSET_T))) {
38757c478bd9Sstevel@tonic-gate 		return (EINVAL);
38767c478bd9Sstevel@tonic-gate 	}
38777c478bd9Sstevel@tonic-gate 	if (start > end) {
38787c478bd9Sstevel@tonic-gate 		return (EINVAL);
38797c478bd9Sstevel@tonic-gate 	}
38807c478bd9Sstevel@tonic-gate 	return (0);
38817c478bd9Sstevel@tonic-gate }
38827c478bd9Sstevel@tonic-gate 
38837c478bd9Sstevel@tonic-gate /*
38847c478bd9Sstevel@tonic-gate  * Fill in request->l_flock with information about the lock blocking the
38857c478bd9Sstevel@tonic-gate  * request.  The complexity here is that lock manager requests are allowed
38867c478bd9Sstevel@tonic-gate  * to see into the upper part of the 32-bit address range, whereas local
38877c478bd9Sstevel@tonic-gate  * requests are only allowed to see signed values.
38887c478bd9Sstevel@tonic-gate  *
38897c478bd9Sstevel@tonic-gate  * What should be done when "blocker" is a lock manager lock that uses the
38907c478bd9Sstevel@tonic-gate  * upper portion of the 32-bit range, but "request" is local?  Since the
38917c478bd9Sstevel@tonic-gate  * request has already been determined to have been blocked by the blocker,
38927c478bd9Sstevel@tonic-gate  * at least some portion of "blocker" must be in the range of the request,
38937c478bd9Sstevel@tonic-gate  * or the request extends to the end of file.  For the first case, the
38947c478bd9Sstevel@tonic-gate  * portion in the lower range is returned with the indication that it goes
38957c478bd9Sstevel@tonic-gate  * "to EOF."  For the second case, the last byte of the lower range is
38967c478bd9Sstevel@tonic-gate  * returned with the indication that it goes "to EOF."
38977c478bd9Sstevel@tonic-gate  */
38987c478bd9Sstevel@tonic-gate 
38997c478bd9Sstevel@tonic-gate static void
report_blocker(lock_descriptor_t * blocker,lock_descriptor_t * request)39007c478bd9Sstevel@tonic-gate report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
39017c478bd9Sstevel@tonic-gate {
39027c478bd9Sstevel@tonic-gate 	flock64_t *flrp;			/* l_flock portion of request */
39037c478bd9Sstevel@tonic-gate 
39047c478bd9Sstevel@tonic-gate 	ASSERT(blocker != NULL);
39057c478bd9Sstevel@tonic-gate 
39067c478bd9Sstevel@tonic-gate 	flrp = &request->l_flock;
39077c478bd9Sstevel@tonic-gate 	flrp->l_whence = 0;
39087c478bd9Sstevel@tonic-gate 	flrp->l_type = blocker->l_type;
39097c478bd9Sstevel@tonic-gate 	flrp->l_pid = blocker->l_flock.l_pid;
39107c478bd9Sstevel@tonic-gate 	flrp->l_sysid = blocker->l_flock.l_sysid;
39117c478bd9Sstevel@tonic-gate 
39127c478bd9Sstevel@tonic-gate 	if (IS_LOCKMGR(request)) {
39137c478bd9Sstevel@tonic-gate 		flrp->l_start = blocker->l_start;
39147c478bd9Sstevel@tonic-gate 		if (blocker->l_end == MAX_U_OFFSET_T)
39157c478bd9Sstevel@tonic-gate 			flrp->l_len = 0;
39167c478bd9Sstevel@tonic-gate 		else
39177c478bd9Sstevel@tonic-gate 			flrp->l_len = blocker->l_end - blocker->l_start + 1;
39187c478bd9Sstevel@tonic-gate 	} else {
39197c478bd9Sstevel@tonic-gate 		if (blocker->l_start > MAXEND) {
39207c478bd9Sstevel@tonic-gate 			flrp->l_start = MAXEND;
39217c478bd9Sstevel@tonic-gate 			flrp->l_len = 0;
39227c478bd9Sstevel@tonic-gate 		} else {
39237c478bd9Sstevel@tonic-gate 			flrp->l_start = blocker->l_start;
39247c478bd9Sstevel@tonic-gate 			if (blocker->l_end == MAX_U_OFFSET_T)
39257c478bd9Sstevel@tonic-gate 				flrp->l_len = 0;
39267c478bd9Sstevel@tonic-gate 			else
39277c478bd9Sstevel@tonic-gate 				flrp->l_len = blocker->l_end -
39287c478bd9Sstevel@tonic-gate 				    blocker->l_start + 1;
39297c478bd9Sstevel@tonic-gate 		}
39307c478bd9Sstevel@tonic-gate 	}
39317c478bd9Sstevel@tonic-gate }
39327c478bd9Sstevel@tonic-gate 
39337c478bd9Sstevel@tonic-gate /*
39347c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
39357c478bd9Sstevel@tonic-gate  */
39367c478bd9Sstevel@tonic-gate /*
39377c478bd9Sstevel@tonic-gate  * This is the public routine exported by flock.h.
39387c478bd9Sstevel@tonic-gate  */
39397c478bd9Sstevel@tonic-gate void
cl_flk_change_nlm_state_to_unknown(int nlmid)39407c478bd9Sstevel@tonic-gate cl_flk_change_nlm_state_to_unknown(int nlmid)
39417c478bd9Sstevel@tonic-gate {
39427c478bd9Sstevel@tonic-gate 	/*
39437c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
39447c478bd9Sstevel@tonic-gate 	 */
39457c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
39467c478bd9Sstevel@tonic-gate 		return;
39477c478bd9Sstevel@tonic-gate 	}
39487c478bd9Sstevel@tonic-gate 
39497c478bd9Sstevel@tonic-gate 	/*
39507c478bd9Sstevel@tonic-gate 	 * See comment in cl_flk_set_nlm_status().
39517c478bd9Sstevel@tonic-gate 	 */
39527c478bd9Sstevel@tonic-gate 	if (nlm_reg_status == NULL) {
39537c478bd9Sstevel@tonic-gate 		return;
39547c478bd9Sstevel@tonic-gate 	}
39557c478bd9Sstevel@tonic-gate 
39567c478bd9Sstevel@tonic-gate 	/*
39577c478bd9Sstevel@tonic-gate 	 * protect NLM registry state with a mutex.
39587c478bd9Sstevel@tonic-gate 	 */
39597c478bd9Sstevel@tonic-gate 	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
39607c478bd9Sstevel@tonic-gate 	mutex_enter(&nlm_reg_lock);
39617c478bd9Sstevel@tonic-gate 	FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
39627c478bd9Sstevel@tonic-gate 	mutex_exit(&nlm_reg_lock);
39637c478bd9Sstevel@tonic-gate }
39647c478bd9Sstevel@tonic-gate 
39657c478bd9Sstevel@tonic-gate /*
39667c478bd9Sstevel@tonic-gate  * Return non-zero if the given I/O request conflicts with an active NBMAND
39677c478bd9Sstevel@tonic-gate  * lock.
39687c478bd9Sstevel@tonic-gate  * If svmand is non-zero, it means look at all active locks, not just NBMAND
39697c478bd9Sstevel@tonic-gate  * locks.
39707c478bd9Sstevel@tonic-gate  */
39717c478bd9Sstevel@tonic-gate 
39727c478bd9Sstevel@tonic-gate int
nbl_lock_conflict(vnode_t * vp,nbl_op_t op,u_offset_t offset,ssize_t length,int svmand,caller_context_t * ct)39737c478bd9Sstevel@tonic-gate nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3974da6c28aaSamw     ssize_t length, int svmand, caller_context_t *ct)
39757c478bd9Sstevel@tonic-gate {
39767c478bd9Sstevel@tonic-gate 	int conflict = 0;
39777c478bd9Sstevel@tonic-gate 	graph_t			*gp;
39787c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
3979da6c28aaSamw 	pid_t pid;
3980da6c28aaSamw 	int sysid;
3981da6c28aaSamw 
3982da6c28aaSamw 	if (ct == NULL) {
3983da6c28aaSamw 		pid = curproc->p_pid;
3984da6c28aaSamw 		sysid = 0;
3985da6c28aaSamw 	} else {
3986da6c28aaSamw 		pid = ct->cc_pid;
3987da6c28aaSamw 		sysid = ct->cc_sysid;
3988da6c28aaSamw 	}
39897c478bd9Sstevel@tonic-gate 
39907c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
39917c478bd9Sstevel@tonic-gate 	gp = lock_graph[HASH_INDEX(vp)];
39927c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
39937c478bd9Sstevel@tonic-gate 	if (gp == NULL)
39947c478bd9Sstevel@tonic-gate 		return (0);
39957c478bd9Sstevel@tonic-gate 
39967c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
39977c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
39987c478bd9Sstevel@tonic-gate 
39997c478bd9Sstevel@tonic-gate 	for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
40007c478bd9Sstevel@tonic-gate 		if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
4001da6c28aaSamw 		    (lock->l_flock.l_sysid != sysid ||
4002da6c28aaSamw 		    lock->l_flock.l_pid != pid) &&
40037c478bd9Sstevel@tonic-gate 		    lock_blocks_io(op, offset, length,
40047c478bd9Sstevel@tonic-gate 		    lock->l_type, lock->l_start, lock->l_end)) {
40057c478bd9Sstevel@tonic-gate 			conflict = 1;
40067c478bd9Sstevel@tonic-gate 			break;
40077c478bd9Sstevel@tonic-gate 		}
40087c478bd9Sstevel@tonic-gate 	}
40097c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
40107c478bd9Sstevel@tonic-gate 
40117c478bd9Sstevel@tonic-gate 	return (conflict);
40127c478bd9Sstevel@tonic-gate }
40137c478bd9Sstevel@tonic-gate 
40147c478bd9Sstevel@tonic-gate /*
40157c478bd9Sstevel@tonic-gate  * Return non-zero if the given I/O request conflicts with the given lock.
40167c478bd9Sstevel@tonic-gate  */
40177c478bd9Sstevel@tonic-gate 
40187c478bd9Sstevel@tonic-gate static int
lock_blocks_io(nbl_op_t op,u_offset_t offset,ssize_t length,int lock_type,u_offset_t lock_start,u_offset_t lock_end)40197c478bd9Sstevel@tonic-gate lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
40207c478bd9Sstevel@tonic-gate     int lock_type, u_offset_t lock_start, u_offset_t lock_end)
40217c478bd9Sstevel@tonic-gate {
40227c478bd9Sstevel@tonic-gate 	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
40237c478bd9Sstevel@tonic-gate 	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
40247c478bd9Sstevel@tonic-gate 
40257c478bd9Sstevel@tonic-gate 	if (op == NBL_READ && lock_type == F_RDLCK)
40267c478bd9Sstevel@tonic-gate 		return (0);
40277c478bd9Sstevel@tonic-gate 
40287c478bd9Sstevel@tonic-gate 	if (offset <= lock_start && lock_start < offset + length)
40297c478bd9Sstevel@tonic-gate 		return (1);
40307c478bd9Sstevel@tonic-gate 	if (lock_start <= offset && offset <= lock_end)
40317c478bd9Sstevel@tonic-gate 		return (1);
40327c478bd9Sstevel@tonic-gate 
40337c478bd9Sstevel@tonic-gate 	return (0);
40347c478bd9Sstevel@tonic-gate }
40357c478bd9Sstevel@tonic-gate 
40367c478bd9Sstevel@tonic-gate #ifdef DEBUG
40377c478bd9Sstevel@tonic-gate static void
check_active_locks(graph_t * gp)40387c478bd9Sstevel@tonic-gate check_active_locks(graph_t *gp)
40397c478bd9Sstevel@tonic-gate {
40407c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *lock1;
40417c478bd9Sstevel@tonic-gate 	edge_t	*ep;
40427c478bd9Sstevel@tonic-gate 
40437c478bd9Sstevel@tonic-gate 	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
40447c478bd9Sstevel@tonic-gate 	    lock = lock->l_next) {
40457c478bd9Sstevel@tonic-gate 		ASSERT(IS_ACTIVE(lock));
40467c478bd9Sstevel@tonic-gate 		ASSERT(NOT_BLOCKED(lock));
40477c478bd9Sstevel@tonic-gate 		ASSERT(!IS_BARRIER(lock));
40487c478bd9Sstevel@tonic-gate 
40497c478bd9Sstevel@tonic-gate 		ep = FIRST_IN(lock);
40507c478bd9Sstevel@tonic-gate 
40517c478bd9Sstevel@tonic-gate 		while (ep != HEAD(lock)) {
40527c478bd9Sstevel@tonic-gate 			ASSERT(IS_SLEEPING(ep->from_vertex));
40537c478bd9Sstevel@tonic-gate 			ASSERT(!NOT_BLOCKED(ep->from_vertex));
40547c478bd9Sstevel@tonic-gate 			ep = NEXT_IN(ep);
40557c478bd9Sstevel@tonic-gate 		}
40567c478bd9Sstevel@tonic-gate 
40577c478bd9Sstevel@tonic-gate 		for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
40587c478bd9Sstevel@tonic-gate 		    lock1 = lock1->l_next) {
40597c478bd9Sstevel@tonic-gate 			if (lock1->l_vnode == lock->l_vnode) {
40607c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock1, lock)) {
40617c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
40627c478bd9Sstevel@tonic-gate 				    "active lock %p blocks %p",
40637c478bd9Sstevel@tonic-gate 				    (void *)lock1, (void *)lock);
40647c478bd9Sstevel@tonic-gate 			} else if (BLOCKS(lock, lock1)) {
40657c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
40667c478bd9Sstevel@tonic-gate 				    "active lock %p blocks %p",
40677c478bd9Sstevel@tonic-gate 				    (void *)lock, (void *)lock1);
40687c478bd9Sstevel@tonic-gate 			}
40697c478bd9Sstevel@tonic-gate 			}
40707c478bd9Sstevel@tonic-gate 		}
40717c478bd9Sstevel@tonic-gate 	}
40727c478bd9Sstevel@tonic-gate }
40737c478bd9Sstevel@tonic-gate 
40747c478bd9Sstevel@tonic-gate /*
40757c478bd9Sstevel@tonic-gate  * Effect: This functions checks to see if the transition from 'old_state' to
40767c478bd9Sstevel@tonic-gate  *	'new_state' is a valid one.  It returns 0 if the transition is valid
40777c478bd9Sstevel@tonic-gate  *	and 1 if it is not.
40787c478bd9Sstevel@tonic-gate  *	For a map of valid transitions, see sys/flock_impl.h
40797c478bd9Sstevel@tonic-gate  */
40807c478bd9Sstevel@tonic-gate static int
check_lock_transition(int old_state,int new_state)40817c478bd9Sstevel@tonic-gate check_lock_transition(int old_state, int new_state)
40827c478bd9Sstevel@tonic-gate {
40837c478bd9Sstevel@tonic-gate 	switch (old_state) {
40847c478bd9Sstevel@tonic-gate 	case FLK_INITIAL_STATE:
40857c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_START_STATE) ||
40867c478bd9Sstevel@tonic-gate 		    (new_state == FLK_SLEEPING_STATE) ||
40877c478bd9Sstevel@tonic-gate 		    (new_state == FLK_ACTIVE_STATE) ||
40887c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
40897c478bd9Sstevel@tonic-gate 			return (0);
40907c478bd9Sstevel@tonic-gate 		} else {
40917c478bd9Sstevel@tonic-gate 			return (1);
40927c478bd9Sstevel@tonic-gate 		}
40937c478bd9Sstevel@tonic-gate 	case FLK_START_STATE:
40947c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_ACTIVE_STATE) ||
40957c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
40967c478bd9Sstevel@tonic-gate 			return (0);
40977c478bd9Sstevel@tonic-gate 		} else {
40987c478bd9Sstevel@tonic-gate 			return (1);
40997c478bd9Sstevel@tonic-gate 		}
41007c478bd9Sstevel@tonic-gate 	case FLK_ACTIVE_STATE:
41017c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
41027c478bd9Sstevel@tonic-gate 			return (0);
41037c478bd9Sstevel@tonic-gate 		} else {
41047c478bd9Sstevel@tonic-gate 			return (1);
41057c478bd9Sstevel@tonic-gate 		}
41067c478bd9Sstevel@tonic-gate 	case FLK_SLEEPING_STATE:
41077c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_GRANTED_STATE) ||
41087c478bd9Sstevel@tonic-gate 		    (new_state == FLK_INTERRUPTED_STATE) ||
41097c478bd9Sstevel@tonic-gate 		    (new_state == FLK_CANCELLED_STATE)) {
41107c478bd9Sstevel@tonic-gate 			return (0);
41117c478bd9Sstevel@tonic-gate 		} else {
41127c478bd9Sstevel@tonic-gate 			return (1);
41137c478bd9Sstevel@tonic-gate 		}
41147c478bd9Sstevel@tonic-gate 	case FLK_GRANTED_STATE:
41157c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_START_STATE) ||
41167c478bd9Sstevel@tonic-gate 		    (new_state == FLK_INTERRUPTED_STATE) ||
41177c478bd9Sstevel@tonic-gate 		    (new_state == FLK_CANCELLED_STATE)) {
41187c478bd9Sstevel@tonic-gate 			return (0);
41197c478bd9Sstevel@tonic-gate 		} else {
41207c478bd9Sstevel@tonic-gate 			return (1);
41217c478bd9Sstevel@tonic-gate 		}
41227c478bd9Sstevel@tonic-gate 	case FLK_CANCELLED_STATE:
41237c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_INTERRUPTED_STATE) ||
41247c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
41257c478bd9Sstevel@tonic-gate 			return (0);
41267c478bd9Sstevel@tonic-gate 		} else {
41277c478bd9Sstevel@tonic-gate 			return (1);
41287c478bd9Sstevel@tonic-gate 		}
41297c478bd9Sstevel@tonic-gate 	case FLK_INTERRUPTED_STATE:
41307c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
41317c478bd9Sstevel@tonic-gate 			return (0);
41327c478bd9Sstevel@tonic-gate 		} else {
41337c478bd9Sstevel@tonic-gate 			return (1);
41347c478bd9Sstevel@tonic-gate 		}
41357c478bd9Sstevel@tonic-gate 	case FLK_DEAD_STATE:
41367c478bd9Sstevel@tonic-gate 		/* May be set more than once */
41377c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
41387c478bd9Sstevel@tonic-gate 			return (0);
41397c478bd9Sstevel@tonic-gate 		} else {
41407c478bd9Sstevel@tonic-gate 			return (1);
41417c478bd9Sstevel@tonic-gate 		}
41427c478bd9Sstevel@tonic-gate 	default:
41437c478bd9Sstevel@tonic-gate 		return (1);
41447c478bd9Sstevel@tonic-gate 	}
41457c478bd9Sstevel@tonic-gate }
41467c478bd9Sstevel@tonic-gate 
41477c478bd9Sstevel@tonic-gate static void
check_sleeping_locks(graph_t * gp)41487c478bd9Sstevel@tonic-gate check_sleeping_locks(graph_t *gp)
41497c478bd9Sstevel@tonic-gate {
41507c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock1, *lock2;
41517c478bd9Sstevel@tonic-gate 	edge_t *ep;
41527c478bd9Sstevel@tonic-gate 	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
41537c478bd9Sstevel@tonic-gate 	    lock1 = lock1->l_next) {
41547c478bd9Sstevel@tonic-gate 				ASSERT(!IS_BARRIER(lock1));
41557c478bd9Sstevel@tonic-gate 	for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
41567c478bd9Sstevel@tonic-gate 	    lock2 = lock2->l_next) {
41577c478bd9Sstevel@tonic-gate 		if (lock1->l_vnode == lock2->l_vnode) {
41587c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock2, lock1)) {
41597c478bd9Sstevel@tonic-gate 				ASSERT(!IS_GRANTED(lock1));
41607c478bd9Sstevel@tonic-gate 				ASSERT(!NOT_BLOCKED(lock1));
41617c478bd9Sstevel@tonic-gate 				path(lock1, lock2);
41627c478bd9Sstevel@tonic-gate 			}
41637c478bd9Sstevel@tonic-gate 		}
41647c478bd9Sstevel@tonic-gate 	}
41657c478bd9Sstevel@tonic-gate 
41667c478bd9Sstevel@tonic-gate 	for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
41677c478bd9Sstevel@tonic-gate 	    lock2 = lock2->l_next) {
41687c478bd9Sstevel@tonic-gate 				ASSERT(!IS_BARRIER(lock1));
41697c478bd9Sstevel@tonic-gate 		if (lock1->l_vnode == lock2->l_vnode) {
41707c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock2, lock1)) {
41717c478bd9Sstevel@tonic-gate 				ASSERT(!IS_GRANTED(lock1));
41727c478bd9Sstevel@tonic-gate 				ASSERT(!NOT_BLOCKED(lock1));
41737c478bd9Sstevel@tonic-gate 				path(lock1, lock2);
41747c478bd9Sstevel@tonic-gate 			}
41757c478bd9Sstevel@tonic-gate 		}
41767c478bd9Sstevel@tonic-gate 	}
41777c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock1);
41787c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
41797c478bd9Sstevel@tonic-gate 		ASSERT(BLOCKS(ep->to_vertex, lock1));
41807c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
41817c478bd9Sstevel@tonic-gate 	}
41827c478bd9Sstevel@tonic-gate 	}
41837c478bd9Sstevel@tonic-gate }
41847c478bd9Sstevel@tonic-gate 
41857c478bd9Sstevel@tonic-gate static int
level_two_path(lock_descriptor_t * lock1,lock_descriptor_t * lock2,int no_path)41867c478bd9Sstevel@tonic-gate level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
41877c478bd9Sstevel@tonic-gate {
41887c478bd9Sstevel@tonic-gate 	edge_t	*ep;
41897c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*vertex;
41907c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
41917c478bd9Sstevel@tonic-gate 
41927c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
41937c478bd9Sstevel@tonic-gate 
41947c478bd9Sstevel@tonic-gate 	flk_graph_uncolor(lock1->l_graph);
41957c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock1);
41967c478bd9Sstevel@tonic-gate 	ASSERT(ep != HEAD(lock1));
41977c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
41987c478bd9Sstevel@tonic-gate 		if (no_path)
41997c478bd9Sstevel@tonic-gate 			ASSERT(ep->to_vertex != lock2);
42007c478bd9Sstevel@tonic-gate 		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
42017c478bd9Sstevel@tonic-gate 		COLOR(ep->to_vertex);
42027c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
42037c478bd9Sstevel@tonic-gate 	}
42047c478bd9Sstevel@tonic-gate 
42057c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
42067c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_dstack);
42077c478bd9Sstevel@tonic-gate 		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
42087c478bd9Sstevel@tonic-gate 		    ep = NEXT_ADJ(ep)) {
42097c478bd9Sstevel@tonic-gate 			if (COLORED(ep->to_vertex))
42107c478bd9Sstevel@tonic-gate 				continue;
42117c478bd9Sstevel@tonic-gate 			COLOR(ep->to_vertex);
42127c478bd9Sstevel@tonic-gate 			if (ep->to_vertex == lock2)
42137c478bd9Sstevel@tonic-gate 				return (1);
42147c478bd9Sstevel@tonic-gate 
42157c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
42167c478bd9Sstevel@tonic-gate 		}
42177c478bd9Sstevel@tonic-gate 	}
42187c478bd9Sstevel@tonic-gate 	return (0);
42197c478bd9Sstevel@tonic-gate }
42207c478bd9Sstevel@tonic-gate 
42217c478bd9Sstevel@tonic-gate static void
check_owner_locks(graph_t * gp,pid_t pid,int sysid,vnode_t * vp)42227c478bd9Sstevel@tonic-gate check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
42237c478bd9Sstevel@tonic-gate {
42247c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
42257c478bd9Sstevel@tonic-gate 
42267c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
42277c478bd9Sstevel@tonic-gate 
42287c478bd9Sstevel@tonic-gate 	if (lock) {
42297c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
42307c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_pid == pid &&
42317c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid)
42327c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
42337c478bd9Sstevel@tonic-gate 				    "owner pid %d's lock %p in active queue",
42347c478bd9Sstevel@tonic-gate 				    pid, (void *)lock);
42357c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
42367c478bd9Sstevel@tonic-gate 		}
42377c478bd9Sstevel@tonic-gate 	}
42387c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
42397c478bd9Sstevel@tonic-gate 
42407c478bd9Sstevel@tonic-gate 	if (lock) {
42417c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
42427c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_pid == pid &&
42437c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid)
42447c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
42457c478bd9Sstevel@tonic-gate 				    "owner pid %d's lock %p in sleep queue",
42467c478bd9Sstevel@tonic-gate 				    pid, (void *)lock);
42477c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
42487c478bd9Sstevel@tonic-gate 		}
42497c478bd9Sstevel@tonic-gate 	}
42507c478bd9Sstevel@tonic-gate }
42517c478bd9Sstevel@tonic-gate 
42527c478bd9Sstevel@tonic-gate static int
level_one_path(lock_descriptor_t * lock1,lock_descriptor_t * lock2)42537c478bd9Sstevel@tonic-gate level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
42547c478bd9Sstevel@tonic-gate {
42557c478bd9Sstevel@tonic-gate 	edge_t *ep = FIRST_ADJ(lock1);
42567c478bd9Sstevel@tonic-gate 
42577c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
42587c478bd9Sstevel@tonic-gate 		if (ep->to_vertex == lock2)
42597c478bd9Sstevel@tonic-gate 			return (1);
42607c478bd9Sstevel@tonic-gate 		else
42617c478bd9Sstevel@tonic-gate 			ep = NEXT_ADJ(ep);
42627c478bd9Sstevel@tonic-gate 	}
42637c478bd9Sstevel@tonic-gate 	return (0);
42647c478bd9Sstevel@tonic-gate }
42657c478bd9Sstevel@tonic-gate 
42667c478bd9Sstevel@tonic-gate static int
no_path(lock_descriptor_t * lock1,lock_descriptor_t * lock2)42677c478bd9Sstevel@tonic-gate no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
42687c478bd9Sstevel@tonic-gate {
42697c478bd9Sstevel@tonic-gate 	return (!level_two_path(lock1, lock2, 1));
42707c478bd9Sstevel@tonic-gate }
42717c478bd9Sstevel@tonic-gate 
42727c478bd9Sstevel@tonic-gate static void
path(lock_descriptor_t * lock1,lock_descriptor_t * lock2)42737c478bd9Sstevel@tonic-gate path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
42747c478bd9Sstevel@tonic-gate {
42757c478bd9Sstevel@tonic-gate 	if (level_one_path(lock1, lock2)) {
42767c478bd9Sstevel@tonic-gate 		if (level_two_path(lock1, lock2, 0) != 0) {
42777c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
42787c478bd9Sstevel@tonic-gate 			    "one edge one path from lock1 %p lock2 %p",
42797c478bd9Sstevel@tonic-gate 			    (void *)lock1, (void *)lock2);
42807c478bd9Sstevel@tonic-gate 		}
42817c478bd9Sstevel@tonic-gate 	} else if (no_path(lock1, lock2)) {
42827c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC,
42837c478bd9Sstevel@tonic-gate 		    "No path from  lock1 %p to lock2 %p",
42847c478bd9Sstevel@tonic-gate 		    (void *)lock1, (void *)lock2);
42857c478bd9Sstevel@tonic-gate 	}
42867c478bd9Sstevel@tonic-gate }
42877c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4288