xref: /titanic_53/usr/src/uts/common/os/flock.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
23*7c478bd9Sstevel@tonic-gate 
24*7c478bd9Sstevel@tonic-gate /*
25*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
26*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
27*7c478bd9Sstevel@tonic-gate  */
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30*7c478bd9Sstevel@tonic-gate /*	All Rights Reserved */
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
33*7c478bd9Sstevel@tonic-gate 
34*7c478bd9Sstevel@tonic-gate #include <sys/flock_impl.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>		/* for <sys/callb.h> */
37*7c478bd9Sstevel@tonic-gate #include <sys/callb.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/clconf.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/cladm.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/nbmlock.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
43*7c478bd9Sstevel@tonic-gate 
44*7c478bd9Sstevel@tonic-gate /*
45*7c478bd9Sstevel@tonic-gate  * The following four variables are for statistics purposes and they are
46*7c478bd9Sstevel@tonic-gate  * not protected by locks. They may not be accurate but will at least be
47*7c478bd9Sstevel@tonic-gate  * close to the actual value.
48*7c478bd9Sstevel@tonic-gate  */
49*7c478bd9Sstevel@tonic-gate 
50*7c478bd9Sstevel@tonic-gate int	flk_lock_allocs;
51*7c478bd9Sstevel@tonic-gate int	flk_lock_frees;
52*7c478bd9Sstevel@tonic-gate int 	edge_allocs;
53*7c478bd9Sstevel@tonic-gate int	edge_frees;
54*7c478bd9Sstevel@tonic-gate int 	flk_proc_vertex_allocs;
55*7c478bd9Sstevel@tonic-gate int 	flk_proc_edge_allocs;
56*7c478bd9Sstevel@tonic-gate int	flk_proc_vertex_frees;
57*7c478bd9Sstevel@tonic-gate int	flk_proc_edge_frees;
58*7c478bd9Sstevel@tonic-gate 
59*7c478bd9Sstevel@tonic-gate static kmutex_t flock_lock;
60*7c478bd9Sstevel@tonic-gate 
61*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
62*7c478bd9Sstevel@tonic-gate int check_debug = 0;
63*7c478bd9Sstevel@tonic-gate #define	CHECK_ACTIVE_LOCKS(gp)	if (check_debug) \
64*7c478bd9Sstevel@tonic-gate 					check_active_locks(gp);
65*7c478bd9Sstevel@tonic-gate #define	CHECK_SLEEPING_LOCKS(gp)	if (check_debug) \
66*7c478bd9Sstevel@tonic-gate 						check_sleeping_locks(gp);
67*7c478bd9Sstevel@tonic-gate #define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 	\
68*7c478bd9Sstevel@tonic-gate 		if (check_debug)	\
69*7c478bd9Sstevel@tonic-gate 			check_owner_locks(gp, pid, sysid, vp);
70*7c478bd9Sstevel@tonic-gate #define	CHECK_LOCK_TRANSITION(old_state, new_state) \
71*7c478bd9Sstevel@tonic-gate 	{ \
72*7c478bd9Sstevel@tonic-gate 		if (check_lock_transition(old_state, new_state)) { \
73*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_PANIC, "Illegal lock transition \
74*7c478bd9Sstevel@tonic-gate 			    from %d to %d", old_state, new_state); \
75*7c478bd9Sstevel@tonic-gate 		} \
76*7c478bd9Sstevel@tonic-gate 	}
77*7c478bd9Sstevel@tonic-gate #else
78*7c478bd9Sstevel@tonic-gate 
79*7c478bd9Sstevel@tonic-gate #define	CHECK_ACTIVE_LOCKS(gp)
80*7c478bd9Sstevel@tonic-gate #define	CHECK_SLEEPING_LOCKS(gp)
81*7c478bd9Sstevel@tonic-gate #define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
82*7c478bd9Sstevel@tonic-gate #define	CHECK_LOCK_TRANSITION(old_state, new_state)
83*7c478bd9Sstevel@tonic-gate 
84*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate struct kmem_cache	*flk_edge_cache;
87*7c478bd9Sstevel@tonic-gate 
88*7c478bd9Sstevel@tonic-gate graph_t		*lock_graph[HASH_SIZE];
89*7c478bd9Sstevel@tonic-gate proc_graph_t	pgraph;
90*7c478bd9Sstevel@tonic-gate 
91*7c478bd9Sstevel@tonic-gate /*
92*7c478bd9Sstevel@tonic-gate  * Clustering.
93*7c478bd9Sstevel@tonic-gate  *
94*7c478bd9Sstevel@tonic-gate  * NLM REGISTRY TYPE IMPLEMENTATION
95*7c478bd9Sstevel@tonic-gate  *
96*7c478bd9Sstevel@tonic-gate  * Assumptions:
97*7c478bd9Sstevel@tonic-gate  *  1.  Nodes in a cluster are numbered starting at 1; always non-negative
98*7c478bd9Sstevel@tonic-gate  *	integers; maximum node id is returned by clconf_maximum_nodeid().
99*7c478bd9Sstevel@tonic-gate  *  2.  We use this node id to identify the node an NLM server runs on.
100*7c478bd9Sstevel@tonic-gate  */
101*7c478bd9Sstevel@tonic-gate 
102*7c478bd9Sstevel@tonic-gate /*
103*7c478bd9Sstevel@tonic-gate  * NLM registry object keeps track of NLM servers via their
104*7c478bd9Sstevel@tonic-gate  * nlmids (which are the node ids of the node in the cluster they run on)
105*7c478bd9Sstevel@tonic-gate  * that have requested locks at this LLM with which this registry is
106*7c478bd9Sstevel@tonic-gate  * associated.
107*7c478bd9Sstevel@tonic-gate  *
108*7c478bd9Sstevel@tonic-gate  * Representation of abstraction:
109*7c478bd9Sstevel@tonic-gate  *    rep = record[	states: array[nlm_state],
110*7c478bd9Sstevel@tonic-gate  *			lock: mutex]
111*7c478bd9Sstevel@tonic-gate  *
112*7c478bd9Sstevel@tonic-gate  *    Representation invariants:
113*7c478bd9Sstevel@tonic-gate  *	1. index i of rep.states is between 0 and n - 1 where n is number
114*7c478bd9Sstevel@tonic-gate  *	   of elements in the array, which happen to be the maximum number
115*7c478bd9Sstevel@tonic-gate  *	   of nodes in the cluster configuration + 1.
116*7c478bd9Sstevel@tonic-gate  *	2. map nlmid to index i of rep.states
117*7c478bd9Sstevel@tonic-gate  *		0   -> 0
118*7c478bd9Sstevel@tonic-gate  *		1   -> 1
119*7c478bd9Sstevel@tonic-gate  *		2   -> 2
120*7c478bd9Sstevel@tonic-gate  *		n-1 -> clconf_maximum_nodeid()+1
121*7c478bd9Sstevel@tonic-gate  *	3.  This 1-1 mapping is quite convenient and it avoids errors resulting
122*7c478bd9Sstevel@tonic-gate  *	    from forgetting to subtract 1 from the index.
123*7c478bd9Sstevel@tonic-gate  *	4.  The reason we keep the 0th index is the following.  A legitimate
124*7c478bd9Sstevel@tonic-gate  *	    cluster configuration includes making a UFS file system NFS
125*7c478bd9Sstevel@tonic-gate  *	    exportable.  The code is structured so that if you're in a cluster
126*7c478bd9Sstevel@tonic-gate  *	    you do one thing; otherwise, you do something else.  The problem
127*7c478bd9Sstevel@tonic-gate  *	    is what to do if you think you're in a cluster with PXFS loaded,
128*7c478bd9Sstevel@tonic-gate  *	    but you're using UFS not PXFS?  The upper two bytes of the sysid
129*7c478bd9Sstevel@tonic-gate  *	    encode the node id of the node where NLM server runs; these bytes
130*7c478bd9Sstevel@tonic-gate  *	    are zero for UFS.  Since the nodeid is used to index into the
131*7c478bd9Sstevel@tonic-gate  *	    registry, we can record the NLM server state information at index
132*7c478bd9Sstevel@tonic-gate  *	    0 using the same mechanism used for PXFS file locks!
133*7c478bd9Sstevel@tonic-gate  */
134*7c478bd9Sstevel@tonic-gate static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
135*7c478bd9Sstevel@tonic-gate static kmutex_t nlm_reg_lock;			/* lock to protect array */
136*7c478bd9Sstevel@tonic-gate static uint_t nlm_status_size;			/* size of state array */
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate /*
139*7c478bd9Sstevel@tonic-gate  * Although we need a global lock dependency graph (and associated data
140*7c478bd9Sstevel@tonic-gate  * structures), we also need a per-zone notion of whether the lock manager is
141*7c478bd9Sstevel@tonic-gate  * running, and so whether to allow lock manager requests or not.
142*7c478bd9Sstevel@tonic-gate  *
143*7c478bd9Sstevel@tonic-gate  * Thus, on a per-zone basis we maintain a ``global'' variable
144*7c478bd9Sstevel@tonic-gate  * (flk_lockmgr_status), protected by flock_lock, and set when the lock
145*7c478bd9Sstevel@tonic-gate  * manager is determined to be changing state (starting or stopping).
146*7c478bd9Sstevel@tonic-gate  *
147*7c478bd9Sstevel@tonic-gate  * Each graph/zone pair also has a copy of this variable, which is protected by
148*7c478bd9Sstevel@tonic-gate  * the graph's mutex.
149*7c478bd9Sstevel@tonic-gate  *
150*7c478bd9Sstevel@tonic-gate  * The per-graph copies are used to synchronize lock requests with shutdown
151*7c478bd9Sstevel@tonic-gate  * requests.  The global copy is used to initialize the per-graph field when a
152*7c478bd9Sstevel@tonic-gate  * new graph is created.
153*7c478bd9Sstevel@tonic-gate  */
struct flock_globals {
	/*
	 * Per-zone "global" lock manager state; protected by flock_lock.
	 * Used to initialize the per-graph copies when a new graph is
	 * created.
	 */
	flk_lockmgr_status_t flk_lockmgr_status;
	/*
	 * Per-graph copies (one slot per lock_graph hash bucket), each
	 * protected by the corresponding graph's mutex; used to
	 * synchronize lock requests with lock manager shutdown.
	 */
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};
158*7c478bd9Sstevel@tonic-gate 
159*7c478bd9Sstevel@tonic-gate zone_key_t flock_zone_key;
160*7c478bd9Sstevel@tonic-gate 
161*7c478bd9Sstevel@tonic-gate static void create_flock(lock_descriptor_t *, flock64_t *);
162*7c478bd9Sstevel@tonic-gate static lock_descriptor_t	*flk_get_lock(void);
163*7c478bd9Sstevel@tonic-gate static void	flk_free_lock(lock_descriptor_t	*lock);
164*7c478bd9Sstevel@tonic-gate static void	flk_get_first_blocking_lock(lock_descriptor_t *request);
165*7c478bd9Sstevel@tonic-gate static int flk_process_request(lock_descriptor_t *);
166*7c478bd9Sstevel@tonic-gate static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
167*7c478bd9Sstevel@tonic-gate static edge_t *flk_get_edge(void);
168*7c478bd9Sstevel@tonic-gate static int flk_wait_execute_request(lock_descriptor_t *);
169*7c478bd9Sstevel@tonic-gate static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
170*7c478bd9Sstevel@tonic-gate static void flk_insert_active_lock(lock_descriptor_t *);
171*7c478bd9Sstevel@tonic-gate static void flk_delete_active_lock(lock_descriptor_t *, int);
172*7c478bd9Sstevel@tonic-gate static void flk_insert_sleeping_lock(lock_descriptor_t *);
173*7c478bd9Sstevel@tonic-gate static void flk_graph_uncolor(graph_t *);
174*7c478bd9Sstevel@tonic-gate static void flk_wakeup(lock_descriptor_t *, int);
175*7c478bd9Sstevel@tonic-gate static void flk_free_edge(edge_t *);
176*7c478bd9Sstevel@tonic-gate static void flk_recompute_dependencies(lock_descriptor_t *,
177*7c478bd9Sstevel@tonic-gate 			lock_descriptor_t **,  int, int);
178*7c478bd9Sstevel@tonic-gate static int flk_find_barriers(lock_descriptor_t *);
179*7c478bd9Sstevel@tonic-gate static void flk_update_barriers(lock_descriptor_t *);
180*7c478bd9Sstevel@tonic-gate static int flk_color_reachables(lock_descriptor_t *);
181*7c478bd9Sstevel@tonic-gate static int flk_canceled(lock_descriptor_t *);
182*7c478bd9Sstevel@tonic-gate static void flk_delete_locks_by_sysid(lock_descriptor_t *);
183*7c478bd9Sstevel@tonic-gate static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
184*7c478bd9Sstevel@tonic-gate static void wait_for_lock(lock_descriptor_t *);
185*7c478bd9Sstevel@tonic-gate static void unlock_lockmgr_granted(struct flock_globals *);
186*7c478bd9Sstevel@tonic-gate static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
187*7c478bd9Sstevel@tonic-gate 
188*7c478bd9Sstevel@tonic-gate /* Clustering hooks */
189*7c478bd9Sstevel@tonic-gate static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
190*7c478bd9Sstevel@tonic-gate static void cl_flk_wakeup_sleeping_nlm_locks(int);
191*7c478bd9Sstevel@tonic-gate static void cl_flk_unlock_nlm_granted(int);
192*7c478bd9Sstevel@tonic-gate 
193*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
194*7c478bd9Sstevel@tonic-gate static int check_lock_transition(int, int);
195*7c478bd9Sstevel@tonic-gate static void check_sleeping_locks(graph_t *);
196*7c478bd9Sstevel@tonic-gate static void check_active_locks(graph_t *);
197*7c478bd9Sstevel@tonic-gate static int no_path(lock_descriptor_t *, lock_descriptor_t *);
198*7c478bd9Sstevel@tonic-gate static void path(lock_descriptor_t *, lock_descriptor_t *);
199*7c478bd9Sstevel@tonic-gate static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
200*7c478bd9Sstevel@tonic-gate static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
201*7c478bd9Sstevel@tonic-gate static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
202*7c478bd9Sstevel@tonic-gate #endif
203*7c478bd9Sstevel@tonic-gate 
204*7c478bd9Sstevel@tonic-gate /*	proc_graph function definitions */
205*7c478bd9Sstevel@tonic-gate static int flk_check_deadlock(lock_descriptor_t *);
206*7c478bd9Sstevel@tonic-gate static void flk_proc_graph_uncolor(void);
207*7c478bd9Sstevel@tonic-gate static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
208*7c478bd9Sstevel@tonic-gate static proc_edge_t *flk_get_proc_edge(void);
209*7c478bd9Sstevel@tonic-gate static void flk_proc_release(proc_vertex_t *);
210*7c478bd9Sstevel@tonic-gate static void flk_free_proc_edge(proc_edge_t *);
211*7c478bd9Sstevel@tonic-gate static void flk_update_proc_graph(edge_t *, int);
212*7c478bd9Sstevel@tonic-gate 
213*7c478bd9Sstevel@tonic-gate /* Non-blocking mandatory locking */
214*7c478bd9Sstevel@tonic-gate static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
215*7c478bd9Sstevel@tonic-gate 			u_offset_t);
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate static struct flock_globals *
218*7c478bd9Sstevel@tonic-gate flk_get_globals(void)
219*7c478bd9Sstevel@tonic-gate {
220*7c478bd9Sstevel@tonic-gate 	/*
221*7c478bd9Sstevel@tonic-gate 	 * The KLM module had better be loaded if we're attempting to handle
222*7c478bd9Sstevel@tonic-gate 	 * lockmgr requests.
223*7c478bd9Sstevel@tonic-gate 	 */
224*7c478bd9Sstevel@tonic-gate 	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
225*7c478bd9Sstevel@tonic-gate 	return (zone_getspecific(flock_zone_key, curproc->p_zone));
226*7c478bd9Sstevel@tonic-gate }
227*7c478bd9Sstevel@tonic-gate 
228*7c478bd9Sstevel@tonic-gate static flk_lockmgr_status_t
229*7c478bd9Sstevel@tonic-gate flk_get_lockmgr_status(void)
230*7c478bd9Sstevel@tonic-gate {
231*7c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
232*7c478bd9Sstevel@tonic-gate 
233*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
234*7c478bd9Sstevel@tonic-gate 
235*7c478bd9Sstevel@tonic-gate 	if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
236*7c478bd9Sstevel@tonic-gate 		/*
237*7c478bd9Sstevel@tonic-gate 		 * KLM module not loaded; lock manager definitely not running.
238*7c478bd9Sstevel@tonic-gate 		 */
239*7c478bd9Sstevel@tonic-gate 		return (FLK_LOCKMGR_DOWN);
240*7c478bd9Sstevel@tonic-gate 	}
241*7c478bd9Sstevel@tonic-gate 	fg = flk_get_globals();
242*7c478bd9Sstevel@tonic-gate 	return (fg->flk_lockmgr_status);
243*7c478bd9Sstevel@tonic-gate }
244*7c478bd9Sstevel@tonic-gate 
245*7c478bd9Sstevel@tonic-gate /*
246*7c478bd9Sstevel@tonic-gate  * Routine called from fs_frlock in fs/fs_subr.c
247*7c478bd9Sstevel@tonic-gate  */
248*7c478bd9Sstevel@tonic-gate 
/*
 * reclock() - common entry point for record-locking requests, called from
 * fs_frlock() in fs/fs_subr.c (and by the lock manager / PXFS paths).
 *
 *	vp	 - vnode the lock applies to
 *	lckdat	 - flock64 describing the request (l_type, l_whence,
 *		   l_start, l_len, l_sysid, l_pid); on a query, the first
 *		   blocking lock (or F_UNLCK) is copied back out through it
 *	cmd	 - bitmask of command flags: SETFLCK (set a lock), INOFLCK
 *		   (I/O lock; caller holds the vnode rwlock), SLPFLCK
 *		   (willing to sleep), RCMDLCK (lock manager / NLM request),
 *		   PCMDLCK (PXFS request), NBMLCK (non-blocking mandatory)
 *	flag	 - file open flags (FREAD/FWRITE), for permission checking
 *	offset	 - current file offset, used to canonicalize relative ranges
 *	flk_cbp	 - optional callback list invoked around sleeping
 *
 * Returns 0 on success or an errno (EBADF, EINVAL, ENOLCK, EPERM, and —
 * via flk_process_request()/flk_wait_execute_request() — EDEADLK, EINTR).
 * Clustering: may also return PXFS_LOCK_BLOCKED for blocked PXFS locks.
 */
int
reclock(vnode_t		*vp,
	flock64_t	*lckdat,
	int		cmd,
	int		flag,
	u_offset_t	offset,
	flk_callback_t	*flk_cbp)
{
	lock_descriptor_t	stack_lock_request;
	lock_descriptor_t	*lock_request;
	int error = 0;
	graph_t	*gp;
	int			nlmid;

	/*
	 * Check access permissions: setting a read lock requires the file
	 * open for reading, a write lock requires it open for writing.
	 */
	if ((cmd & SETFLCK) &&
		((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
		(lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
			return (EBADF);

	/*
	 * for query and unlock we use the stack_lock_request; these never
	 * outlive this call, so no heap descriptor is needed
	 */

	if ((lckdat->l_type == F_UNLCK) ||
			!((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
				sizeof (lock_descriptor_t));

		/*
		 * following is added to make the assertions in
		 * flk_execute_request() to pass through: the edge list
		 * must be a valid (empty) circular list
		 */

		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the request range into the canonical start and end
	 * values.  The NLM protocol supports locking over the entire
	 * 32-bit range, so there's no range checking for remote requests,
	 * but we still need to verify that local requests obey the rules.
	 */
	/* Clustering */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		/* remote requests arrive pre-canonicalized (whence == 0) */
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		/* l_len == 0 means "to end of file" == max offset */
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
			lckdat->l_start + (lckdat->l_len - 1);
	} else {
		/* check the validity of the lock range */
		error = flk_convert_lock_data(vp, lckdat,
			&lock_request->l_start, &lock_request->l_end,
			offset);
		if (error) {
			goto done;
		}
		error = flk_check_lock_data(lock_request->l_start,
					    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);

	/* Translate the cmd flag bits into l_state flag bits */
	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	/*
	 * Clustering: set flag for PXFS locks
	 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
	 * also be of type 'RCMDLCK'.
	 * We do not _only_ check the GETPXFSID() macro because local PXFS
	 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
	 */

	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	/* neither set nor I/O lock: an F_RDLCK/F_WRLCK request is a query */
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
			lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * We are ready for processing the request
	 */
	if (IS_LOCKMGR(lock_request)) {
		/*
		 * If the lock request is an NLM server request ....
		 */
		if (nlm_status_size == 0) { /* not booted as cluster */
			mutex_enter(&flock_lock);
			/*
			 * Bail out if this is a lock manager request and the
			 * lock manager is not supposed to be running.
			 */
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

			mutex_enter(&nlm_reg_lock);
			/*
			 * If the NLM registry does not know about this
			 * NLM server making the request, add its nlmid
			 * to the registry.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
				nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
				nlmid)) {
				/*
				 * If the NLM server is already known (has made
				 * previous lock requests) and its state is
				 * not NLM_UP (means that NLM server is
				 * shutting down), then bail out with an
				 * error to deny the lock request.
				 */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	/* Now get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * We drop rwlock here otherwise this might end up causing a
	 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
	 */

	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
			(lock_request->l_type == F_RDLCK) ?
				V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			/* query: report first blocker back via lckdat */
			flk_get_first_blocking_lock(lock_request);
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */

		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */

		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			/* sleeping request was already cancelled */
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Recovery mechanism to release lock manager locks when
		 * NFS client crashes and restart. NFS server will clear
		 * old locks and grant new locks.
		 */

		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		/* privileged operation: requires NFS policy privilege */
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/* Clustering: For blocked PXFS locks, return */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Now that we have seen the status of locks in the system for
	 * this vnode we acquire the rwlock if it is an IO_LOCK.
	 */

	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
			(lock_request->l_type == F_RDLCK) ?
				V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;

			/*
			 * This wake up is needed otherwise
			 * if IO_LOCK has slept the dependents on this
			 * will not be woken up at all. (bugid # 1185482).
			 */

			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
		/*
		 * else if error had occurred either flk_process_request()
		 * has returned EDEADLK in which case there will be no
		 * dependents for this lock or EINTR from flk_wait_execute_
		 * request() in which case flk_cancel_sleeping_lock()
		 * would have been done. same is true with EBADF.
		 */
	}

	/*
	 * Common cleanup: stack descriptors just transition to DEAD;
	 * heap descriptors are freed unless still active in the graph.
	 */
	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	/* early-failure path: no graph mutex held here */
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}
531*7c478bd9Sstevel@tonic-gate 
532*7c478bd9Sstevel@tonic-gate /*
533*7c478bd9Sstevel@tonic-gate  * Invoke the callbacks in the given list.  If before sleeping, invoke in
534*7c478bd9Sstevel@tonic-gate  * list order.  If after sleeping, invoke in reverse order.
535*7c478bd9Sstevel@tonic-gate  *
536*7c478bd9Sstevel@tonic-gate  * CPR (suspend/resume) support: if one of the callbacks returns a
537*7c478bd9Sstevel@tonic-gate  * callb_cpr_t, return it.   This will be used to make the thread CPR-safe
538*7c478bd9Sstevel@tonic-gate  * while it is sleeping.  There should be at most one callb_cpr_t for the
539*7c478bd9Sstevel@tonic-gate  * thread.
540*7c478bd9Sstevel@tonic-gate  * XXX This is unnecessarily complicated.  The CPR information should just
541*7c478bd9Sstevel@tonic-gate  * get passed in directly through VOP_FRLOCK and reclock, rather than
542*7c478bd9Sstevel@tonic-gate  * sneaking it in via a callback.
543*7c478bd9Sstevel@tonic-gate  */
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate callb_cpr_t *
546*7c478bd9Sstevel@tonic-gate flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
547*7c478bd9Sstevel@tonic-gate {
548*7c478bd9Sstevel@tonic-gate 	callb_cpr_t *cpr_callbackp = NULL;
549*7c478bd9Sstevel@tonic-gate 	callb_cpr_t *one_result;
550*7c478bd9Sstevel@tonic-gate 	flk_callback_t *cb;
551*7c478bd9Sstevel@tonic-gate 
552*7c478bd9Sstevel@tonic-gate 	if (cblist == NULL)
553*7c478bd9Sstevel@tonic-gate 		return (NULL);
554*7c478bd9Sstevel@tonic-gate 
555*7c478bd9Sstevel@tonic-gate 	if (when == FLK_BEFORE_SLEEP) {
556*7c478bd9Sstevel@tonic-gate 		cb = cblist;
557*7c478bd9Sstevel@tonic-gate 		do {
558*7c478bd9Sstevel@tonic-gate 			one_result = (*cb->cb_callback)(when, cb->cb_data);
559*7c478bd9Sstevel@tonic-gate 			if (one_result != NULL) {
560*7c478bd9Sstevel@tonic-gate 				ASSERT(cpr_callbackp == NULL);
561*7c478bd9Sstevel@tonic-gate 				cpr_callbackp = one_result;
562*7c478bd9Sstevel@tonic-gate 			}
563*7c478bd9Sstevel@tonic-gate 			cb = cb->cb_next;
564*7c478bd9Sstevel@tonic-gate 		} while (cb != cblist);
565*7c478bd9Sstevel@tonic-gate 	} else {
566*7c478bd9Sstevel@tonic-gate 		cb = cblist->cb_prev;
567*7c478bd9Sstevel@tonic-gate 		do {
568*7c478bd9Sstevel@tonic-gate 			one_result = (*cb->cb_callback)(when, cb->cb_data);
569*7c478bd9Sstevel@tonic-gate 			if (one_result != NULL) {
570*7c478bd9Sstevel@tonic-gate 				cpr_callbackp = one_result;
571*7c478bd9Sstevel@tonic-gate 			}
572*7c478bd9Sstevel@tonic-gate 			cb = cb->cb_prev;
573*7c478bd9Sstevel@tonic-gate 		} while (cb != cblist->cb_prev);
574*7c478bd9Sstevel@tonic-gate 	}
575*7c478bd9Sstevel@tonic-gate 
576*7c478bd9Sstevel@tonic-gate 	return (cpr_callbackp);
577*7c478bd9Sstevel@tonic-gate }
578*7c478bd9Sstevel@tonic-gate 
579*7c478bd9Sstevel@tonic-gate /*
580*7c478bd9Sstevel@tonic-gate  * Initialize a flk_callback_t to hold the given callback.
581*7c478bd9Sstevel@tonic-gate  */
582*7c478bd9Sstevel@tonic-gate 
583*7c478bd9Sstevel@tonic-gate void
584*7c478bd9Sstevel@tonic-gate flk_init_callback(flk_callback_t *flk_cb,
585*7c478bd9Sstevel@tonic-gate 	callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
586*7c478bd9Sstevel@tonic-gate {
587*7c478bd9Sstevel@tonic-gate 	flk_cb->cb_next = flk_cb;
588*7c478bd9Sstevel@tonic-gate 	flk_cb->cb_prev = flk_cb;
589*7c478bd9Sstevel@tonic-gate 	flk_cb->cb_callback = cb_fcn;
590*7c478bd9Sstevel@tonic-gate 	flk_cb->cb_data = cbdata;
591*7c478bd9Sstevel@tonic-gate }
592*7c478bd9Sstevel@tonic-gate 
593*7c478bd9Sstevel@tonic-gate /*
594*7c478bd9Sstevel@tonic-gate  * Initialize an flk_callback_t and then link it into the head of an
595*7c478bd9Sstevel@tonic-gate  * existing list (which may be NULL).
596*7c478bd9Sstevel@tonic-gate  */
597*7c478bd9Sstevel@tonic-gate 
598*7c478bd9Sstevel@tonic-gate void
599*7c478bd9Sstevel@tonic-gate flk_add_callback(flk_callback_t *newcb,
600*7c478bd9Sstevel@tonic-gate 		callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
601*7c478bd9Sstevel@tonic-gate 		void *cbdata, flk_callback_t *cblist)
602*7c478bd9Sstevel@tonic-gate {
603*7c478bd9Sstevel@tonic-gate 	flk_init_callback(newcb, cb_fcn, cbdata);
604*7c478bd9Sstevel@tonic-gate 
605*7c478bd9Sstevel@tonic-gate 	if (cblist == NULL)
606*7c478bd9Sstevel@tonic-gate 		return;
607*7c478bd9Sstevel@tonic-gate 
608*7c478bd9Sstevel@tonic-gate 	newcb->cb_prev = cblist->cb_prev;
609*7c478bd9Sstevel@tonic-gate 	newcb->cb_next = cblist;
610*7c478bd9Sstevel@tonic-gate 	cblist->cb_prev->cb_next = newcb;
611*7c478bd9Sstevel@tonic-gate 	cblist->cb_prev = newcb;
612*7c478bd9Sstevel@tonic-gate }
613*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
614*7c478bd9Sstevel@tonic-gate 
615*7c478bd9Sstevel@tonic-gate /*
616*7c478bd9Sstevel@tonic-gate  * Initialize the flk_edge_cache data structure and create the
617*7c478bd9Sstevel@tonic-gate  * nlm_reg_status array.
618*7c478bd9Sstevel@tonic-gate  */
619*7c478bd9Sstevel@tonic-gate 
620*7c478bd9Sstevel@tonic-gate void
621*7c478bd9Sstevel@tonic-gate flk_init(void)
622*7c478bd9Sstevel@tonic-gate {
623*7c478bd9Sstevel@tonic-gate 	uint_t	i;
624*7c478bd9Sstevel@tonic-gate 
625*7c478bd9Sstevel@tonic-gate 	flk_edge_cache = kmem_cache_create("flk_edges",
626*7c478bd9Sstevel@tonic-gate 		sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
627*7c478bd9Sstevel@tonic-gate 	if (flk_edge_cache == NULL) {
628*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
629*7c478bd9Sstevel@tonic-gate 	}
630*7c478bd9Sstevel@tonic-gate 	/*
631*7c478bd9Sstevel@tonic-gate 	 * Create the NLM registry object.
632*7c478bd9Sstevel@tonic-gate 	 */
633*7c478bd9Sstevel@tonic-gate 
634*7c478bd9Sstevel@tonic-gate 	if (cluster_bootflags & CLUSTER_BOOTED) {
635*7c478bd9Sstevel@tonic-gate 		/*
636*7c478bd9Sstevel@tonic-gate 		 * This routine tells you the maximum node id that will be used
637*7c478bd9Sstevel@tonic-gate 		 * in the cluster.  This number will be the size of the nlm
638*7c478bd9Sstevel@tonic-gate 		 * registry status array.  We add 1 because we will be using
639*7c478bd9Sstevel@tonic-gate 		 * all entries indexed from 0 to maxnodeid; e.g., from 0
640*7c478bd9Sstevel@tonic-gate 		 * to 64, for a total of 65 entries.
641*7c478bd9Sstevel@tonic-gate 		 */
642*7c478bd9Sstevel@tonic-gate 		nlm_status_size = clconf_maximum_nodeid() + 1;
643*7c478bd9Sstevel@tonic-gate 	} else {
644*7c478bd9Sstevel@tonic-gate 		nlm_status_size = 0;
645*7c478bd9Sstevel@tonic-gate 	}
646*7c478bd9Sstevel@tonic-gate 
647*7c478bd9Sstevel@tonic-gate 	if (nlm_status_size != 0) {	/* booted as a cluster */
648*7c478bd9Sstevel@tonic-gate 		nlm_reg_status = (flk_nlm_status_t *)
649*7c478bd9Sstevel@tonic-gate 			kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
650*7c478bd9Sstevel@tonic-gate 				KM_SLEEP);
651*7c478bd9Sstevel@tonic-gate 
652*7c478bd9Sstevel@tonic-gate 		/* initialize all NLM states in array to NLM_UNKNOWN */
653*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < nlm_status_size; i++) {
654*7c478bd9Sstevel@tonic-gate 			nlm_reg_status[i] = FLK_NLM_UNKNOWN;
655*7c478bd9Sstevel@tonic-gate 		}
656*7c478bd9Sstevel@tonic-gate 	}
657*7c478bd9Sstevel@tonic-gate }
658*7c478bd9Sstevel@tonic-gate 
659*7c478bd9Sstevel@tonic-gate /*
660*7c478bd9Sstevel@tonic-gate  * Zone constructor/destructor callbacks to be executed when a zone is
661*7c478bd9Sstevel@tonic-gate  * created/destroyed.
662*7c478bd9Sstevel@tonic-gate  */
663*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
664*7c478bd9Sstevel@tonic-gate void *
665*7c478bd9Sstevel@tonic-gate flk_zone_init(zoneid_t zoneid)
666*7c478bd9Sstevel@tonic-gate {
667*7c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
668*7c478bd9Sstevel@tonic-gate 	uint_t i;
669*7c478bd9Sstevel@tonic-gate 
670*7c478bd9Sstevel@tonic-gate 	fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
671*7c478bd9Sstevel@tonic-gate 	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
672*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++)
673*7c478bd9Sstevel@tonic-gate 		fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
674*7c478bd9Sstevel@tonic-gate 	return (fg);
675*7c478bd9Sstevel@tonic-gate }
676*7c478bd9Sstevel@tonic-gate 
677*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
678*7c478bd9Sstevel@tonic-gate void
679*7c478bd9Sstevel@tonic-gate flk_zone_fini(zoneid_t zoneid, void *data)
680*7c478bd9Sstevel@tonic-gate {
681*7c478bd9Sstevel@tonic-gate 	struct flock_globals *fg = data;
682*7c478bd9Sstevel@tonic-gate 
683*7c478bd9Sstevel@tonic-gate 	kmem_free(fg, sizeof (*fg));
684*7c478bd9Sstevel@tonic-gate }
685*7c478bd9Sstevel@tonic-gate 
686*7c478bd9Sstevel@tonic-gate /*
687*7c478bd9Sstevel@tonic-gate  * Get a lock_descriptor structure with initialisation of edge lists.
688*7c478bd9Sstevel@tonic-gate  */
689*7c478bd9Sstevel@tonic-gate 
690*7c478bd9Sstevel@tonic-gate static lock_descriptor_t *
691*7c478bd9Sstevel@tonic-gate flk_get_lock(void)
692*7c478bd9Sstevel@tonic-gate {
693*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*l;
694*7c478bd9Sstevel@tonic-gate 
695*7c478bd9Sstevel@tonic-gate 	l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
696*7c478bd9Sstevel@tonic-gate 
697*7c478bd9Sstevel@tonic-gate 	cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
698*7c478bd9Sstevel@tonic-gate 	l->l_edge.edge_in_next = &l->l_edge;
699*7c478bd9Sstevel@tonic-gate 	l->l_edge.edge_in_prev = &l->l_edge;
700*7c478bd9Sstevel@tonic-gate 	l->l_edge.edge_adj_next = &l->l_edge;
701*7c478bd9Sstevel@tonic-gate 	l->l_edge.edge_adj_prev = &l->l_edge;
702*7c478bd9Sstevel@tonic-gate 	l->pvertex = -1;
703*7c478bd9Sstevel@tonic-gate 	l->l_status = FLK_INITIAL_STATE;
704*7c478bd9Sstevel@tonic-gate 	flk_lock_allocs++;
705*7c478bd9Sstevel@tonic-gate 	return (l);
706*7c478bd9Sstevel@tonic-gate }
707*7c478bd9Sstevel@tonic-gate 
708*7c478bd9Sstevel@tonic-gate /*
709*7c478bd9Sstevel@tonic-gate  * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
710*7c478bd9Sstevel@tonic-gate  * when some thread has a reference to it as in reclock().
711*7c478bd9Sstevel@tonic-gate  */
712*7c478bd9Sstevel@tonic-gate 
713*7c478bd9Sstevel@tonic-gate void
714*7c478bd9Sstevel@tonic-gate flk_free_lock(lock_descriptor_t	*lock)
715*7c478bd9Sstevel@tonic-gate {
716*7c478bd9Sstevel@tonic-gate 	ASSERT(IS_DEAD(lock));
717*7c478bd9Sstevel@tonic-gate 	if (IS_REFERENCED(lock)) {
718*7c478bd9Sstevel@tonic-gate 		lock->l_state |= DELETED_LOCK;
719*7c478bd9Sstevel@tonic-gate 		return;
720*7c478bd9Sstevel@tonic-gate 	}
721*7c478bd9Sstevel@tonic-gate 	flk_lock_frees++;
722*7c478bd9Sstevel@tonic-gate 	kmem_free((void *)lock, sizeof (lock_descriptor_t));
723*7c478bd9Sstevel@tonic-gate }
724*7c478bd9Sstevel@tonic-gate 
725*7c478bd9Sstevel@tonic-gate void
726*7c478bd9Sstevel@tonic-gate flk_set_state(lock_descriptor_t *lock, int new_state)
727*7c478bd9Sstevel@tonic-gate {
728*7c478bd9Sstevel@tonic-gate 	/*
729*7c478bd9Sstevel@tonic-gate 	 * Locks in the sleeping list may be woken up in a number of ways,
730*7c478bd9Sstevel@tonic-gate 	 * and more than once.  If a sleeping lock is signalled awake more
731*7c478bd9Sstevel@tonic-gate 	 * than once, then it may or may not change state depending on its
732*7c478bd9Sstevel@tonic-gate 	 * current state.
733*7c478bd9Sstevel@tonic-gate 	 * Also note that NLM locks that are sleeping could be moved to an
734*7c478bd9Sstevel@tonic-gate 	 * interrupted state more than once if the unlock request is
735*7c478bd9Sstevel@tonic-gate 	 * retransmitted by the NLM client - the second time around, this is
736*7c478bd9Sstevel@tonic-gate 	 * just a nop.
737*7c478bd9Sstevel@tonic-gate 	 * The ordering of being signalled awake is:
738*7c478bd9Sstevel@tonic-gate 	 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
739*7c478bd9Sstevel@tonic-gate 	 * The checks below implement this ordering.
740*7c478bd9Sstevel@tonic-gate 	 */
741*7c478bd9Sstevel@tonic-gate 	if (IS_INTERRUPTED(lock)) {
742*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_CANCELLED_STATE) ||
743*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_GRANTED_STATE) ||
744*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_INTERRUPTED_STATE)) {
745*7c478bd9Sstevel@tonic-gate 			return;
746*7c478bd9Sstevel@tonic-gate 		}
747*7c478bd9Sstevel@tonic-gate 	}
748*7c478bd9Sstevel@tonic-gate 	if (IS_CANCELLED(lock)) {
749*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_GRANTED_STATE) ||
750*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_CANCELLED_STATE)) {
751*7c478bd9Sstevel@tonic-gate 			return;
752*7c478bd9Sstevel@tonic-gate 		}
753*7c478bd9Sstevel@tonic-gate 	}
754*7c478bd9Sstevel@tonic-gate 	CHECK_LOCK_TRANSITION(lock->l_status, new_state);
755*7c478bd9Sstevel@tonic-gate 	if (IS_PXFS(lock)) {
756*7c478bd9Sstevel@tonic-gate 		cl_flk_state_transition_notify(lock, lock->l_status, new_state);
757*7c478bd9Sstevel@tonic-gate 	}
758*7c478bd9Sstevel@tonic-gate 	lock->l_status = new_state;
759*7c478bd9Sstevel@tonic-gate }
760*7c478bd9Sstevel@tonic-gate 
761*7c478bd9Sstevel@tonic-gate /*
762*7c478bd9Sstevel@tonic-gate  * Routine that checks whether there are any blocking locks in the system.
763*7c478bd9Sstevel@tonic-gate  *
764*7c478bd9Sstevel@tonic-gate  * The policy followed is if a write lock is sleeping we don't allow read
765*7c478bd9Sstevel@tonic-gate  * locks before this write lock even though there may not be any active
766*7c478bd9Sstevel@tonic-gate  * locks corresponding to the read locks' region.
767*7c478bd9Sstevel@tonic-gate  *
768*7c478bd9Sstevel@tonic-gate  * flk_add_edge() function adds an edge between l1 and l2 iff there
769*7c478bd9Sstevel@tonic-gate  * is no path between l1 and l2. This is done to have a "minimum
770*7c478bd9Sstevel@tonic-gate  * storage representation" of the dependency graph.
771*7c478bd9Sstevel@tonic-gate  *
772*7c478bd9Sstevel@tonic-gate  * Another property of the graph is since only the new request throws
773*7c478bd9Sstevel@tonic-gate  * edges to the existing locks in the graph, the graph is always topologically
774*7c478bd9Sstevel@tonic-gate  * ordered.
775*7c478bd9Sstevel@tonic-gate  */
776*7c478bd9Sstevel@tonic-gate 
777*7c478bd9Sstevel@tonic-gate static int
778*7c478bd9Sstevel@tonic-gate flk_process_request(lock_descriptor_t *request)
779*7c478bd9Sstevel@tonic-gate {
780*7c478bd9Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
781*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
782*7c478bd9Sstevel@tonic-gate 	int request_blocked_by_active = 0;
783*7c478bd9Sstevel@tonic-gate 	int request_blocked_by_granted = 0;
784*7c478bd9Sstevel@tonic-gate 	int request_blocked_by_sleeping = 0;
785*7c478bd9Sstevel@tonic-gate 	vnode_t	*vp = request->l_vnode;
786*7c478bd9Sstevel@tonic-gate 	int	error = 0;
787*7c478bd9Sstevel@tonic-gate 	int request_will_wait = 0;
788*7c478bd9Sstevel@tonic-gate 	int found_covering_lock = 0;
789*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *covered_by = NULL;
790*7c478bd9Sstevel@tonic-gate 
791*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
792*7c478bd9Sstevel@tonic-gate 	request_will_wait = IS_WILLING_TO_SLEEP(request);
793*7c478bd9Sstevel@tonic-gate 
794*7c478bd9Sstevel@tonic-gate 	/*
795*7c478bd9Sstevel@tonic-gate 	 * check active locks
796*7c478bd9Sstevel@tonic-gate 	 */
797*7c478bd9Sstevel@tonic-gate 
798*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
799*7c478bd9Sstevel@tonic-gate 
800*7c478bd9Sstevel@tonic-gate 
801*7c478bd9Sstevel@tonic-gate 	if (lock) {
802*7c478bd9Sstevel@tonic-gate 		do {
803*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
804*7c478bd9Sstevel@tonic-gate 				if (!request_will_wait)
805*7c478bd9Sstevel@tonic-gate 					return (EAGAIN);
806*7c478bd9Sstevel@tonic-gate 				request_blocked_by_active = 1;
807*7c478bd9Sstevel@tonic-gate 				break;
808*7c478bd9Sstevel@tonic-gate 			}
809*7c478bd9Sstevel@tonic-gate 			/*
810*7c478bd9Sstevel@tonic-gate 			 * Grant lock if it is for the same owner holding active
811*7c478bd9Sstevel@tonic-gate 			 * lock that covers the request.
812*7c478bd9Sstevel@tonic-gate 			 */
813*7c478bd9Sstevel@tonic-gate 
814*7c478bd9Sstevel@tonic-gate 			if (SAME_OWNER(lock, request) &&
815*7c478bd9Sstevel@tonic-gate 					COVERS(lock, request) &&
816*7c478bd9Sstevel@tonic-gate 						(request->l_type == F_RDLCK))
817*7c478bd9Sstevel@tonic-gate 				return (flk_execute_request(request));
818*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
819*7c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
820*7c478bd9Sstevel@tonic-gate 	}
821*7c478bd9Sstevel@tonic-gate 
822*7c478bd9Sstevel@tonic-gate 	if (!request_blocked_by_active) {
823*7c478bd9Sstevel@tonic-gate 			lock_descriptor_t *lk[1];
824*7c478bd9Sstevel@tonic-gate 			lock_descriptor_t *first_glock = NULL;
825*7c478bd9Sstevel@tonic-gate 		/*
826*7c478bd9Sstevel@tonic-gate 		 * Shall we grant this?! NO!!
827*7c478bd9Sstevel@tonic-gate 		 * What about those locks that were just granted and still
828*7c478bd9Sstevel@tonic-gate 		 * in sleep queue. Those threads are woken up and so locks
829*7c478bd9Sstevel@tonic-gate 		 * are almost active.
830*7c478bd9Sstevel@tonic-gate 		 */
831*7c478bd9Sstevel@tonic-gate 		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
832*7c478bd9Sstevel@tonic-gate 		if (lock) {
833*7c478bd9Sstevel@tonic-gate 			do {
834*7c478bd9Sstevel@tonic-gate 				if (BLOCKS(lock, request)) {
835*7c478bd9Sstevel@tonic-gate 					if (IS_GRANTED(lock)) {
836*7c478bd9Sstevel@tonic-gate 						request_blocked_by_granted = 1;
837*7c478bd9Sstevel@tonic-gate 					} else {
838*7c478bd9Sstevel@tonic-gate 						request_blocked_by_sleeping = 1;
839*7c478bd9Sstevel@tonic-gate 					}
840*7c478bd9Sstevel@tonic-gate 				}
841*7c478bd9Sstevel@tonic-gate 
842*7c478bd9Sstevel@tonic-gate 				lock = lock->l_next;
843*7c478bd9Sstevel@tonic-gate 			} while ((lock->l_vnode == vp));
844*7c478bd9Sstevel@tonic-gate 			first_glock = lock->l_prev;
845*7c478bd9Sstevel@tonic-gate 			ASSERT(first_glock->l_vnode == vp);
846*7c478bd9Sstevel@tonic-gate 		}
847*7c478bd9Sstevel@tonic-gate 
848*7c478bd9Sstevel@tonic-gate 		if (request_blocked_by_granted)
849*7c478bd9Sstevel@tonic-gate 			goto block;
850*7c478bd9Sstevel@tonic-gate 
851*7c478bd9Sstevel@tonic-gate 		if (!request_blocked_by_sleeping) {
852*7c478bd9Sstevel@tonic-gate 			/*
853*7c478bd9Sstevel@tonic-gate 			 * If the request isn't going to be blocked by a
854*7c478bd9Sstevel@tonic-gate 			 * sleeping request, we know that it isn't going to
855*7c478bd9Sstevel@tonic-gate 			 * be blocked; we can just execute the request --
856*7c478bd9Sstevel@tonic-gate 			 * without performing costly deadlock detection.
857*7c478bd9Sstevel@tonic-gate 			 */
858*7c478bd9Sstevel@tonic-gate 			ASSERT(!request_blocked_by_active);
859*7c478bd9Sstevel@tonic-gate 			return (flk_execute_request(request));
860*7c478bd9Sstevel@tonic-gate 		} else if (request->l_type == F_RDLCK) {
861*7c478bd9Sstevel@tonic-gate 			/*
862*7c478bd9Sstevel@tonic-gate 			 * If we have a sleeping writer in the requested
863*7c478bd9Sstevel@tonic-gate 			 * lock's range, block.
864*7c478bd9Sstevel@tonic-gate 			 */
865*7c478bd9Sstevel@tonic-gate 			goto block;
866*7c478bd9Sstevel@tonic-gate 		}
867*7c478bd9Sstevel@tonic-gate 
868*7c478bd9Sstevel@tonic-gate 		lk[0] = request;
869*7c478bd9Sstevel@tonic-gate 		request->l_state |= RECOMPUTE_LOCK;
870*7c478bd9Sstevel@tonic-gate 		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
871*7c478bd9Sstevel@tonic-gate 		if (lock) {
872*7c478bd9Sstevel@tonic-gate 			do {
873*7c478bd9Sstevel@tonic-gate 				flk_recompute_dependencies(lock, lk, 1, 0);
874*7c478bd9Sstevel@tonic-gate 				lock = lock->l_next;
875*7c478bd9Sstevel@tonic-gate 			} while (lock->l_vnode == vp);
876*7c478bd9Sstevel@tonic-gate 		}
877*7c478bd9Sstevel@tonic-gate 		lock = first_glock;
878*7c478bd9Sstevel@tonic-gate 		if (lock) {
879*7c478bd9Sstevel@tonic-gate 			do {
880*7c478bd9Sstevel@tonic-gate 				if (IS_GRANTED(lock)) {
881*7c478bd9Sstevel@tonic-gate 				flk_recompute_dependencies(lock, lk, 1, 0);
882*7c478bd9Sstevel@tonic-gate 				}
883*7c478bd9Sstevel@tonic-gate 				lock = lock->l_prev;
884*7c478bd9Sstevel@tonic-gate 			} while ((lock->l_vnode == vp));
885*7c478bd9Sstevel@tonic-gate 		}
886*7c478bd9Sstevel@tonic-gate 		request->l_state &= ~RECOMPUTE_LOCK;
887*7c478bd9Sstevel@tonic-gate 		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
888*7c478bd9Sstevel@tonic-gate 			return (EDEADLK);
889*7c478bd9Sstevel@tonic-gate 		return (flk_execute_request(request));
890*7c478bd9Sstevel@tonic-gate 	}
891*7c478bd9Sstevel@tonic-gate 
892*7c478bd9Sstevel@tonic-gate block:
893*7c478bd9Sstevel@tonic-gate 	if (request_will_wait)
894*7c478bd9Sstevel@tonic-gate 		flk_graph_uncolor(gp);
895*7c478bd9Sstevel@tonic-gate 
896*7c478bd9Sstevel@tonic-gate 	/* check sleeping locks */
897*7c478bd9Sstevel@tonic-gate 
898*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
899*7c478bd9Sstevel@tonic-gate 
900*7c478bd9Sstevel@tonic-gate 	/*
901*7c478bd9Sstevel@tonic-gate 	 * If we find a sleeping write lock that is a superset of the
902*7c478bd9Sstevel@tonic-gate 	 * region wanted by request we can be assured that by adding an
903*7c478bd9Sstevel@tonic-gate 	 * edge to this write lock we have paths to all locks in the
904*7c478bd9Sstevel@tonic-gate 	 * graph that blocks the request except in one case and that is why
905*7c478bd9Sstevel@tonic-gate 	 * another check for SAME_OWNER in the loop below. The exception
906*7c478bd9Sstevel@tonic-gate 	 * case is when this process that owns the sleeping write lock 'l1'
907*7c478bd9Sstevel@tonic-gate 	 * has other locks l2, l3, l4 that are in the system and arrived
908*7c478bd9Sstevel@tonic-gate 	 * before l1. l1 does not have path to these locks as they are from
909*7c478bd9Sstevel@tonic-gate 	 * same process. We break when we find a second covering sleeping
910*7c478bd9Sstevel@tonic-gate 	 * lock l5 owned by a process different from that owning l1, because
911*7c478bd9Sstevel@tonic-gate 	 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
912*7c478bd9Sstevel@tonic-gate 	 * it has l1 would have produced a deadlock already.
913*7c478bd9Sstevel@tonic-gate 	 */
914*7c478bd9Sstevel@tonic-gate 
915*7c478bd9Sstevel@tonic-gate 	if (lock) {
916*7c478bd9Sstevel@tonic-gate 		do {
917*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
918*7c478bd9Sstevel@tonic-gate 				if (!request_will_wait)
919*7c478bd9Sstevel@tonic-gate 					return (EAGAIN);
920*7c478bd9Sstevel@tonic-gate 				if (COVERS(lock, request) &&
921*7c478bd9Sstevel@tonic-gate 						lock->l_type == F_WRLCK) {
922*7c478bd9Sstevel@tonic-gate 					if (found_covering_lock &&
923*7c478bd9Sstevel@tonic-gate 					    !SAME_OWNER(lock, covered_by)) {
924*7c478bd9Sstevel@tonic-gate 						found_covering_lock++;
925*7c478bd9Sstevel@tonic-gate 						break;
926*7c478bd9Sstevel@tonic-gate 					}
927*7c478bd9Sstevel@tonic-gate 					found_covering_lock = 1;
928*7c478bd9Sstevel@tonic-gate 					covered_by = lock;
929*7c478bd9Sstevel@tonic-gate 				}
930*7c478bd9Sstevel@tonic-gate 				if (found_covering_lock &&
931*7c478bd9Sstevel@tonic-gate 					!SAME_OWNER(lock, covered_by)) {
932*7c478bd9Sstevel@tonic-gate 					lock = lock->l_next;
933*7c478bd9Sstevel@tonic-gate 					continue;
934*7c478bd9Sstevel@tonic-gate 				}
935*7c478bd9Sstevel@tonic-gate 				if ((error = flk_add_edge(request, lock,
936*7c478bd9Sstevel@tonic-gate 						!found_covering_lock, 0)))
937*7c478bd9Sstevel@tonic-gate 					return (error);
938*7c478bd9Sstevel@tonic-gate 			}
939*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
940*7c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
941*7c478bd9Sstevel@tonic-gate 	}
942*7c478bd9Sstevel@tonic-gate 
943*7c478bd9Sstevel@tonic-gate /*
944*7c478bd9Sstevel@tonic-gate  * found_covering_lock == 2 iff at this point 'request' has paths
945*7c478bd9Sstevel@tonic-gate  * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
946*7c478bd9Sstevel@tonic-gate  * point 'request' has paths to all locks that blocks 'request' whose owners
947*7c478bd9Sstevel@tonic-gate  * are not same as the one that covers 'request' (covered_by above) and
948*7c478bd9Sstevel@tonic-gate  * we can have locks whose owner is same as covered_by in the active list.
949*7c478bd9Sstevel@tonic-gate  */
950*7c478bd9Sstevel@tonic-gate 
951*7c478bd9Sstevel@tonic-gate 	if (request_blocked_by_active && found_covering_lock != 2) {
952*7c478bd9Sstevel@tonic-gate 		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
953*7c478bd9Sstevel@tonic-gate 		ASSERT(lock != NULL);
954*7c478bd9Sstevel@tonic-gate 		do {
955*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
956*7c478bd9Sstevel@tonic-gate 				if (found_covering_lock &&
957*7c478bd9Sstevel@tonic-gate 					!SAME_OWNER(lock, covered_by)) {
958*7c478bd9Sstevel@tonic-gate 					lock = lock->l_next;
959*7c478bd9Sstevel@tonic-gate 					continue;
960*7c478bd9Sstevel@tonic-gate 				}
961*7c478bd9Sstevel@tonic-gate 				if ((error = flk_add_edge(request, lock,
962*7c478bd9Sstevel@tonic-gate 							CHECK_CYCLE, 0)))
963*7c478bd9Sstevel@tonic-gate 					return (error);
964*7c478bd9Sstevel@tonic-gate 			}
965*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
966*7c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
967*7c478bd9Sstevel@tonic-gate 	}
968*7c478bd9Sstevel@tonic-gate 
969*7c478bd9Sstevel@tonic-gate 	if (NOT_BLOCKED(request)) {
970*7c478bd9Sstevel@tonic-gate 		/*
971*7c478bd9Sstevel@tonic-gate 		 * request not dependent on any other locks
972*7c478bd9Sstevel@tonic-gate 		 * so execute this request
973*7c478bd9Sstevel@tonic-gate 		 */
974*7c478bd9Sstevel@tonic-gate 		return (flk_execute_request(request));
975*7c478bd9Sstevel@tonic-gate 	} else {
976*7c478bd9Sstevel@tonic-gate 		/*
977*7c478bd9Sstevel@tonic-gate 		 * check for deadlock
978*7c478bd9Sstevel@tonic-gate 		 */
979*7c478bd9Sstevel@tonic-gate 		if (flk_check_deadlock(request))
980*7c478bd9Sstevel@tonic-gate 			return (EDEADLK);
981*7c478bd9Sstevel@tonic-gate 		/*
982*7c478bd9Sstevel@tonic-gate 		 * this thread has to sleep
983*7c478bd9Sstevel@tonic-gate 		 */
984*7c478bd9Sstevel@tonic-gate 		return (flk_wait_execute_request(request));
985*7c478bd9Sstevel@tonic-gate 	}
986*7c478bd9Sstevel@tonic-gate }
987*7c478bd9Sstevel@tonic-gate 
988*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
989*7c478bd9Sstevel@tonic-gate /*
990*7c478bd9Sstevel@tonic-gate  * The actual execution of the request in the simple case is only to
991*7c478bd9Sstevel@tonic-gate  * insert the 'request' in the list of active locks if it is not an
992*7c478bd9Sstevel@tonic-gate  * UNLOCK.
993*7c478bd9Sstevel@tonic-gate  * We have to consider the existing active locks' relation to
994*7c478bd9Sstevel@tonic-gate  * this 'request' if they are owned by same process. flk_relation() does
995*7c478bd9Sstevel@tonic-gate  * this job and sees to that the dependency graph information is maintained
996*7c478bd9Sstevel@tonic-gate  * properly.
997*7c478bd9Sstevel@tonic-gate  */
998*7c478bd9Sstevel@tonic-gate 
999*7c478bd9Sstevel@tonic-gate int
1000*7c478bd9Sstevel@tonic-gate flk_execute_request(lock_descriptor_t *request)
1001*7c478bd9Sstevel@tonic-gate {
1002*7c478bd9Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
1003*7c478bd9Sstevel@tonic-gate 	vnode_t	*vp = request->l_vnode;
1004*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock, *lock1;
1005*7c478bd9Sstevel@tonic-gate 	int done_searching = 0;
1006*7c478bd9Sstevel@tonic-gate 
1007*7c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
1008*7c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
1009*7c478bd9Sstevel@tonic-gate 
1010*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
1011*7c478bd9Sstevel@tonic-gate 
1012*7c478bd9Sstevel@tonic-gate 	flk_set_state(request, FLK_START_STATE);
1013*7c478bd9Sstevel@tonic-gate 
1014*7c478bd9Sstevel@tonic-gate 	ASSERT(NOT_BLOCKED(request));
1015*7c478bd9Sstevel@tonic-gate 
1016*7c478bd9Sstevel@tonic-gate 	/* IO_LOCK requests are only to check status */
1017*7c478bd9Sstevel@tonic-gate 
1018*7c478bd9Sstevel@tonic-gate 	if (IS_IO_LOCK(request))
1019*7c478bd9Sstevel@tonic-gate 		return (0);
1020*7c478bd9Sstevel@tonic-gate 
1021*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1022*7c478bd9Sstevel@tonic-gate 
1023*7c478bd9Sstevel@tonic-gate 	if (lock == NULL && request->l_type == F_UNLCK)
1024*7c478bd9Sstevel@tonic-gate 		return (0);
1025*7c478bd9Sstevel@tonic-gate 	if (lock == NULL) {
1026*7c478bd9Sstevel@tonic-gate 		flk_insert_active_lock(request);
1027*7c478bd9Sstevel@tonic-gate 		return (0);
1028*7c478bd9Sstevel@tonic-gate 	}
1029*7c478bd9Sstevel@tonic-gate 
1030*7c478bd9Sstevel@tonic-gate 	do {
1031*7c478bd9Sstevel@tonic-gate 		lock1 = lock->l_next;
1032*7c478bd9Sstevel@tonic-gate 		if (SAME_OWNER(request, lock)) {
1033*7c478bd9Sstevel@tonic-gate 			done_searching = flk_relation(lock, request);
1034*7c478bd9Sstevel@tonic-gate 		}
1035*7c478bd9Sstevel@tonic-gate 		lock = lock1;
1036*7c478bd9Sstevel@tonic-gate 	} while (lock->l_vnode == vp && !done_searching);
1037*7c478bd9Sstevel@tonic-gate 
1038*7c478bd9Sstevel@tonic-gate 	/*
1039*7c478bd9Sstevel@tonic-gate 	 * insert in active queue
1040*7c478bd9Sstevel@tonic-gate 	 */
1041*7c478bd9Sstevel@tonic-gate 
1042*7c478bd9Sstevel@tonic-gate 	if (request->l_type != F_UNLCK)
1043*7c478bd9Sstevel@tonic-gate 		flk_insert_active_lock(request);
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 	return (0);
1046*7c478bd9Sstevel@tonic-gate }
1047*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
1048*7c478bd9Sstevel@tonic-gate 
1049*7c478bd9Sstevel@tonic-gate /*
1050*7c478bd9Sstevel@tonic-gate  * 'request' is blocked by some one therefore we put it into sleep queue.
1051*7c478bd9Sstevel@tonic-gate  */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	callb_cpr_t	*cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	/*
	 * Lock manager (NLM) requests must not be left sleeping while the
	 * lock manager is shutting down: cancel them and return ENOLCK so
	 * the client retransmits later.  Note 'fg' and 'index' are only
	 * initialized here and are only read again below under the same
	 * IS_LOCKMGR() guard.
	 */
	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			/*
			 * If the request is an NLM server lock request,
			 * and the NLM state of the lock request is not
			 * NLM_UP (because the NLM server is shutting
			 * down), then cancel the sleeping lock and
			 * return error ENOLCK that will encourage the
			 * client to retransmit.
			 */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* Clustering: For blocking PXFS locks, return */
	if (IS_PXFS(request)) {
		/*
		 * PXFS locks sleep on the client side.
		 * The callback argument is used to wake up the sleeper
		 * when the lock is granted.
		 * We return -1 (rather than an errno value) to indicate
		 * the client side should sleep
		 */
		return (PXFS_LOCK_BLOCKED);
	}

	if (request->l_callbacks != NULL) {
		/*
		 * To make sure the shutdown code works correctly, either
		 * the callback must happen after putting the lock on the
		 * sleep list, or we must check the shutdown status after
		 * returning from the callback (and before sleeping).  At
		 * least for now, we'll use the first option.  If a
		 * shutdown or signal or whatever happened while the graph
		 * mutex was dropped, that will be detected by
		 * wait_for_lock().
		 */
		mutex_exit(&gp->gp_mutex);

		cprp = flk_invoke_callbacks(request->l_callbacks,
					    FLK_BEFORE_SLEEP);

		mutex_enter(&gp->gp_mutex);

		if (cprp == NULL) {
			wait_for_lock(request);
		} else {
			/*
			 * The callback supplied CPR info: bracket the wait
			 * with CALLB_CPR_SAFE_BEGIN/END (under the CPR
			 * lock) so a suspend can proceed while this thread
			 * is blocked.
			 */
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_BEGIN(cprp);
			mutex_exit(cprp->cc_lockp);
			wait_for_lock(request);
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
			mutex_exit(cprp->cc_lockp);
		}

		/* Drop the graph mutex again for the FLK_AFTER_SLEEP callbacks. */
		mutex_exit(&gp->gp_mutex);
		(void) flk_invoke_callbacks(request->l_callbacks,
					    FLK_AFTER_SLEEP);
		mutex_enter(&gp->gp_mutex);
	} else {
		wait_for_lock(request);
	}

	if (IS_LOCKMGR(request)) {
		/*
		 * If the lock manager is shutting down, return an
		 * error that will encourage the client to retransmit.
		 */
		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
			!IS_GRANTED(request)) {
			flk_cancel_sleeping_lock(request, 1);
			return (ENOLCK);
		}
	}

	if (IS_INTERRUPTED(request)) {
		/* we got a signal, or act like we did */
		flk_cancel_sleeping_lock(request, 1);
		return (EINTR);
	}

	/* Cancelled if some other thread has closed the file */

	if (IS_CANCELLED(request)) {
		flk_cancel_sleeping_lock(request, 1);
		return (EBADF);
	}

	/*
	 * We were granted the lock while sleeping: clear the transient
	 * GRANTED_LOCK flag, leave the sleep queue, and acquire the lock
	 * for real.
	 */
	request->l_state &= ~GRANTED_LOCK;
	REMOVE_SLEEP_QUEUE(request);
	return (flk_execute_request(request));
}
1169*7c478bd9Sstevel@tonic-gate 
/*
 * This routine adds an edge between 'from' and 'to' because 'from' depends
 * on 'to'. If asked to check for deadlock it checks whether there are any
 * reachable locks from "from_lock" that are owned by the same process
 * as "from_lock".
 * NOTE: It is the caller's responsibility to make sure that the color
 * of the graph is consistent between the calls to flk_add_edge as done
 * in flk_process_request. This routine does not color and check for
 * deadlock explicitly.
 */
1180*7c478bd9Sstevel@tonic-gate 
static int
flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
			int check_cycle, int update_graph)
{
	edge_t	*edge;
	edge_t	*ep;
	lock_descriptor_t	*vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	/*
	 * if to vertex already has mark_color just return
	 * don't add an edge as it is reachable from from vertex
	 * before itself.
	 */

	if (COLORED(to_lock))
		return (0);

	edge = flk_get_edge();

	/*
	 * set the from and to vertex
	 */

	edge->from_vertex = from_lock;
	edge->to_vertex = to_lock;

	/*
	 * put in adjacency list of from vertex
	 * (intrusive doubly-linked list headed by from_lock->l_edge)
	 */

	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
	edge->edge_adj_prev = &from_lock->l_edge;
	from_lock->l_edge.edge_adj_next = edge;

	/*
	 * put in in-edge list of to vertex
	 */

	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
	edge->edge_in_next = to_lock->l_edge.edge_in_next;
	to_lock->l_edge.edge_in_next = edge;
	edge->edge_in_prev = &to_lock->l_edge;


	if (update_graph) {
		/* mirror the new edge into the process graph; no cycle check */
		flk_update_proc_graph(edge, 0);
		return (0);
	}
	if (!check_cycle) {
		return (0);
	}

	/*
	 * Deadlock detection: iterative depth-first search from from_lock.
	 * Each newly visited vertex is COLOR()ed; reaching a vertex with
	 * the same owner as from_lock means the new edge closed a cycle.
	 */
	STACK_PUSH(vertex_stack, from_lock, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex);
			ep != HEAD(vertex);
				ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (SAME_OWNER(ep->to_vertex, from_lock))
				goto dead_lock;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
		}
	}
	return (0);

dead_lock:

	/*
	 * remove all edges
	 * (note: every adjacent edge of from_lock is torn down here,
	 * not just the edge added above)
	 */

	ep = FIRST_ADJ(from_lock);

	while (ep != HEAD(from_lock)) {
		IN_LIST_REMOVE(ep);
		from_lock->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = from_lock->l_sedge;
	}
	return (EDEADLK);
}
1273*7c478bd9Sstevel@tonic-gate 
1274*7c478bd9Sstevel@tonic-gate /*
1275*7c478bd9Sstevel@tonic-gate  * Get an edge structure for representing the dependency between two locks.
1276*7c478bd9Sstevel@tonic-gate  */
1277*7c478bd9Sstevel@tonic-gate 
1278*7c478bd9Sstevel@tonic-gate static edge_t *
1279*7c478bd9Sstevel@tonic-gate flk_get_edge()
1280*7c478bd9Sstevel@tonic-gate {
1281*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
1282*7c478bd9Sstevel@tonic-gate 
1283*7c478bd9Sstevel@tonic-gate 	ASSERT(flk_edge_cache != NULL);
1284*7c478bd9Sstevel@tonic-gate 
1285*7c478bd9Sstevel@tonic-gate 	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1286*7c478bd9Sstevel@tonic-gate 	edge_allocs++;
1287*7c478bd9Sstevel@tonic-gate 	return (ep);
1288*7c478bd9Sstevel@tonic-gate }
1289*7c478bd9Sstevel@tonic-gate 
1290*7c478bd9Sstevel@tonic-gate /*
1291*7c478bd9Sstevel@tonic-gate  * Free the edge structure.
1292*7c478bd9Sstevel@tonic-gate  */
1293*7c478bd9Sstevel@tonic-gate 
static void
flk_free_edge(edge_t *ep)
{
	edge_frees++;		/* statistic; paired with edge_allocs */
	kmem_cache_free(flk_edge_cache, (void *)ep);
}
1300*7c478bd9Sstevel@tonic-gate 
1301*7c478bd9Sstevel@tonic-gate /*
1302*7c478bd9Sstevel@tonic-gate  * Check the relationship of request with lock and perform the
1303*7c478bd9Sstevel@tonic-gate  * recomputation of dependencies, break lock if required, and return
1304*7c478bd9Sstevel@tonic-gate  * 1 if request cannot have any more relationship with the next
1305*7c478bd9Sstevel@tonic-gate  * active locks.
1306*7c478bd9Sstevel@tonic-gate  * The 'lock' and 'request' are compared and in case of overlap we
1307*7c478bd9Sstevel@tonic-gate  * delete the 'lock' and form new locks to represent the non-overlapped
1308*7c478bd9Sstevel@tonic-gate  * portion of original 'lock'. This function has side effects such as
1309*7c478bd9Sstevel@tonic-gate  * 'lock' will be freed, new locks will be added to the active list.
1310*7c478bd9Sstevel@tonic-gate  */
1311*7c478bd9Sstevel@tonic-gate 
static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
	int lock_effect;
	lock_descriptor_t *lock1, *lock2;
	lock_descriptor_t *topology[3];	/* at most 3 replacement vertices */
	int nvertex = 0;
	int i;
	edge_t	*ep;
	graph_t	*gp = (lock->l_graph);


	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	topology[0] = topology[1] = topology[2] = NULL;

	/*
	 * Classify what 'request' does to the overlapped part of 'lock'.
	 */
	if (request->l_type == F_UNLCK)
		lock_effect = FLK_UNLOCK;
	else if (request->l_type == F_RDLCK &&
			lock->l_type == F_WRLCK)
		lock_effect = FLK_DOWNGRADE;
	else if (request->l_type == F_WRLCK &&
			lock->l_type == F_RDLCK)
		lock_effect = FLK_UPGRADE;
	else
		lock_effect = FLK_STAY_SAME;

	/*
	 * 'lock' ends before 'request' starts: no overlap.  If exactly
	 * adjacent and the effect is FLK_STAY_SAME (same lock type),
	 * coalesce by stretching 'request' back over 'lock'; otherwise
	 * there is nothing to do for this pair (return 0: keep scanning).
	 */
	if (lock->l_end < request->l_start) {
		if (lock->l_end == request->l_start - 1 &&
				lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_start = lock->l_start;
			nvertex = 1;
			goto recompute;
		} else {
			return (0);
		}
	}

	/*
	 * 'lock' starts after 'request' ends: no overlap.  Coalesce if
	 * adjacent and same type; otherwise return 1 -- 'request' can
	 * have no relationship with the following active locks either.
	 */
	if (lock->l_start > request->l_end) {
		if (request->l_end == lock->l_start - 1 &&
					lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_end = lock->l_end;
			nvertex = 1;
			goto recompute;
		} else {
			return (1);
		}
	}

	/*
	 * Overlap cases.  'topology' collects the vertices replacing
	 * 'lock': up to two leftover pieces of 'lock', plus 'request'
	 * itself, which is always the last entry when present.
	 */
	if (request->l_end < lock->l_end) {
		/* 'request' ends inside 'lock' */
		if (request->l_start > lock->l_start) {
			/* 'request' strictly inside 'lock' */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* split 'lock' into left and right pieces */
				lock1 = flk_get_lock();
				lock2 = flk_get_lock();
				COPY(lock1, lock);
				COPY(lock2, lock);
				lock1->l_start = lock->l_start;
				lock1->l_end = request->l_start - 1;
				lock2->l_start = request->l_end + 1;
				lock2->l_end = lock->l_end;
				topology[0] = lock1;
				topology[1] = lock2;
				topology[2] = request;
				nvertex = 3;
			}
		} else if (request->l_start < lock->l_start) {
			/* 'request' covers the front of 'lock' */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* keep the trailing remainder of 'lock' */
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else  {
			/* same start; 'request' covers a prefix of 'lock' */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		}
	} else if (request->l_end > lock->l_end) {
		/* 'request' extends past the end of 'lock' */
		if (request->l_start > lock->l_start)  {
			/* 'request' covers the tail of 'lock' */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* keep the leading remainder of 'lock' */
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start)  {
			/* 'request' completely covers 'lock' */
			topology[0] = request;
			nvertex = 1;
		} else {
			/* same start; 'request' still covers all of 'lock' */
			topology[0] = request;
			nvertex = 1;
		}
	} else {
		/* both end at the same offset */
		if (request->l_start > lock->l_start) {
			/* 'request' covers a suffix of 'lock' */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			/* identical ranges */
			if (lock_effect !=  FLK_UNLOCK) {
				topology[0] = request;
				nvertex = 1;
			} else {
				/*
				 * Exact unlock: drop 'lock', wake its
				 * waiters, free it, and report that no
				 * further active locks can be related.
				 */
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				CHECK_SLEEPING_LOCKS(gp);
				CHECK_ACTIVE_LOCKS(gp);
				return (1);
			}
		}
	}

recompute:

	/*
	 * For unlock we don't send the 'request' for recomputing
	 * dependencies because no lock will add an edge to this.
	 */

	if (lock_effect == FLK_UNLOCK) {
		topology[nvertex-1] = NULL;
		nvertex--;
	}
	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state |= RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	ASSERT(FIRST_ADJ(lock) == HEAD(lock));

	/*
	 * we remove the adjacent edges for all vertices' to this vertex
	 * 'lock'.
	 */

	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		ADJ_LIST_REMOVE(ep);
		ep = NEXT_IN(ep);
	}

	flk_delete_active_lock(lock, 0);

	/* We are ready for recomputing the dependencies now */

	flk_recompute_dependencies(lock, topology, nvertex, 1);

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}


	/*
	 * Insert the surviving pieces of 'lock' into the active list.
	 * The 'request' vertex (topology[nvertex - 1] in the non-unlock
	 * case) is deliberately not inserted here; the caller executes
	 * the request separately.
	 */
	if (lock_effect == FLK_UNLOCK) {
		nvertex++;
	}
	for (i = 0; i < nvertex - 1; i++) {
		flk_insert_active_lock(topology[i]);
	}


	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
		flk_wakeup(lock, 0);
	} else {
		/* tear down remaining in-edges, mirroring into proc graph */
		ep = FIRST_IN(lock);
		while (ep != HEAD(lock)) {
			lock->l_sedge = NEXT_IN(ep);
			IN_LIST_REMOVE(ep);
			flk_update_proc_graph(ep, 1);
			flk_free_edge(ep);
			ep = lock->l_sedge;
		}
	}
	flk_free_lock(lock);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	return (0);
}
1535*7c478bd9Sstevel@tonic-gate 
1536*7c478bd9Sstevel@tonic-gate /*
1537*7c478bd9Sstevel@tonic-gate  * Insert a lock into the active queue.
1538*7c478bd9Sstevel@tonic-gate  */
1539*7c478bd9Sstevel@tonic-gate 
1540*7c478bd9Sstevel@tonic-gate static void
1541*7c478bd9Sstevel@tonic-gate flk_insert_active_lock(lock_descriptor_t *new_lock)
1542*7c478bd9Sstevel@tonic-gate {
1543*7c478bd9Sstevel@tonic-gate 	graph_t	*gp = new_lock->l_graph;
1544*7c478bd9Sstevel@tonic-gate 	vnode_t	*vp = new_lock->l_vnode;
1545*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *first_lock, *lock;
1546*7c478bd9Sstevel@tonic-gate 
1547*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
1548*7c478bd9Sstevel@tonic-gate 
1549*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1550*7c478bd9Sstevel@tonic-gate 	first_lock = lock;
1551*7c478bd9Sstevel@tonic-gate 
1552*7c478bd9Sstevel@tonic-gate 	if (first_lock != NULL) {
1553*7c478bd9Sstevel@tonic-gate 		for (; (lock->l_vnode == vp &&
1554*7c478bd9Sstevel@tonic-gate 			lock->l_start < new_lock->l_start); lock = lock->l_next)
1555*7c478bd9Sstevel@tonic-gate 			;
1556*7c478bd9Sstevel@tonic-gate 	} else {
1557*7c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp);
1558*7c478bd9Sstevel@tonic-gate 	}
1559*7c478bd9Sstevel@tonic-gate 
1560*7c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = new_lock;
1561*7c478bd9Sstevel@tonic-gate 	new_lock->l_next = lock;
1562*7c478bd9Sstevel@tonic-gate 	new_lock->l_prev = lock->l_prev;
1563*7c478bd9Sstevel@tonic-gate 	lock->l_prev = new_lock;
1564*7c478bd9Sstevel@tonic-gate 
1565*7c478bd9Sstevel@tonic-gate 	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1566*7c478bd9Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)new_lock;
1567*7c478bd9Sstevel@tonic-gate 	}
1568*7c478bd9Sstevel@tonic-gate 	flk_set_state(new_lock, FLK_ACTIVE_STATE);
1569*7c478bd9Sstevel@tonic-gate 	new_lock->l_state |= ACTIVE_LOCK;
1570*7c478bd9Sstevel@tonic-gate 
1571*7c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
1572*7c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
1573*7c478bd9Sstevel@tonic-gate }
1574*7c478bd9Sstevel@tonic-gate 
1575*7c478bd9Sstevel@tonic-gate /*
1576*7c478bd9Sstevel@tonic-gate  * Delete the active lock : Performs two functions depending on the
1577*7c478bd9Sstevel@tonic-gate  * value of second parameter. One is to remove from the active lists
1578*7c478bd9Sstevel@tonic-gate  * only and other is to both remove and free the lock.
1579*7c478bd9Sstevel@tonic-gate  */
1580*7c478bd9Sstevel@tonic-gate 
1581*7c478bd9Sstevel@tonic-gate static void
1582*7c478bd9Sstevel@tonic-gate flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1583*7c478bd9Sstevel@tonic-gate {
1584*7c478bd9Sstevel@tonic-gate 	vnode_t *vp = lock->l_vnode;
1585*7c478bd9Sstevel@tonic-gate 	graph_t	*gp = lock->l_graph;
1586*7c478bd9Sstevel@tonic-gate 
1587*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
1588*7c478bd9Sstevel@tonic-gate 	if (free_lock)
1589*7c478bd9Sstevel@tonic-gate 		ASSERT(NO_DEPENDENTS(lock));
1590*7c478bd9Sstevel@tonic-gate 	ASSERT(NOT_BLOCKED(lock));
1591*7c478bd9Sstevel@tonic-gate 	ASSERT(IS_ACTIVE(lock));
1592*7c478bd9Sstevel@tonic-gate 
1593*7c478bd9Sstevel@tonic-gate 	ASSERT((vp->v_filocks != NULL));
1594*7c478bd9Sstevel@tonic-gate 
1595*7c478bd9Sstevel@tonic-gate 	if (vp->v_filocks == (struct filock *)lock) {
1596*7c478bd9Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)
1597*7c478bd9Sstevel@tonic-gate 				((lock->l_next->l_vnode == vp) ? lock->l_next :
1598*7c478bd9Sstevel@tonic-gate 								NULL);
1599*7c478bd9Sstevel@tonic-gate 	}
1600*7c478bd9Sstevel@tonic-gate 	lock->l_next->l_prev = lock->l_prev;
1601*7c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = lock->l_next;
1602*7c478bd9Sstevel@tonic-gate 	lock->l_next = lock->l_prev = NULL;
1603*7c478bd9Sstevel@tonic-gate 	flk_set_state(lock, FLK_DEAD_STATE);
1604*7c478bd9Sstevel@tonic-gate 	lock->l_state &= ~ACTIVE_LOCK;
1605*7c478bd9Sstevel@tonic-gate 
1606*7c478bd9Sstevel@tonic-gate 	if (free_lock)
1607*7c478bd9Sstevel@tonic-gate 		flk_free_lock(lock);
1608*7c478bd9Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
1609*7c478bd9Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
1610*7c478bd9Sstevel@tonic-gate }
1611*7c478bd9Sstevel@tonic-gate 
1612*7c478bd9Sstevel@tonic-gate /*
1613*7c478bd9Sstevel@tonic-gate  * Insert into the sleep queue.
1614*7c478bd9Sstevel@tonic-gate  */
1615*7c478bd9Sstevel@tonic-gate 
1616*7c478bd9Sstevel@tonic-gate static void
1617*7c478bd9Sstevel@tonic-gate flk_insert_sleeping_lock(lock_descriptor_t *request)
1618*7c478bd9Sstevel@tonic-gate {
1619*7c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
1620*7c478bd9Sstevel@tonic-gate 	vnode_t	*vp = request->l_vnode;
1621*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
1622*7c478bd9Sstevel@tonic-gate 
1623*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
1624*7c478bd9Sstevel@tonic-gate 	ASSERT(IS_INITIAL(request));
1625*7c478bd9Sstevel@tonic-gate 
1626*7c478bd9Sstevel@tonic-gate 	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1627*7c478bd9Sstevel@tonic-gate 		lock->l_vnode < vp); lock = lock->l_next)
1628*7c478bd9Sstevel@tonic-gate 		;
1629*7c478bd9Sstevel@tonic-gate 
1630*7c478bd9Sstevel@tonic-gate 	lock->l_prev->l_next = request;
1631*7c478bd9Sstevel@tonic-gate 	request->l_prev = lock->l_prev;
1632*7c478bd9Sstevel@tonic-gate 	lock->l_prev = request;
1633*7c478bd9Sstevel@tonic-gate 	request->l_next = lock;
1634*7c478bd9Sstevel@tonic-gate 	flk_set_state(request, FLK_SLEEPING_STATE);
1635*7c478bd9Sstevel@tonic-gate 	request->l_state |= SLEEPING_LOCK;
1636*7c478bd9Sstevel@tonic-gate }
1637*7c478bd9Sstevel@tonic-gate 
1638*7c478bd9Sstevel@tonic-gate /*
1639*7c478bd9Sstevel@tonic-gate  * Cancelling a sleeping lock implies removing a vertex from the
1640*7c478bd9Sstevel@tonic-gate  * dependency graph and therefore we should recompute the dependencies
1641*7c478bd9Sstevel@tonic-gate  * of all vertices that have a path  to this vertex, w.r.t. all
1642*7c478bd9Sstevel@tonic-gate  * vertices reachable from this vertex.
1643*7c478bd9Sstevel@tonic-gate  */
1644*7c478bd9Sstevel@tonic-gate 
void
flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
{
	graph_t	*gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t **topology = NULL;
	edge_t	*ep;
	lock_descriptor_t *vertex, *lock;
	int nvertex = 0;
	int i;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/*
	 * Count the number of vertex pointers that have to be allocated:
	 * all vertices that are reachable from request.  Each visited
	 * vertex is tagged RECOMPUTE_LOCK, which both prevents revisiting
	 * it here and lets the second pass below recognize it.
	 */

	STACK_PUSH(vertex_stack, request, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
					ep = NEXT_ADJ(ep)) {
			if (IS_RECOMPUTE(ep->to_vertex))
				continue;
			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
			nvertex++;
		}
	}

	/*
	 * allocate memory for holding the vertex pointers
	 */

	if (nvertex) {
		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
						KM_SLEEP);
	}

	/*
	 * one more pass to actually store the vertices in the
	 * allocated array.
	 * We first check sleeping locks and then active locks
	 * so that topology array will be in a topological
	 * order.
	 */

	nvertex = 0;
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				/* l_index records position in topology[] */
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * Unhook request's in-edges from the adjacency lists of their
	 * from-vertices.  The edges themselves stay on request's in-list
	 * (they are traversed during the recompute and freed later, after
	 * updating proc_graph below).
	 */

	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
		ADJ_LIST_REMOVE(ep);
	}


	if (remove_from_queue)
		REMOVE_SLEEP_QUEUE(request);

	/* we are ready to recompute */

	flk_recompute_dependencies(request, topology, nvertex, 1);

	/*
	 * Tear down and free request's out-edges: unhook each edge from
	 * both lists, update the owning process graph, then free it.
	 */
	ep = FIRST_ADJ(request);
	while (ep != HEAD(request)) {
		IN_LIST_REMOVE(ep);
		/* save successor before the edge is freed */
		request->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		flk_free_edge(ep);
		ep = request->l_sedge;
	}


	/*
	 * unset the RECOMPUTE flag in those vertices
	 */

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
	}

	/*
	 * free the topology
	 */
	if (nvertex)
		kmem_free((void *)topology,
			(nvertex * sizeof (lock_descriptor_t *)));
	/*
	 * Possibility of some locks unblocked now.
	 * Second argument is 0 because the in-edges were already removed
	 * from the adjacency lists in the loop above.
	 */

	flk_wakeup(request, 0);

	/*
	 * we expect to have a correctly recomputed graph  now.
	 */
	flk_set_state(request, FLK_DEAD_STATE);
	flk_free_lock(request);
	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

}
1780*7c478bd9Sstevel@tonic-gate 
1781*7c478bd9Sstevel@tonic-gate /*
1782*7c478bd9Sstevel@tonic-gate  * Uncoloring the graph is simply to increment the mark value of the graph
1783*7c478bd9Sstevel@tonic-gate  * And only when wrap round takes place will we color all vertices in
1784*7c478bd9Sstevel@tonic-gate  * the graph explicitly.
1785*7c478bd9Sstevel@tonic-gate  */
1786*7c478bd9Sstevel@tonic-gate 
1787*7c478bd9Sstevel@tonic-gate static void
1788*7c478bd9Sstevel@tonic-gate flk_graph_uncolor(graph_t *gp)
1789*7c478bd9Sstevel@tonic-gate {
1790*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
1791*7c478bd9Sstevel@tonic-gate 
1792*7c478bd9Sstevel@tonic-gate 	if (gp->mark == UINT_MAX) {
1793*7c478bd9Sstevel@tonic-gate 		gp->mark = 1;
1794*7c478bd9Sstevel@tonic-gate 	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1795*7c478bd9Sstevel@tonic-gate 					lock = lock->l_next)
1796*7c478bd9Sstevel@tonic-gate 			lock->l_color  = 0;
1797*7c478bd9Sstevel@tonic-gate 
1798*7c478bd9Sstevel@tonic-gate 	for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1799*7c478bd9Sstevel@tonic-gate 					lock = lock->l_next)
1800*7c478bd9Sstevel@tonic-gate 			lock->l_color  = 0;
1801*7c478bd9Sstevel@tonic-gate 	} else {
1802*7c478bd9Sstevel@tonic-gate 		gp->mark++;
1803*7c478bd9Sstevel@tonic-gate 	}
1804*7c478bd9Sstevel@tonic-gate }
1805*7c478bd9Sstevel@tonic-gate 
1806*7c478bd9Sstevel@tonic-gate /*
1807*7c478bd9Sstevel@tonic-gate  * Wake up locks that are blocked on the given lock.
1808*7c478bd9Sstevel@tonic-gate  */
1809*7c478bd9Sstevel@tonic-gate 
static void
flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
{
	edge_t	*ep;
	graph_t	*gp = lock->l_graph;
	lock_descriptor_t	*lck;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/* nothing is blocked on this lock; no edges to process */
	if (NO_DEPENDENTS(lock))
		return;
	ep = FIRST_IN(lock);
	do {
		/*
		 * delete the edge from the adjacency list
		 * of from vertex. if no more adjacent edges
		 * for this vertex wake this process.
		 */
		lck = ep->from_vertex;
		/*
		 * adj_list_remove == 0 means the caller has already
		 * unhooked these edges from the adjacency lists (see
		 * flk_cancel_sleeping_lock()).
		 */
		if (adj_list_remove)
			ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		if (NOT_BLOCKED(lck)) {
			GRANT_WAKEUP(lck);
		}
		/* save the next in-edge before this one is freed */
		lock->l_sedge = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = lock->l_sedge;
	} while (ep != HEAD(lock));
	ASSERT(NO_DEPENDENTS(lock));
}
1841*7c478bd9Sstevel@tonic-gate 
1842*7c478bd9Sstevel@tonic-gate /*
1843*7c478bd9Sstevel@tonic-gate  * The dependents of request, is checked for its dependency against the
1844*7c478bd9Sstevel@tonic-gate  * locks in topology (called topology because the array is and should be in
1845*7c478bd9Sstevel@tonic-gate  * topological order for this algorithm, if not in topological order the
1846*7c478bd9Sstevel@tonic-gate  * inner loop below might add more edges than necessary. Topological ordering
1847*7c478bd9Sstevel@tonic-gate  * of vertices satisfies the property that all edges will be from left to
1848*7c478bd9Sstevel@tonic-gate  * right i.e., topology[i] can have an edge to  topology[j], iff i<j)
1849*7c478bd9Sstevel@tonic-gate  * If lock l1 in the dependent set of request is dependent (blocked by)
1850*7c478bd9Sstevel@tonic-gate  * on lock l2 in topology but does not have a path to it, we add an edge
1851*7c478bd9Sstevel@tonic-gate  * in the inner loop below.
1852*7c478bd9Sstevel@tonic-gate  *
1853*7c478bd9Sstevel@tonic-gate  * We don't want to add an edge between l1 and l2 if there exists
1854*7c478bd9Sstevel@tonic-gate  * already a path from l1 to l2, so care has to be taken for those vertices
1855*7c478bd9Sstevel@tonic-gate  * that  have two paths to 'request'. These vertices are referred to here
1856*7c478bd9Sstevel@tonic-gate  * as barrier locks.
1857*7c478bd9Sstevel@tonic-gate  *
 * The barrier locks have to be found (those vertices that originally had
 * two paths to request) because otherwise we may end up adding edges
 * unnecessarily to vertices in topology, and thus barrier vertices can
 * have an edge to a vertex in topology as well as a path to it.
1862*7c478bd9Sstevel@tonic-gate  */
1863*7c478bd9Sstevel@tonic-gate 
static void
flk_recompute_dependencies(lock_descriptor_t *request,
		lock_descriptor_t **topology,
			int nvertex, int update_graph)
{
	lock_descriptor_t *vertex, *lock;
	graph_t	*gp = request->l_graph;
	int i, count;
	int barrier_found = 0;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/* nothing to recompute against */
	if (nvertex == 0)
		return;
	flk_graph_uncolor(request->l_graph);
	/* mark vertices that have multiple paths to request (see above) */
	barrier_found = flk_find_barriers(request);
	/* RECOMPUTE_DONE flags a vertex as being on the current DFS path */
	request->l_state |= RECOMPUTE_DONE;

	STACK_PUSH(vertex_stack, request, l_stack);
	/* l_sedge tracks each vertex's next unexplored in-edge */
	request->l_sedge = FIRST_IN(request);


	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		if (vertex->l_state & RECOMPUTE_DONE) {
			/* already processed; just advance its in-edge scan */
			count = 0;
			goto next_in_edge;
		}
		if (IS_BARRIER(vertex)) {
			/* decrement the barrier count */
			if (vertex->l_index) {
				vertex->l_index--;
				/* this guy will be pushed again anyway ? */
				STACK_POP(vertex_stack, l_stack);
				if (vertex->l_index == 0)  {
				/*
				 * barrier is over we can recompute
				 * dependencies for this lock in the
				 * next stack pop
				 */
					vertex->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
		}
		vertex->l_state |= RECOMPUTE_DONE;
		flk_graph_uncolor(gp);
		count = flk_color_reachables(vertex);
		/*
		 * Add an edge from vertex to every topology lock that
		 * blocks it and is not already reachable from it
		 * (i.e., still uncolored).
		 */
		for (i = 0; i < nvertex; i++) {
			lock = topology[i];
			if (COLORED(lock))
				continue;
			if (BLOCKS(lock, vertex)) {
				(void) flk_add_edge(vertex, lock,
				    NO_CHECK_CYCLE, update_graph);
				COLOR(lock);
				count++;
				count += flk_color_reachables(lock);
			}

		}

next_in_edge:
		/*
		 * If every topology vertex is reachable, or there are no
		 * more in-edges to follow, we are done with this vertex.
		 */
		if (count == nvertex ||
				vertex->l_sedge == HEAD(vertex)) {
			/* prune the tree below this */
			STACK_POP(vertex_stack, l_stack);
			vertex->l_state &= ~RECOMPUTE_DONE;
			/* update the barrier locks below this! */
			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
				flk_graph_uncolor(gp);
				flk_update_barriers(vertex);
			}
			continue;
		}

		/* descend into the next dependent (from-vertex of in-edge) */
		ep = vertex->l_sedge;
		lock = ep->from_vertex;
		STACK_PUSH(vertex_stack, lock, l_stack);
		lock->l_sedge = FIRST_IN(lock);
		vertex->l_sedge = NEXT_IN(ep);
	}

}
1950*7c478bd9Sstevel@tonic-gate 
1951*7c478bd9Sstevel@tonic-gate /*
1952*7c478bd9Sstevel@tonic-gate  * Color all reachable vertices from vertex that belongs to topology (here
1953*7c478bd9Sstevel@tonic-gate  * those that have RECOMPUTE_LOCK set in their state) and yet uncolored.
1954*7c478bd9Sstevel@tonic-gate  *
1955*7c478bd9Sstevel@tonic-gate  * Note: we need to use a different stack_link l_stack1 because this is
1956*7c478bd9Sstevel@tonic-gate  * called from flk_recompute_dependencies() that already uses a stack with
1957*7c478bd9Sstevel@tonic-gate  * l_stack as stack_link.
1958*7c478bd9Sstevel@tonic-gate  */
1959*7c478bd9Sstevel@tonic-gate 
1960*7c478bd9Sstevel@tonic-gate static int
1961*7c478bd9Sstevel@tonic-gate flk_color_reachables(lock_descriptor_t *vertex)
1962*7c478bd9Sstevel@tonic-gate {
1963*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *ver, *lock;
1964*7c478bd9Sstevel@tonic-gate 	int count;
1965*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
1966*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
1967*7c478bd9Sstevel@tonic-gate 
1968*7c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
1969*7c478bd9Sstevel@tonic-gate 
1970*7c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, vertex, l_stack1);
1971*7c478bd9Sstevel@tonic-gate 	count = 0;
1972*7c478bd9Sstevel@tonic-gate 	while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1973*7c478bd9Sstevel@tonic-gate 
1974*7c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
1975*7c478bd9Sstevel@tonic-gate 		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1976*7c478bd9Sstevel@tonic-gate 					ep = NEXT_ADJ(ep)) {
1977*7c478bd9Sstevel@tonic-gate 			lock = ep->to_vertex;
1978*7c478bd9Sstevel@tonic-gate 			if (COLORED(lock))
1979*7c478bd9Sstevel@tonic-gate 				continue;
1980*7c478bd9Sstevel@tonic-gate 			COLOR(lock);
1981*7c478bd9Sstevel@tonic-gate 			if (IS_RECOMPUTE(lock))
1982*7c478bd9Sstevel@tonic-gate 				count++;
1983*7c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lock, l_stack1);
1984*7c478bd9Sstevel@tonic-gate 		}
1985*7c478bd9Sstevel@tonic-gate 
1986*7c478bd9Sstevel@tonic-gate 	}
1987*7c478bd9Sstevel@tonic-gate 	return (count);
1988*7c478bd9Sstevel@tonic-gate }
1989*7c478bd9Sstevel@tonic-gate 
1990*7c478bd9Sstevel@tonic-gate /*
1991*7c478bd9Sstevel@tonic-gate  * Called from flk_recompute_dependencies() this routine decrements
1992*7c478bd9Sstevel@tonic-gate  * the barrier count of barrier vertices that are reachable from lock.
1993*7c478bd9Sstevel@tonic-gate  */
1994*7c478bd9Sstevel@tonic-gate 
1995*7c478bd9Sstevel@tonic-gate static void
1996*7c478bd9Sstevel@tonic-gate flk_update_barriers(lock_descriptor_t *lock)
1997*7c478bd9Sstevel@tonic-gate {
1998*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
1999*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
2000*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
2001*7c478bd9Sstevel@tonic-gate 
2002*7c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
2003*7c478bd9Sstevel@tonic-gate 
2004*7c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
2005*7c478bd9Sstevel@tonic-gate 
2006*7c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2007*7c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
2008*7c478bd9Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2009*7c478bd9Sstevel@tonic-gate 						ep = NEXT_IN(ep)) {
2010*7c478bd9Sstevel@tonic-gate 			lck = ep->from_vertex;
2011*7c478bd9Sstevel@tonic-gate 			if (COLORED(lck)) {
2012*7c478bd9Sstevel@tonic-gate 				if (IS_BARRIER(lck)) {
2013*7c478bd9Sstevel@tonic-gate 					ASSERT(lck->l_index > 0);
2014*7c478bd9Sstevel@tonic-gate 					lck->l_index--;
2015*7c478bd9Sstevel@tonic-gate 					if (lck->l_index == 0)
2016*7c478bd9Sstevel@tonic-gate 						lck->l_state &= ~BARRIER_LOCK;
2017*7c478bd9Sstevel@tonic-gate 				}
2018*7c478bd9Sstevel@tonic-gate 				continue;
2019*7c478bd9Sstevel@tonic-gate 			}
2020*7c478bd9Sstevel@tonic-gate 			COLOR(lck);
2021*7c478bd9Sstevel@tonic-gate 			if (IS_BARRIER(lck)) {
2022*7c478bd9Sstevel@tonic-gate 				ASSERT(lck->l_index > 0);
2023*7c478bd9Sstevel@tonic-gate 				lck->l_index--;
2024*7c478bd9Sstevel@tonic-gate 				if (lck->l_index == 0)
2025*7c478bd9Sstevel@tonic-gate 					lck->l_state &= ~BARRIER_LOCK;
2026*7c478bd9Sstevel@tonic-gate 			}
2027*7c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
2028*7c478bd9Sstevel@tonic-gate 		}
2029*7c478bd9Sstevel@tonic-gate 	}
2030*7c478bd9Sstevel@tonic-gate }
2031*7c478bd9Sstevel@tonic-gate 
2032*7c478bd9Sstevel@tonic-gate /*
2033*7c478bd9Sstevel@tonic-gate  * Finds all vertices that are reachable from 'lock' more than once and
2034*7c478bd9Sstevel@tonic-gate  * mark them as barrier vertices and increment their barrier count.
2035*7c478bd9Sstevel@tonic-gate  * The barrier count is one minus the total number of paths from lock
2036*7c478bd9Sstevel@tonic-gate  * to that vertex.
2037*7c478bd9Sstevel@tonic-gate  */
2038*7c478bd9Sstevel@tonic-gate 
2039*7c478bd9Sstevel@tonic-gate static int
2040*7c478bd9Sstevel@tonic-gate flk_find_barriers(lock_descriptor_t *lock)
2041*7c478bd9Sstevel@tonic-gate {
2042*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
2043*7c478bd9Sstevel@tonic-gate 	int found = 0;
2044*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
2045*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
2046*7c478bd9Sstevel@tonic-gate 
2047*7c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
2048*7c478bd9Sstevel@tonic-gate 
2049*7c478bd9Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
2050*7c478bd9Sstevel@tonic-gate 
2051*7c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2052*7c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
2053*7c478bd9Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2054*7c478bd9Sstevel@tonic-gate 						ep = NEXT_IN(ep)) {
2055*7c478bd9Sstevel@tonic-gate 			lck = ep->from_vertex;
2056*7c478bd9Sstevel@tonic-gate 			if (COLORED(lck)) {
2057*7c478bd9Sstevel@tonic-gate 				/* this is a barrier */
2058*7c478bd9Sstevel@tonic-gate 				lck->l_state |= BARRIER_LOCK;
2059*7c478bd9Sstevel@tonic-gate 				/* index will have barrier count */
2060*7c478bd9Sstevel@tonic-gate 				lck->l_index++;
2061*7c478bd9Sstevel@tonic-gate 				if (!found)
2062*7c478bd9Sstevel@tonic-gate 					found = 1;
2063*7c478bd9Sstevel@tonic-gate 				continue;
2064*7c478bd9Sstevel@tonic-gate 			}
2065*7c478bd9Sstevel@tonic-gate 			COLOR(lck);
2066*7c478bd9Sstevel@tonic-gate 			lck->l_index = 0;
2067*7c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
2068*7c478bd9Sstevel@tonic-gate 		}
2069*7c478bd9Sstevel@tonic-gate 	}
2070*7c478bd9Sstevel@tonic-gate 	return (found);
2071*7c478bd9Sstevel@tonic-gate }
2072*7c478bd9Sstevel@tonic-gate 
2073*7c478bd9Sstevel@tonic-gate /*
2074*7c478bd9Sstevel@tonic-gate  * Finds the first lock that is mainly responsible for blocking this
2075*7c478bd9Sstevel@tonic-gate  * request.  If there is no such lock, request->l_flock.l_type is set to
2076*7c478bd9Sstevel@tonic-gate  * F_UNLCK.  Otherwise, request->l_flock is filled in with the particulars
2077*7c478bd9Sstevel@tonic-gate  * of the blocking lock.
2078*7c478bd9Sstevel@tonic-gate  *
2079*7c478bd9Sstevel@tonic-gate  * Note: It is possible a request is blocked by a sleeping lock because
2080*7c478bd9Sstevel@tonic-gate  * of the fairness policy used in flk_process_request() to construct the
2081*7c478bd9Sstevel@tonic-gate  * dependencies. (see comments before flk_process_request()).
2082*7c478bd9Sstevel@tonic-gate  */
2083*7c478bd9Sstevel@tonic-gate 
2084*7c478bd9Sstevel@tonic-gate static void
2085*7c478bd9Sstevel@tonic-gate flk_get_first_blocking_lock(lock_descriptor_t *request)
2086*7c478bd9Sstevel@tonic-gate {
2087*7c478bd9Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
2088*7c478bd9Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
2089*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *blocker;
2090*7c478bd9Sstevel@tonic-gate 
2091*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
2092*7c478bd9Sstevel@tonic-gate 	blocker = NULL;
2093*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2094*7c478bd9Sstevel@tonic-gate 
2095*7c478bd9Sstevel@tonic-gate 	if (lock) {
2096*7c478bd9Sstevel@tonic-gate 		do {
2097*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
2098*7c478bd9Sstevel@tonic-gate 				blocker = lock;
2099*7c478bd9Sstevel@tonic-gate 				break;
2100*7c478bd9Sstevel@tonic-gate 			}
2101*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
2102*7c478bd9Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
2103*7c478bd9Sstevel@tonic-gate 	}
2104*7c478bd9Sstevel@tonic-gate 
2105*7c478bd9Sstevel@tonic-gate 	if (blocker) {
2106*7c478bd9Sstevel@tonic-gate 		report_blocker(blocker, request);
2107*7c478bd9Sstevel@tonic-gate 	} else
2108*7c478bd9Sstevel@tonic-gate 		request->l_flock.l_type = F_UNLCK;
2109*7c478bd9Sstevel@tonic-gate }
2110*7c478bd9Sstevel@tonic-gate 
2111*7c478bd9Sstevel@tonic-gate /*
2112*7c478bd9Sstevel@tonic-gate  * Get the graph_t structure associated with a vnode.
2113*7c478bd9Sstevel@tonic-gate  * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2114*7c478bd9Sstevel@tonic-gate  * not yet been initialized, then a new element is allocated and returned.
2115*7c478bd9Sstevel@tonic-gate  */
graph_t *
flk_get_lock_graph(vnode_t *vp, int initialize)
{
	graph_t *gp;
	graph_t *gp_alloc = NULL;
	int index = HASH_INDEX(vp);

	if (initialize == FLK_USE_GRAPH) {
		/* lookup only: return the existing graph, possibly NULL */
		mutex_enter(&flock_lock);
		gp = lock_graph[index];
		mutex_exit(&flock_lock);
		return (gp);
	}

	ASSERT(initialize == FLK_INIT_GRAPH);

	if (lock_graph[index] == NULL) {

		/*
		 * Pre-allocate and initialize outside flock_lock (KM_SLEEP
		 * may block).  The unlocked read above can race with other
		 * initializers; we recheck under the mutex below.
		 */
		gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);

		/* Initialize the graph: both lists start as empty circles */

		gp_alloc->active_locks.l_next =
		    gp_alloc->active_locks.l_prev =
		    (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
		gp_alloc->sleeping_locks.l_next =
		    gp_alloc->sleeping_locks.l_prev =
		    (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
		gp_alloc->index = index;
		mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
	}

	mutex_enter(&flock_lock);

	gp = lock_graph[index];

	/* Recheck the value within flock_lock */
	if (gp == NULL) {
		struct flock_globals *fg;

		/* We must have previously allocated the graph_t structure */
		ASSERT(gp_alloc != NULL);
		lock_graph[index] = gp = gp_alloc;
		/*
		 * The lockmgr status is only needed if KLM is loaded.
		 */
		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
			fg = flk_get_globals();
			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
		}
	}

	mutex_exit(&flock_lock);

	if ((gp_alloc != NULL) && (gp != gp_alloc)) {
		/* There was a race to allocate the graph_t and we lost */
		mutex_destroy(&gp_alloc->gp_mutex);
		kmem_free(gp_alloc, sizeof (graph_t));
	}

	return (gp);
}
2178*7c478bd9Sstevel@tonic-gate 
2179*7c478bd9Sstevel@tonic-gate /*
2180*7c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
2181*7c478bd9Sstevel@tonic-gate  */
2182*7c478bd9Sstevel@tonic-gate int
2183*7c478bd9Sstevel@tonic-gate cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2184*7c478bd9Sstevel@tonic-gate {
2185*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
2186*7c478bd9Sstevel@tonic-gate 	int result = 0;
2187*7c478bd9Sstevel@tonic-gate 	graph_t *gp;
2188*7c478bd9Sstevel@tonic-gate 	int			lock_nlmid;
2189*7c478bd9Sstevel@tonic-gate 
2190*7c478bd9Sstevel@tonic-gate 	/*
2191*7c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
2192*7c478bd9Sstevel@tonic-gate 	 */
2193*7c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2194*7c478bd9Sstevel@tonic-gate 		return (0);
2195*7c478bd9Sstevel@tonic-gate 	}
2196*7c478bd9Sstevel@tonic-gate 
2197*7c478bd9Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2198*7c478bd9Sstevel@tonic-gate 	if (gp == NULL) {
2199*7c478bd9Sstevel@tonic-gate 		return (0);
2200*7c478bd9Sstevel@tonic-gate 	}
2201*7c478bd9Sstevel@tonic-gate 
2202*7c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
2203*7c478bd9Sstevel@tonic-gate 
2204*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2205*7c478bd9Sstevel@tonic-gate 
2206*7c478bd9Sstevel@tonic-gate 	if (lock) {
2207*7c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
2208*7c478bd9Sstevel@tonic-gate 			/* get NLM id from sysid */
2209*7c478bd9Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2210*7c478bd9Sstevel@tonic-gate 
2211*7c478bd9Sstevel@tonic-gate 			/*
2212*7c478bd9Sstevel@tonic-gate 			 * If NLM server request _and_ nlmid of lock matches
2213*7c478bd9Sstevel@tonic-gate 			 * nlmid of argument, then we've found a remote lock.
2214*7c478bd9Sstevel@tonic-gate 			 */
2215*7c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2216*7c478bd9Sstevel@tonic-gate 				result = 1;
2217*7c478bd9Sstevel@tonic-gate 				goto done;
2218*7c478bd9Sstevel@tonic-gate 			}
2219*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
2220*7c478bd9Sstevel@tonic-gate 		}
2221*7c478bd9Sstevel@tonic-gate 	}
2222*7c478bd9Sstevel@tonic-gate 
2223*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2224*7c478bd9Sstevel@tonic-gate 
2225*7c478bd9Sstevel@tonic-gate 	if (lock) {
2226*7c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
2227*7c478bd9Sstevel@tonic-gate 			/* get NLM id from sysid */
2228*7c478bd9Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2229*7c478bd9Sstevel@tonic-gate 
2230*7c478bd9Sstevel@tonic-gate 			/*
2231*7c478bd9Sstevel@tonic-gate 			 * If NLM server request _and_ nlmid of lock matches
2232*7c478bd9Sstevel@tonic-gate 			 * nlmid of argument, then we've found a remote lock.
2233*7c478bd9Sstevel@tonic-gate 			 */
2234*7c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2235*7c478bd9Sstevel@tonic-gate 				result = 1;
2236*7c478bd9Sstevel@tonic-gate 				goto done;
2237*7c478bd9Sstevel@tonic-gate 			}
2238*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
2239*7c478bd9Sstevel@tonic-gate 		}
2240*7c478bd9Sstevel@tonic-gate 	}
2241*7c478bd9Sstevel@tonic-gate 
2242*7c478bd9Sstevel@tonic-gate done:
2243*7c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
2244*7c478bd9Sstevel@tonic-gate 	return (result);
2245*7c478bd9Sstevel@tonic-gate }
2246*7c478bd9Sstevel@tonic-gate 
2247*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
2248*7c478bd9Sstevel@tonic-gate /*
2249*7c478bd9Sstevel@tonic-gate  * Determine whether there are any locks for the given vnode with a remote
2250*7c478bd9Sstevel@tonic-gate  * sysid.  Returns zero if not, non-zero if there are.
2251*7c478bd9Sstevel@tonic-gate  *
2252*7c478bd9Sstevel@tonic-gate  * Note that the return value from this function is potentially invalid
2253*7c478bd9Sstevel@tonic-gate  * once it has been returned.  The caller is responsible for providing its
2254*7c478bd9Sstevel@tonic-gate  * own synchronization mechanism to ensure that the return value is useful
2255*7c478bd9Sstevel@tonic-gate  * (e.g., see nfs_lockcompletion()).
2256*7c478bd9Sstevel@tonic-gate  */
2257*7c478bd9Sstevel@tonic-gate int
2258*7c478bd9Sstevel@tonic-gate flk_has_remote_locks(vnode_t *vp)
2259*7c478bd9Sstevel@tonic-gate {
2260*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
2261*7c478bd9Sstevel@tonic-gate 	int result = 0;
2262*7c478bd9Sstevel@tonic-gate 	graph_t *gp;
2263*7c478bd9Sstevel@tonic-gate 
2264*7c478bd9Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2265*7c478bd9Sstevel@tonic-gate 	if (gp == NULL) {
2266*7c478bd9Sstevel@tonic-gate 		return (0);
2267*7c478bd9Sstevel@tonic-gate 	}
2268*7c478bd9Sstevel@tonic-gate 
2269*7c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
2270*7c478bd9Sstevel@tonic-gate 
2271*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2272*7c478bd9Sstevel@tonic-gate 
2273*7c478bd9Sstevel@tonic-gate 	if (lock) {
2274*7c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
2275*7c478bd9Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
2276*7c478bd9Sstevel@tonic-gate 				result = 1;
2277*7c478bd9Sstevel@tonic-gate 				goto done;
2278*7c478bd9Sstevel@tonic-gate 			}
2279*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
2280*7c478bd9Sstevel@tonic-gate 		}
2281*7c478bd9Sstevel@tonic-gate 	}
2282*7c478bd9Sstevel@tonic-gate 
2283*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2284*7c478bd9Sstevel@tonic-gate 
2285*7c478bd9Sstevel@tonic-gate 	if (lock) {
2286*7c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
2287*7c478bd9Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
2288*7c478bd9Sstevel@tonic-gate 				result = 1;
2289*7c478bd9Sstevel@tonic-gate 				goto done;
2290*7c478bd9Sstevel@tonic-gate 			}
2291*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
2292*7c478bd9Sstevel@tonic-gate 		}
2293*7c478bd9Sstevel@tonic-gate 	}
2294*7c478bd9Sstevel@tonic-gate 
2295*7c478bd9Sstevel@tonic-gate done:
2296*7c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
2297*7c478bd9Sstevel@tonic-gate 	return (result);
2298*7c478bd9Sstevel@tonic-gate }
2299*7c478bd9Sstevel@tonic-gate 
2300*7c478bd9Sstevel@tonic-gate /*
2301*7c478bd9Sstevel@tonic-gate  * Determine if there are any locks owned by the given sysid.
2302*7c478bd9Sstevel@tonic-gate  * Returns zero if not, non-zero if there are.  Note that this return code
2303*7c478bd9Sstevel@tonic-gate  * could be derived from flk_get_{sleeping,active}_locks, but this routine
2304*7c478bd9Sstevel@tonic-gate  * avoids all the memory allocations of those routines.
2305*7c478bd9Sstevel@tonic-gate  *
2306*7c478bd9Sstevel@tonic-gate  * This routine has the same synchronization issues as
2307*7c478bd9Sstevel@tonic-gate  * flk_has_remote_locks.
2308*7c478bd9Sstevel@tonic-gate  */
2309*7c478bd9Sstevel@tonic-gate 
2310*7c478bd9Sstevel@tonic-gate int
2311*7c478bd9Sstevel@tonic-gate flk_sysid_has_locks(int sysid, int lck_type)
2312*7c478bd9Sstevel@tonic-gate {
2313*7c478bd9Sstevel@tonic-gate 	int		has_locks = 0;
2314*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
2315*7c478bd9Sstevel@tonic-gate 	graph_t 	*gp;
2316*7c478bd9Sstevel@tonic-gate 	int		i;
2317*7c478bd9Sstevel@tonic-gate 
2318*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2319*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
2320*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
2321*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
2322*7c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
2323*7c478bd9Sstevel@tonic-gate 			continue;
2324*7c478bd9Sstevel@tonic-gate 		}
2325*7c478bd9Sstevel@tonic-gate 
2326*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
2327*7c478bd9Sstevel@tonic-gate 
2328*7c478bd9Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_ACTIVE) {
2329*7c478bd9Sstevel@tonic-gate 			for (lock = ACTIVE_HEAD(gp)->l_next;
2330*7c478bd9Sstevel@tonic-gate 			    lock != ACTIVE_HEAD(gp) && !has_locks;
2331*7c478bd9Sstevel@tonic-gate 			    lock = lock->l_next) {
2332*7c478bd9Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
2333*7c478bd9Sstevel@tonic-gate 					has_locks = 1;
2334*7c478bd9Sstevel@tonic-gate 			}
2335*7c478bd9Sstevel@tonic-gate 		}
2336*7c478bd9Sstevel@tonic-gate 
2337*7c478bd9Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_SLEEPING) {
2338*7c478bd9Sstevel@tonic-gate 			for (lock = SLEEPING_HEAD(gp)->l_next;
2339*7c478bd9Sstevel@tonic-gate 				lock != SLEEPING_HEAD(gp) && !has_locks;
2340*7c478bd9Sstevel@tonic-gate 				lock = lock->l_next) {
2341*7c478bd9Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
2342*7c478bd9Sstevel@tonic-gate 					has_locks = 1;
2343*7c478bd9Sstevel@tonic-gate 			}
2344*7c478bd9Sstevel@tonic-gate 		}
2345*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
2346*7c478bd9Sstevel@tonic-gate 	}
2347*7c478bd9Sstevel@tonic-gate 
2348*7c478bd9Sstevel@tonic-gate 	return (has_locks);
2349*7c478bd9Sstevel@tonic-gate }
2350*7c478bd9Sstevel@tonic-gate 
2351*7c478bd9Sstevel@tonic-gate 
2352*7c478bd9Sstevel@tonic-gate /*
2353*7c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
2354*7c478bd9Sstevel@tonic-gate  *
2355*7c478bd9Sstevel@tonic-gate  * Requires: "sysid" is a pair [nlmid, sysid].  The lower half is 16-bit
2356*7c478bd9Sstevel@tonic-gate  *  quantity, the real sysid generated by the NLM server; the upper half
2357*7c478bd9Sstevel@tonic-gate  *  identifies the node of the cluster where the NLM server ran.
2358*7c478bd9Sstevel@tonic-gate  *  This routine is only called by an NLM server running in a cluster.
2359*7c478bd9Sstevel@tonic-gate  * Effects: Remove all locks held on behalf of the client identified
2360*7c478bd9Sstevel@tonic-gate  *  by "sysid."
2361*7c478bd9Sstevel@tonic-gate  */
2362*7c478bd9Sstevel@tonic-gate void
2363*7c478bd9Sstevel@tonic-gate cl_flk_remove_locks_by_sysid(int sysid)
2364*7c478bd9Sstevel@tonic-gate {
2365*7c478bd9Sstevel@tonic-gate 	graph_t	*gp;
2366*7c478bd9Sstevel@tonic-gate 	int i;
2367*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
2368*7c478bd9Sstevel@tonic-gate 
2369*7c478bd9Sstevel@tonic-gate 	/*
2370*7c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
2371*7c478bd9Sstevel@tonic-gate 	 */
2372*7c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2373*7c478bd9Sstevel@tonic-gate 		return;
2374*7c478bd9Sstevel@tonic-gate 	}
2375*7c478bd9Sstevel@tonic-gate 
2376*7c478bd9Sstevel@tonic-gate 	ASSERT(sysid != 0);
2377*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
2378*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
2379*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
2380*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
2381*7c478bd9Sstevel@tonic-gate 
2382*7c478bd9Sstevel@tonic-gate 		if (gp == NULL)
2383*7c478bd9Sstevel@tonic-gate 			continue;
2384*7c478bd9Sstevel@tonic-gate 
2385*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);	/*  get mutex on lock graph */
2386*7c478bd9Sstevel@tonic-gate 
2387*7c478bd9Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
2388*7c478bd9Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
2389*7c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
2390*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
2391*7c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
2392*7c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
2393*7c478bd9Sstevel@tonic-gate 			}
2394*7c478bd9Sstevel@tonic-gate 			lock = nlock;
2395*7c478bd9Sstevel@tonic-gate 		}
2396*7c478bd9Sstevel@tonic-gate 
2397*7c478bd9Sstevel@tonic-gate 		/* delete active locks */
2398*7c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
2399*7c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
2400*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
2401*7c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
2402*7c478bd9Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
2403*7c478bd9Sstevel@tonic-gate 				flk_wakeup(lock, 1);
2404*7c478bd9Sstevel@tonic-gate 				flk_free_lock(lock);
2405*7c478bd9Sstevel@tonic-gate 			}
2406*7c478bd9Sstevel@tonic-gate 			lock = nlock;
2407*7c478bd9Sstevel@tonic-gate 		}
2408*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);    /* release mutex on lock graph */
2409*7c478bd9Sstevel@tonic-gate 	}
2410*7c478bd9Sstevel@tonic-gate }
2411*7c478bd9Sstevel@tonic-gate 
/*
 * Delete all locks in the system that belong to the sysid of the request.
 * Sleeping requests are interrupted so their owners bail out; active
 * locks are removed, their waiters woken, and the descriptors freed.
 */

static void
flk_delete_locks_by_sysid(lock_descriptor_t *request)
{
	int	sysid  = request->l_flock.l_sysid;
	lock_descriptor_t *lock, *nlock;
	graph_t	*gp;
	int i;

	ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
	ASSERT(sysid != 0);

	/*
	 * Drop the caller's graph mutex: we must take each graph's mutex
	 * in turn below, and holding two graph mutexes at once is not
	 * part of the lock-ordering here.  The caller's graph is
	 * re-entered before returning, so the caller's locking state is
	 * unchanged on exit.
	 */
	mutex_exit(&request->l_graph->gp_mutex);

	for (i = 0; i < HASH_SIZE; i++) {
		/* flock_lock protects the lock_graph[] table itself. */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* signal sleeping requests so that they bail out */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			/* save successor: wakeup may unlink this entry */
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				INTERRUPT_WAKEUP(lock);
			}
			lock = nlock;
		}

		/* delete active locks */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			/* save successor before the descriptor is freed */
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);
	}

	/* Restore the caller's graph mutex (held on entry). */
	mutex_enter(&request->l_graph->gp_mutex);
}
2465*7c478bd9Sstevel@tonic-gate 
2466*7c478bd9Sstevel@tonic-gate /*
2467*7c478bd9Sstevel@tonic-gate  * Clustering: Deletes PXFS locks
2468*7c478bd9Sstevel@tonic-gate  * Effects: Delete all locks on files in the given file system and with the
2469*7c478bd9Sstevel@tonic-gate  *  given PXFS id.
2470*7c478bd9Sstevel@tonic-gate  */
2471*7c478bd9Sstevel@tonic-gate void
2472*7c478bd9Sstevel@tonic-gate cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2473*7c478bd9Sstevel@tonic-gate {
2474*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
2475*7c478bd9Sstevel@tonic-gate 	graph_t	*gp;
2476*7c478bd9Sstevel@tonic-gate 	int i;
2477*7c478bd9Sstevel@tonic-gate 
2478*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
2479*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
2480*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
2481*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
2482*7c478bd9Sstevel@tonic-gate 
2483*7c478bd9Sstevel@tonic-gate 		if (gp == NULL)
2484*7c478bd9Sstevel@tonic-gate 			continue;
2485*7c478bd9Sstevel@tonic-gate 
2486*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
2487*7c478bd9Sstevel@tonic-gate 
2488*7c478bd9Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
2489*7c478bd9Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
2490*7c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
2491*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
2492*7c478bd9Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
2493*7c478bd9Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
2494*7c478bd9Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
2495*7c478bd9Sstevel@tonic-gate 				    pxfsid) {
2496*7c478bd9Sstevel@tonic-gate 					flk_set_state(lock,
2497*7c478bd9Sstevel@tonic-gate 					    FLK_CANCELLED_STATE);
2498*7c478bd9Sstevel@tonic-gate 					flk_cancel_sleeping_lock(lock, 1);
2499*7c478bd9Sstevel@tonic-gate 				}
2500*7c478bd9Sstevel@tonic-gate 			}
2501*7c478bd9Sstevel@tonic-gate 			lock = nlock;
2502*7c478bd9Sstevel@tonic-gate 		}
2503*7c478bd9Sstevel@tonic-gate 
2504*7c478bd9Sstevel@tonic-gate 		/* delete active locks */
2505*7c478bd9Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
2506*7c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
2507*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
2508*7c478bd9Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
2509*7c478bd9Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
2510*7c478bd9Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
2511*7c478bd9Sstevel@tonic-gate 				    pxfsid) {
2512*7c478bd9Sstevel@tonic-gate 					flk_delete_active_lock(lock, 0);
2513*7c478bd9Sstevel@tonic-gate 					flk_wakeup(lock, 1);
2514*7c478bd9Sstevel@tonic-gate 					flk_free_lock(lock);
2515*7c478bd9Sstevel@tonic-gate 				}
2516*7c478bd9Sstevel@tonic-gate 			}
2517*7c478bd9Sstevel@tonic-gate 			lock = nlock;
2518*7c478bd9Sstevel@tonic-gate 		}
2519*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
2520*7c478bd9Sstevel@tonic-gate 	}
2521*7c478bd9Sstevel@tonic-gate }
2522*7c478bd9Sstevel@tonic-gate 
2523*7c478bd9Sstevel@tonic-gate /*
2524*7c478bd9Sstevel@tonic-gate  * Search for a sleeping lock manager lock which matches exactly this lock
2525*7c478bd9Sstevel@tonic-gate  * request; if one is found, fake a signal to cancel it.
2526*7c478bd9Sstevel@tonic-gate  *
2527*7c478bd9Sstevel@tonic-gate  * Return 1 if a matching lock was found, 0 otherwise.
2528*7c478bd9Sstevel@tonic-gate  */
2529*7c478bd9Sstevel@tonic-gate 
2530*7c478bd9Sstevel@tonic-gate static int
2531*7c478bd9Sstevel@tonic-gate flk_canceled(lock_descriptor_t *request)
2532*7c478bd9Sstevel@tonic-gate {
2533*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
2534*7c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
2535*7c478bd9Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
2536*7c478bd9Sstevel@tonic-gate 
2537*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
2538*7c478bd9Sstevel@tonic-gate 	ASSERT(IS_LOCKMGR(request));
2539*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2540*7c478bd9Sstevel@tonic-gate 
2541*7c478bd9Sstevel@tonic-gate 	if (lock) {
2542*7c478bd9Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
2543*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
2544*7c478bd9Sstevel@tonic-gate 			if (SAME_OWNER(lock, request) &&
2545*7c478bd9Sstevel@tonic-gate 				lock->l_start == request->l_start &&
2546*7c478bd9Sstevel@tonic-gate 					lock->l_end == request->l_end) {
2547*7c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
2548*7c478bd9Sstevel@tonic-gate 				return (1);
2549*7c478bd9Sstevel@tonic-gate 			}
2550*7c478bd9Sstevel@tonic-gate 			lock = nlock;
2551*7c478bd9Sstevel@tonic-gate 		}
2552*7c478bd9Sstevel@tonic-gate 	}
2553*7c478bd9Sstevel@tonic-gate 	return (0);
2554*7c478bd9Sstevel@tonic-gate }
2555*7c478bd9Sstevel@tonic-gate 
/*
 * Remove all the locks for the vnode belonging to the given pid and sysid.
 * A pid of IGN_PID acts as a wildcard: every lock with a matching sysid
 * is removed regardless of owner pid.
 */

void
cleanlocks(vnode_t *vp, pid_t pid, int sysid)
{
	graph_t	*gp;
	lock_descriptor_t *lock, *nlock;
	/* deferred-free list: locks unlinked now, woken/freed after scan */
	lock_descriptor_t *link_stack;

	STACK_INIT(link_stack);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);

	/* no graph for this vnode means no locks to clean */
	if (gp == NULL)
		return;
	mutex_enter(&gp->gp_mutex);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	/* cancel this owner's sleeping requests so they bail out */
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			/* save successor: CANCEL_WAKEUP unlinks the entry */
			nlock = lock->l_next;
			if ((lock->l_flock.l_pid == pid ||
					pid == IGN_PID) &&
				lock->l_flock.l_sysid == sysid) {
				CANCEL_WAKEUP(lock);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	/* unlink this owner's active locks, deferring wakeup/free */
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if ((lock->l_flock.l_pid == pid ||
					pid == IGN_PID) &&
				lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				STACK_PUSH(link_stack, lock, l_stack);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	/* now wake waiters on, and free, each deleted lock */
	while ((lock = STACK_TOP(link_stack)) != NULL) {
		STACK_POP(link_stack, l_stack);
		flk_wakeup(lock, 1);
		flk_free_lock(lock);
	}

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
	mutex_exit(&gp->gp_mutex);
}
2618*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
2619*7c478bd9Sstevel@tonic-gate 
2620*7c478bd9Sstevel@tonic-gate 
2621*7c478bd9Sstevel@tonic-gate /*
2622*7c478bd9Sstevel@tonic-gate  * Called from 'fs' read and write routines for files that have mandatory
2623*7c478bd9Sstevel@tonic-gate  * locking enabled.
2624*7c478bd9Sstevel@tonic-gate  */
2625*7c478bd9Sstevel@tonic-gate 
2626*7c478bd9Sstevel@tonic-gate int
2627*7c478bd9Sstevel@tonic-gate chklock(
2628*7c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
2629*7c478bd9Sstevel@tonic-gate 	int 		iomode,
2630*7c478bd9Sstevel@tonic-gate 	u_offset_t	offset,
2631*7c478bd9Sstevel@tonic-gate 	ssize_t		len,
2632*7c478bd9Sstevel@tonic-gate 	int 		fmode,
2633*7c478bd9Sstevel@tonic-gate 	caller_context_t *ct)
2634*7c478bd9Sstevel@tonic-gate {
2635*7c478bd9Sstevel@tonic-gate 	register int	i;
2636*7c478bd9Sstevel@tonic-gate 	struct flock64 	bf;
2637*7c478bd9Sstevel@tonic-gate 	int 		error = 0;
2638*7c478bd9Sstevel@tonic-gate 
2639*7c478bd9Sstevel@tonic-gate 	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2640*7c478bd9Sstevel@tonic-gate 	bf.l_whence = 0;
2641*7c478bd9Sstevel@tonic-gate 	bf.l_start = offset;
2642*7c478bd9Sstevel@tonic-gate 	bf.l_len = len;
2643*7c478bd9Sstevel@tonic-gate 	if (ct == NULL) {
2644*7c478bd9Sstevel@tonic-gate 		bf.l_pid = curproc->p_pid;
2645*7c478bd9Sstevel@tonic-gate 		bf.l_sysid = 0;
2646*7c478bd9Sstevel@tonic-gate 	} else {
2647*7c478bd9Sstevel@tonic-gate 		bf.l_pid = ct->cc_pid;
2648*7c478bd9Sstevel@tonic-gate 		bf.l_sysid = ct->cc_sysid;
2649*7c478bd9Sstevel@tonic-gate 	}
2650*7c478bd9Sstevel@tonic-gate 	i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2651*7c478bd9Sstevel@tonic-gate 	if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2652*7c478bd9Sstevel@tonic-gate 	    bf.l_type != F_UNLCK)
2653*7c478bd9Sstevel@tonic-gate 		error = i ? i : EAGAIN;
2654*7c478bd9Sstevel@tonic-gate 	return (error);
2655*7c478bd9Sstevel@tonic-gate }
2656*7c478bd9Sstevel@tonic-gate 
2657*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
2658*7c478bd9Sstevel@tonic-gate /*
2659*7c478bd9Sstevel@tonic-gate  * convoff - converts the given data (start, whence) to the
2660*7c478bd9Sstevel@tonic-gate  * given whence.
2661*7c478bd9Sstevel@tonic-gate  */
2662*7c478bd9Sstevel@tonic-gate int
2663*7c478bd9Sstevel@tonic-gate convoff(vp, lckdat, whence, offset)
2664*7c478bd9Sstevel@tonic-gate 	struct vnode 	*vp;
2665*7c478bd9Sstevel@tonic-gate 	struct flock64 	*lckdat;
2666*7c478bd9Sstevel@tonic-gate 	int 		whence;
2667*7c478bd9Sstevel@tonic-gate 	offset_t	offset;
2668*7c478bd9Sstevel@tonic-gate {
2669*7c478bd9Sstevel@tonic-gate 	int 		error;
2670*7c478bd9Sstevel@tonic-gate 	struct vattr 	vattr;
2671*7c478bd9Sstevel@tonic-gate 
2672*7c478bd9Sstevel@tonic-gate 	if ((lckdat->l_whence == 2) || (whence == 2)) {
2673*7c478bd9Sstevel@tonic-gate 		vattr.va_mask = AT_SIZE;
2674*7c478bd9Sstevel@tonic-gate 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
2675*7c478bd9Sstevel@tonic-gate 			return (error);
2676*7c478bd9Sstevel@tonic-gate 	}
2677*7c478bd9Sstevel@tonic-gate 
2678*7c478bd9Sstevel@tonic-gate 	switch (lckdat->l_whence) {
2679*7c478bd9Sstevel@tonic-gate 	case 1:
2680*7c478bd9Sstevel@tonic-gate 		lckdat->l_start += offset;
2681*7c478bd9Sstevel@tonic-gate 		break;
2682*7c478bd9Sstevel@tonic-gate 	case 2:
2683*7c478bd9Sstevel@tonic-gate 		lckdat->l_start += vattr.va_size;
2684*7c478bd9Sstevel@tonic-gate 		/* FALLTHRU */
2685*7c478bd9Sstevel@tonic-gate 	case 0:
2686*7c478bd9Sstevel@tonic-gate 		break;
2687*7c478bd9Sstevel@tonic-gate 	default:
2688*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
2689*7c478bd9Sstevel@tonic-gate 	}
2690*7c478bd9Sstevel@tonic-gate 
2691*7c478bd9Sstevel@tonic-gate 	if (lckdat->l_start < 0)
2692*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
2693*7c478bd9Sstevel@tonic-gate 
2694*7c478bd9Sstevel@tonic-gate 	switch (whence) {
2695*7c478bd9Sstevel@tonic-gate 	case 1:
2696*7c478bd9Sstevel@tonic-gate 		lckdat->l_start -= offset;
2697*7c478bd9Sstevel@tonic-gate 		break;
2698*7c478bd9Sstevel@tonic-gate 	case 2:
2699*7c478bd9Sstevel@tonic-gate 		lckdat->l_start -= vattr.va_size;
2700*7c478bd9Sstevel@tonic-gate 		/* FALLTHRU */
2701*7c478bd9Sstevel@tonic-gate 	case 0:
2702*7c478bd9Sstevel@tonic-gate 		break;
2703*7c478bd9Sstevel@tonic-gate 	default:
2704*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
2705*7c478bd9Sstevel@tonic-gate 	}
2706*7c478bd9Sstevel@tonic-gate 
2707*7c478bd9Sstevel@tonic-gate 	lckdat->l_whence = (short)whence;
2708*7c478bd9Sstevel@tonic-gate 	return (0);
2709*7c478bd9Sstevel@tonic-gate }
2710*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
2711*7c478bd9Sstevel@tonic-gate 
2712*7c478bd9Sstevel@tonic-gate 
2713*7c478bd9Sstevel@tonic-gate /* 	proc_graph function definitions */
2714*7c478bd9Sstevel@tonic-gate 
2715*7c478bd9Sstevel@tonic-gate /*
2716*7c478bd9Sstevel@tonic-gate  * Function checks for deadlock due to the new 'lock'. If deadlock found
2717*7c478bd9Sstevel@tonic-gate  * edges of this lock are freed and returned.
2718*7c478bd9Sstevel@tonic-gate  */
2719*7c478bd9Sstevel@tonic-gate 
2720*7c478bd9Sstevel@tonic-gate static int
2721*7c478bd9Sstevel@tonic-gate flk_check_deadlock(lock_descriptor_t *lock)
2722*7c478bd9Sstevel@tonic-gate {
2723*7c478bd9Sstevel@tonic-gate 	proc_vertex_t	*start_vertex, *pvertex;
2724*7c478bd9Sstevel@tonic-gate 	proc_vertex_t *dvertex;
2725*7c478bd9Sstevel@tonic-gate 	proc_edge_t *pep, *ppep;
2726*7c478bd9Sstevel@tonic-gate 	edge_t	*ep, *nep;
2727*7c478bd9Sstevel@tonic-gate 	proc_vertex_t *process_stack;
2728*7c478bd9Sstevel@tonic-gate 
2729*7c478bd9Sstevel@tonic-gate 	STACK_INIT(process_stack);
2730*7c478bd9Sstevel@tonic-gate 
2731*7c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
2732*7c478bd9Sstevel@tonic-gate 	start_vertex = flk_get_proc_vertex(lock);
2733*7c478bd9Sstevel@tonic-gate 	ASSERT(start_vertex != NULL);
2734*7c478bd9Sstevel@tonic-gate 
2735*7c478bd9Sstevel@tonic-gate 	/* construct the edges from this process to other processes */
2736*7c478bd9Sstevel@tonic-gate 
2737*7c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
2738*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
2739*7c478bd9Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
2740*7c478bd9Sstevel@tonic-gate 
2741*7c478bd9Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
2742*7c478bd9Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2743*7c478bd9Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
2744*7c478bd9Sstevel@tonic-gate 				ASSERT(pep->refcount);
2745*7c478bd9Sstevel@tonic-gate 				pep->refcount++;
2746*7c478bd9Sstevel@tonic-gate 				break;
2747*7c478bd9Sstevel@tonic-gate 			}
2748*7c478bd9Sstevel@tonic-gate 		}
2749*7c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
2750*7c478bd9Sstevel@tonic-gate 			pep = flk_get_proc_edge();
2751*7c478bd9Sstevel@tonic-gate 			pep->to_proc = adj_proc;
2752*7c478bd9Sstevel@tonic-gate 			pep->refcount = 1;
2753*7c478bd9Sstevel@tonic-gate 			adj_proc->incount++;
2754*7c478bd9Sstevel@tonic-gate 			pep->next = start_vertex->edge;
2755*7c478bd9Sstevel@tonic-gate 			start_vertex->edge = pep;
2756*7c478bd9Sstevel@tonic-gate 		}
2757*7c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
2758*7c478bd9Sstevel@tonic-gate 	}
2759*7c478bd9Sstevel@tonic-gate 
2760*7c478bd9Sstevel@tonic-gate 	ep = FIRST_IN(lock);
2761*7c478bd9Sstevel@tonic-gate 
2762*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
2763*7c478bd9Sstevel@tonic-gate 		proc_vertex_t *in_proc;
2764*7c478bd9Sstevel@tonic-gate 
2765*7c478bd9Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
2766*7c478bd9Sstevel@tonic-gate 
2767*7c478bd9Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2768*7c478bd9Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
2769*7c478bd9Sstevel@tonic-gate 				ASSERT(pep->refcount);
2770*7c478bd9Sstevel@tonic-gate 				pep->refcount++;
2771*7c478bd9Sstevel@tonic-gate 				break;
2772*7c478bd9Sstevel@tonic-gate 			}
2773*7c478bd9Sstevel@tonic-gate 		}
2774*7c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
2775*7c478bd9Sstevel@tonic-gate 			pep = flk_get_proc_edge();
2776*7c478bd9Sstevel@tonic-gate 			pep->to_proc = start_vertex;
2777*7c478bd9Sstevel@tonic-gate 			pep->refcount = 1;
2778*7c478bd9Sstevel@tonic-gate 			start_vertex->incount++;
2779*7c478bd9Sstevel@tonic-gate 			pep->next = in_proc->edge;
2780*7c478bd9Sstevel@tonic-gate 			in_proc->edge = pep;
2781*7c478bd9Sstevel@tonic-gate 		}
2782*7c478bd9Sstevel@tonic-gate 		ep = NEXT_IN(ep);
2783*7c478bd9Sstevel@tonic-gate 	}
2784*7c478bd9Sstevel@tonic-gate 
2785*7c478bd9Sstevel@tonic-gate 	if (start_vertex->incount == 0) {
2786*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
2787*7c478bd9Sstevel@tonic-gate 		return (0);
2788*7c478bd9Sstevel@tonic-gate 	}
2789*7c478bd9Sstevel@tonic-gate 
2790*7c478bd9Sstevel@tonic-gate 	flk_proc_graph_uncolor();
2791*7c478bd9Sstevel@tonic-gate 
2792*7c478bd9Sstevel@tonic-gate 	start_vertex->p_sedge = start_vertex->edge;
2793*7c478bd9Sstevel@tonic-gate 
2794*7c478bd9Sstevel@tonic-gate 	STACK_PUSH(process_stack, start_vertex, p_stack);
2795*7c478bd9Sstevel@tonic-gate 
2796*7c478bd9Sstevel@tonic-gate 	while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2797*7c478bd9Sstevel@tonic-gate 		for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2798*7c478bd9Sstevel@tonic-gate 			dvertex = pep->to_proc;
2799*7c478bd9Sstevel@tonic-gate 			if (!PROC_ARRIVED(dvertex)) {
2800*7c478bd9Sstevel@tonic-gate 				STACK_PUSH(process_stack, dvertex, p_stack);
2801*7c478bd9Sstevel@tonic-gate 				dvertex->p_sedge = dvertex->edge;
2802*7c478bd9Sstevel@tonic-gate 				PROC_ARRIVE(pvertex);
2803*7c478bd9Sstevel@tonic-gate 				pvertex->p_sedge = pep->next;
2804*7c478bd9Sstevel@tonic-gate 				break;
2805*7c478bd9Sstevel@tonic-gate 			}
2806*7c478bd9Sstevel@tonic-gate 			if (!PROC_DEPARTED(dvertex))
2807*7c478bd9Sstevel@tonic-gate 				goto deadlock;
2808*7c478bd9Sstevel@tonic-gate 		}
2809*7c478bd9Sstevel@tonic-gate 		if (pep == NULL) {
2810*7c478bd9Sstevel@tonic-gate 			PROC_DEPART(pvertex);
2811*7c478bd9Sstevel@tonic-gate 			STACK_POP(process_stack, p_stack);
2812*7c478bd9Sstevel@tonic-gate 		}
2813*7c478bd9Sstevel@tonic-gate 	}
2814*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
2815*7c478bd9Sstevel@tonic-gate 	return (0);
2816*7c478bd9Sstevel@tonic-gate 
2817*7c478bd9Sstevel@tonic-gate deadlock:
2818*7c478bd9Sstevel@tonic-gate 
2819*7c478bd9Sstevel@tonic-gate 	/* we remove all lock edges and proc edges */
2820*7c478bd9Sstevel@tonic-gate 
2821*7c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
2822*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
2823*7c478bd9Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
2824*7c478bd9Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
2825*7c478bd9Sstevel@tonic-gate 		nep = NEXT_ADJ(ep);
2826*7c478bd9Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
2827*7c478bd9Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
2828*7c478bd9Sstevel@tonic-gate 		flk_free_edge(ep);
2829*7c478bd9Sstevel@tonic-gate 		ppep = start_vertex->edge;
2830*7c478bd9Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2831*7c478bd9Sstevel@tonic-gate 						pep = ppep->next) {
2832*7c478bd9Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
2833*7c478bd9Sstevel@tonic-gate 				pep->refcount--;
2834*7c478bd9Sstevel@tonic-gate 				if (pep->refcount == 0) {
2835*7c478bd9Sstevel@tonic-gate 					if (pep == ppep) {
2836*7c478bd9Sstevel@tonic-gate 						start_vertex->edge = pep->next;
2837*7c478bd9Sstevel@tonic-gate 					} else {
2838*7c478bd9Sstevel@tonic-gate 						ppep->next = pep->next;
2839*7c478bd9Sstevel@tonic-gate 					}
2840*7c478bd9Sstevel@tonic-gate 					adj_proc->incount--;
2841*7c478bd9Sstevel@tonic-gate 					flk_proc_release(adj_proc);
2842*7c478bd9Sstevel@tonic-gate 					flk_free_proc_edge(pep);
2843*7c478bd9Sstevel@tonic-gate 				}
2844*7c478bd9Sstevel@tonic-gate 				break;
2845*7c478bd9Sstevel@tonic-gate 			}
2846*7c478bd9Sstevel@tonic-gate 		}
2847*7c478bd9Sstevel@tonic-gate 		ep = nep;
2848*7c478bd9Sstevel@tonic-gate 	}
2849*7c478bd9Sstevel@tonic-gate 	ep = FIRST_IN(lock);
2850*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
2851*7c478bd9Sstevel@tonic-gate 		proc_vertex_t *in_proc;
2852*7c478bd9Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
2853*7c478bd9Sstevel@tonic-gate 		nep = NEXT_IN(ep);
2854*7c478bd9Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
2855*7c478bd9Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
2856*7c478bd9Sstevel@tonic-gate 		flk_free_edge(ep);
2857*7c478bd9Sstevel@tonic-gate 		ppep = in_proc->edge;
2858*7c478bd9Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; ppep = pep,
2859*7c478bd9Sstevel@tonic-gate 						pep = ppep->next) {
2860*7c478bd9Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
2861*7c478bd9Sstevel@tonic-gate 				pep->refcount--;
2862*7c478bd9Sstevel@tonic-gate 				if (pep->refcount == 0) {
2863*7c478bd9Sstevel@tonic-gate 					if (pep == ppep) {
2864*7c478bd9Sstevel@tonic-gate 						in_proc->edge = pep->next;
2865*7c478bd9Sstevel@tonic-gate 					} else {
2866*7c478bd9Sstevel@tonic-gate 						ppep->next = pep->next;
2867*7c478bd9Sstevel@tonic-gate 					}
2868*7c478bd9Sstevel@tonic-gate 					start_vertex->incount--;
2869*7c478bd9Sstevel@tonic-gate 					flk_proc_release(in_proc);
2870*7c478bd9Sstevel@tonic-gate 					flk_free_proc_edge(pep);
2871*7c478bd9Sstevel@tonic-gate 				}
2872*7c478bd9Sstevel@tonic-gate 				break;
2873*7c478bd9Sstevel@tonic-gate 			}
2874*7c478bd9Sstevel@tonic-gate 		}
2875*7c478bd9Sstevel@tonic-gate 		ep = nep;
2876*7c478bd9Sstevel@tonic-gate 	}
2877*7c478bd9Sstevel@tonic-gate 	flk_proc_release(start_vertex);
2878*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
2879*7c478bd9Sstevel@tonic-gate 	return (1);
2880*7c478bd9Sstevel@tonic-gate }
2881*7c478bd9Sstevel@tonic-gate 
2882*7c478bd9Sstevel@tonic-gate /*
2883*7c478bd9Sstevel@tonic-gate  * Get a proc vertex. If lock's pvertex value gets a correct proc vertex
2884*7c478bd9Sstevel@tonic-gate  * from the list we return that, otherwise we allocate one. If necessary,
2885*7c478bd9Sstevel@tonic-gate  * we grow the list of vertices also.
2886*7c478bd9Sstevel@tonic-gate  */
2887*7c478bd9Sstevel@tonic-gate 
2888*7c478bd9Sstevel@tonic-gate static proc_vertex_t *
2889*7c478bd9Sstevel@tonic-gate flk_get_proc_vertex(lock_descriptor_t *lock)
2890*7c478bd9Sstevel@tonic-gate {
2891*7c478bd9Sstevel@tonic-gate 	int i;
2892*7c478bd9Sstevel@tonic-gate 	proc_vertex_t	*pv;
2893*7c478bd9Sstevel@tonic-gate 	proc_vertex_t	**palloc;
2894*7c478bd9Sstevel@tonic-gate 
2895*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
2896*7c478bd9Sstevel@tonic-gate 	if (lock->pvertex != -1) {
2897*7c478bd9Sstevel@tonic-gate 		ASSERT(lock->pvertex >= 0);
2898*7c478bd9Sstevel@tonic-gate 		pv = pgraph.proc[lock->pvertex];
2899*7c478bd9Sstevel@tonic-gate 		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2900*7c478bd9Sstevel@tonic-gate 			return (pv);
2901*7c478bd9Sstevel@tonic-gate 		}
2902*7c478bd9Sstevel@tonic-gate 	}
2903*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < pgraph.gcount; i++) {
2904*7c478bd9Sstevel@tonic-gate 		pv = pgraph.proc[i];
2905*7c478bd9Sstevel@tonic-gate 		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2906*7c478bd9Sstevel@tonic-gate 			lock->pvertex = pv->index = i;
2907*7c478bd9Sstevel@tonic-gate 			return (pv);
2908*7c478bd9Sstevel@tonic-gate 		}
2909*7c478bd9Sstevel@tonic-gate 	}
2910*7c478bd9Sstevel@tonic-gate 	pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2911*7c478bd9Sstevel@tonic-gate 	pv->pid = lock->l_flock.l_pid;
2912*7c478bd9Sstevel@tonic-gate 	pv->sysid = lock->l_flock.l_sysid;
2913*7c478bd9Sstevel@tonic-gate 	flk_proc_vertex_allocs++;
2914*7c478bd9Sstevel@tonic-gate 	if (pgraph.free != 0) {
2915*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < pgraph.gcount; i++) {
2916*7c478bd9Sstevel@tonic-gate 			if (pgraph.proc[i] == NULL) {
2917*7c478bd9Sstevel@tonic-gate 				pgraph.proc[i] = pv;
2918*7c478bd9Sstevel@tonic-gate 				lock->pvertex = pv->index = i;
2919*7c478bd9Sstevel@tonic-gate 				pgraph.free--;
2920*7c478bd9Sstevel@tonic-gate 				return (pv);
2921*7c478bd9Sstevel@tonic-gate 			}
2922*7c478bd9Sstevel@tonic-gate 		}
2923*7c478bd9Sstevel@tonic-gate 	}
2924*7c478bd9Sstevel@tonic-gate 	palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2925*7c478bd9Sstevel@tonic-gate 				sizeof (proc_vertex_t *), KM_SLEEP);
2926*7c478bd9Sstevel@tonic-gate 
2927*7c478bd9Sstevel@tonic-gate 	if (pgraph.proc) {
2928*7c478bd9Sstevel@tonic-gate 		bcopy(pgraph.proc, palloc,
2929*7c478bd9Sstevel@tonic-gate 			pgraph.gcount * sizeof (proc_vertex_t *));
2930*7c478bd9Sstevel@tonic-gate 
2931*7c478bd9Sstevel@tonic-gate 		kmem_free(pgraph.proc,
2932*7c478bd9Sstevel@tonic-gate 			pgraph.gcount * sizeof (proc_vertex_t *));
2933*7c478bd9Sstevel@tonic-gate 	}
2934*7c478bd9Sstevel@tonic-gate 	pgraph.proc = palloc;
2935*7c478bd9Sstevel@tonic-gate 	pgraph.free += (PROC_CHUNK - 1);
2936*7c478bd9Sstevel@tonic-gate 	pv->index = lock->pvertex = pgraph.gcount;
2937*7c478bd9Sstevel@tonic-gate 	pgraph.gcount += PROC_CHUNK;
2938*7c478bd9Sstevel@tonic-gate 	pgraph.proc[pv->index] = pv;
2939*7c478bd9Sstevel@tonic-gate 	return (pv);
2940*7c478bd9Sstevel@tonic-gate }
2941*7c478bd9Sstevel@tonic-gate 
2942*7c478bd9Sstevel@tonic-gate /*
2943*7c478bd9Sstevel@tonic-gate  * Allocate a proc edge.
2944*7c478bd9Sstevel@tonic-gate  */
2945*7c478bd9Sstevel@tonic-gate 
2946*7c478bd9Sstevel@tonic-gate static proc_edge_t *
2947*7c478bd9Sstevel@tonic-gate flk_get_proc_edge()
2948*7c478bd9Sstevel@tonic-gate {
2949*7c478bd9Sstevel@tonic-gate 	proc_edge_t *pep;
2950*7c478bd9Sstevel@tonic-gate 
2951*7c478bd9Sstevel@tonic-gate 	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
2952*7c478bd9Sstevel@tonic-gate 	flk_proc_edge_allocs++;
2953*7c478bd9Sstevel@tonic-gate 	return (pep);
2954*7c478bd9Sstevel@tonic-gate }
2955*7c478bd9Sstevel@tonic-gate 
2956*7c478bd9Sstevel@tonic-gate /*
2957*7c478bd9Sstevel@tonic-gate  * Free the proc edge. Called whenever its reference count goes to zero.
2958*7c478bd9Sstevel@tonic-gate  */
2959*7c478bd9Sstevel@tonic-gate 
2960*7c478bd9Sstevel@tonic-gate static void
2961*7c478bd9Sstevel@tonic-gate flk_free_proc_edge(proc_edge_t *pep)
2962*7c478bd9Sstevel@tonic-gate {
2963*7c478bd9Sstevel@tonic-gate 	ASSERT(pep->refcount == 0);
2964*7c478bd9Sstevel@tonic-gate 	kmem_free((void *)pep, sizeof (proc_edge_t));
2965*7c478bd9Sstevel@tonic-gate 	flk_proc_edge_frees++;
2966*7c478bd9Sstevel@tonic-gate }
2967*7c478bd9Sstevel@tonic-gate 
2968*7c478bd9Sstevel@tonic-gate /*
2969*7c478bd9Sstevel@tonic-gate  * Color the graph explicitly done only when the mark value hits max value.
2970*7c478bd9Sstevel@tonic-gate  */
2971*7c478bd9Sstevel@tonic-gate 
2972*7c478bd9Sstevel@tonic-gate static void
2973*7c478bd9Sstevel@tonic-gate flk_proc_graph_uncolor()
2974*7c478bd9Sstevel@tonic-gate {
2975*7c478bd9Sstevel@tonic-gate 	int i;
2976*7c478bd9Sstevel@tonic-gate 
2977*7c478bd9Sstevel@tonic-gate 	if (pgraph.mark == UINT_MAX) {
2978*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < pgraph.gcount; i++)
2979*7c478bd9Sstevel@tonic-gate 			if (pgraph.proc[i] != NULL) {
2980*7c478bd9Sstevel@tonic-gate 				pgraph.proc[i]->atime = 0;
2981*7c478bd9Sstevel@tonic-gate 				pgraph.proc[i]->dtime = 0;
2982*7c478bd9Sstevel@tonic-gate 			}
2983*7c478bd9Sstevel@tonic-gate 		pgraph.mark = 1;
2984*7c478bd9Sstevel@tonic-gate 	} else {
2985*7c478bd9Sstevel@tonic-gate 		pgraph.mark++;
2986*7c478bd9Sstevel@tonic-gate 	}
2987*7c478bd9Sstevel@tonic-gate }
2988*7c478bd9Sstevel@tonic-gate 
2989*7c478bd9Sstevel@tonic-gate /*
2990*7c478bd9Sstevel@tonic-gate  * Release the proc vertex iff both there are no in edges and out edges
2991*7c478bd9Sstevel@tonic-gate  */
2992*7c478bd9Sstevel@tonic-gate 
2993*7c478bd9Sstevel@tonic-gate static void
2994*7c478bd9Sstevel@tonic-gate flk_proc_release(proc_vertex_t *proc)
2995*7c478bd9Sstevel@tonic-gate {
2996*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
2997*7c478bd9Sstevel@tonic-gate 	if (proc->edge == NULL && proc->incount == 0) {
2998*7c478bd9Sstevel@tonic-gate 		pgraph.proc[proc->index] = NULL;
2999*7c478bd9Sstevel@tonic-gate 		pgraph.free++;
3000*7c478bd9Sstevel@tonic-gate 		kmem_free(proc, sizeof (proc_vertex_t));
3001*7c478bd9Sstevel@tonic-gate 		flk_proc_vertex_frees++;
3002*7c478bd9Sstevel@tonic-gate 	}
3003*7c478bd9Sstevel@tonic-gate }
3004*7c478bd9Sstevel@tonic-gate 
3005*7c478bd9Sstevel@tonic-gate /*
3006*7c478bd9Sstevel@tonic-gate  * Updates process graph to reflect change in a lock_graph.
3007*7c478bd9Sstevel@tonic-gate  * Note: We should call this function only after we have a correctly
3008*7c478bd9Sstevel@tonic-gate  * recomputed lock graph. Otherwise we might miss a deadlock detection.
3009*7c478bd9Sstevel@tonic-gate  * eg: in function flk_relation() we call this function after flk_recompute_
3010*7c478bd9Sstevel@tonic-gate  * dependencies() otherwise if a process tries to lock a vnode hashed
3011*7c478bd9Sstevel@tonic-gate  * into another graph it might sleep for ever.
3012*7c478bd9Sstevel@tonic-gate  */
3013*7c478bd9Sstevel@tonic-gate 
3014*7c478bd9Sstevel@tonic-gate static void
3015*7c478bd9Sstevel@tonic-gate flk_update_proc_graph(edge_t *ep, int delete)
3016*7c478bd9Sstevel@tonic-gate {
3017*7c478bd9Sstevel@tonic-gate 	proc_vertex_t *toproc, *fromproc;
3018*7c478bd9Sstevel@tonic-gate 	proc_edge_t *pep, *prevpep;
3019*7c478bd9Sstevel@tonic-gate 
3020*7c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
3021*7c478bd9Sstevel@tonic-gate 	toproc = flk_get_proc_vertex(ep->to_vertex);
3022*7c478bd9Sstevel@tonic-gate 	fromproc = flk_get_proc_vertex(ep->from_vertex);
3023*7c478bd9Sstevel@tonic-gate 
3024*7c478bd9Sstevel@tonic-gate 	if (!delete)
3025*7c478bd9Sstevel@tonic-gate 		goto add;
3026*7c478bd9Sstevel@tonic-gate 	pep = prevpep = fromproc->edge;
3027*7c478bd9Sstevel@tonic-gate 
3028*7c478bd9Sstevel@tonic-gate 	ASSERT(pep != NULL);
3029*7c478bd9Sstevel@tonic-gate 	while (pep != NULL) {
3030*7c478bd9Sstevel@tonic-gate 		if (pep->to_proc == toproc) {
3031*7c478bd9Sstevel@tonic-gate 			ASSERT(pep->refcount > 0);
3032*7c478bd9Sstevel@tonic-gate 			pep->refcount--;
3033*7c478bd9Sstevel@tonic-gate 			if (pep->refcount == 0) {
3034*7c478bd9Sstevel@tonic-gate 				if (pep == prevpep) {
3035*7c478bd9Sstevel@tonic-gate 					fromproc->edge = pep->next;
3036*7c478bd9Sstevel@tonic-gate 				} else {
3037*7c478bd9Sstevel@tonic-gate 					prevpep->next = pep->next;
3038*7c478bd9Sstevel@tonic-gate 				}
3039*7c478bd9Sstevel@tonic-gate 				toproc->incount--;
3040*7c478bd9Sstevel@tonic-gate 				flk_proc_release(toproc);
3041*7c478bd9Sstevel@tonic-gate 				flk_free_proc_edge(pep);
3042*7c478bd9Sstevel@tonic-gate 			}
3043*7c478bd9Sstevel@tonic-gate 			break;
3044*7c478bd9Sstevel@tonic-gate 		}
3045*7c478bd9Sstevel@tonic-gate 		prevpep = pep;
3046*7c478bd9Sstevel@tonic-gate 		pep = pep->next;
3047*7c478bd9Sstevel@tonic-gate 	}
3048*7c478bd9Sstevel@tonic-gate 	flk_proc_release(fromproc);
3049*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
3050*7c478bd9Sstevel@tonic-gate 	return;
3051*7c478bd9Sstevel@tonic-gate add:
3052*7c478bd9Sstevel@tonic-gate 
3053*7c478bd9Sstevel@tonic-gate 	pep = fromproc->edge;
3054*7c478bd9Sstevel@tonic-gate 
3055*7c478bd9Sstevel@tonic-gate 	while (pep != NULL) {
3056*7c478bd9Sstevel@tonic-gate 		if (pep->to_proc == toproc) {
3057*7c478bd9Sstevel@tonic-gate 			ASSERT(pep->refcount > 0);
3058*7c478bd9Sstevel@tonic-gate 			pep->refcount++;
3059*7c478bd9Sstevel@tonic-gate 			break;
3060*7c478bd9Sstevel@tonic-gate 		}
3061*7c478bd9Sstevel@tonic-gate 		pep = pep->next;
3062*7c478bd9Sstevel@tonic-gate 	}
3063*7c478bd9Sstevel@tonic-gate 	if (pep == NULL) {
3064*7c478bd9Sstevel@tonic-gate 		pep = flk_get_proc_edge();
3065*7c478bd9Sstevel@tonic-gate 		pep->to_proc = toproc;
3066*7c478bd9Sstevel@tonic-gate 		pep->refcount = 1;
3067*7c478bd9Sstevel@tonic-gate 		toproc->incount++;
3068*7c478bd9Sstevel@tonic-gate 		pep->next = fromproc->edge;
3069*7c478bd9Sstevel@tonic-gate 		fromproc->edge = pep;
3070*7c478bd9Sstevel@tonic-gate 	}
3071*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
3072*7c478bd9Sstevel@tonic-gate }
3073*7c478bd9Sstevel@tonic-gate 
3074*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
3075*7c478bd9Sstevel@tonic-gate /*
3076*7c478bd9Sstevel@tonic-gate  * Set the control status for lock manager requests.
3077*7c478bd9Sstevel@tonic-gate  *
3078*7c478bd9Sstevel@tonic-gate  */
3079*7c478bd9Sstevel@tonic-gate 
3080*7c478bd9Sstevel@tonic-gate /*
3081*7c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
3082*7c478bd9Sstevel@tonic-gate  *
3083*7c478bd9Sstevel@tonic-gate  * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3084*7c478bd9Sstevel@tonic-gate  * Effects: Set the state of the NLM server identified by "nlmid"
3085*7c478bd9Sstevel@tonic-gate  *   in the NLM registry to state "nlm_state."
3086*7c478bd9Sstevel@tonic-gate  *   Raises exception no_such_nlm if "nlmid" doesn't identify a known
3087*7c478bd9Sstevel@tonic-gate  *   NLM server to this LLM.
3088*7c478bd9Sstevel@tonic-gate  *   Note that when this routine is called with NLM_SHUTTING_DOWN there
3089*7c478bd9Sstevel@tonic-gate  *   may be locks requests that have gotten started but not finished.  In
3090*7c478bd9Sstevel@tonic-gate  *   particular, there may be blocking requests that are in the callback code
3091*7c478bd9Sstevel@tonic-gate  *   before sleeping (so they're not holding the lock for the graph).  If
3092*7c478bd9Sstevel@tonic-gate  *   such a thread reacquires the graph's lock (to go to sleep) after
3093*7c478bd9Sstevel@tonic-gate  *   NLM state in the NLM registry  is set to a non-up value,
3094*7c478bd9Sstevel@tonic-gate  *   it will notice the status and bail out.  If the request gets
3095*7c478bd9Sstevel@tonic-gate  *   granted before the thread can check the NLM registry, let it
3096*7c478bd9Sstevel@tonic-gate  *   continue normally.  It will get flushed when we are called with NLM_DOWN.
3097*7c478bd9Sstevel@tonic-gate  *
3098*7c478bd9Sstevel@tonic-gate  * Modifies: nlm_reg_obj (global)
3099*7c478bd9Sstevel@tonic-gate  * Arguments:
3100*7c478bd9Sstevel@tonic-gate  *    nlmid	(IN):    id uniquely identifying an NLM server
3101*7c478bd9Sstevel@tonic-gate  *    nlm_state (IN):    NLM server state to change "nlmid" to
3102*7c478bd9Sstevel@tonic-gate  */
3103*7c478bd9Sstevel@tonic-gate void
3104*7c478bd9Sstevel@tonic-gate cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3105*7c478bd9Sstevel@tonic-gate {
3106*7c478bd9Sstevel@tonic-gate 	/*
3107*7c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
3108*7c478bd9Sstevel@tonic-gate 	 */
3109*7c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3110*7c478bd9Sstevel@tonic-gate 		return;
3111*7c478bd9Sstevel@tonic-gate 	}
3112*7c478bd9Sstevel@tonic-gate 
3113*7c478bd9Sstevel@tonic-gate 	/*
3114*7c478bd9Sstevel@tonic-gate 	 * Check for development/debugging.  It is possible to boot a node
3115*7c478bd9Sstevel@tonic-gate 	 * in non-cluster mode, and then run a special script, currently
3116*7c478bd9Sstevel@tonic-gate 	 * available only to developers, to bring up the node as part of a
3117*7c478bd9Sstevel@tonic-gate 	 * cluster.  The problem is that running such a script does not
3118*7c478bd9Sstevel@tonic-gate 	 * result in the routine flk_init() being called and hence global array
3119*7c478bd9Sstevel@tonic-gate 	 * nlm_reg_status is NULL.  The NLM thinks it's in cluster mode,
3120*7c478bd9Sstevel@tonic-gate 	 * but the LLM needs to do an additional check to see if the global
3121*7c478bd9Sstevel@tonic-gate 	 * array has been created or not. If nlm_reg_status is NULL, then
3122*7c478bd9Sstevel@tonic-gate 	 * return, else continue.
3123*7c478bd9Sstevel@tonic-gate 	 */
3124*7c478bd9Sstevel@tonic-gate 	if (nlm_reg_status == NULL) {
3125*7c478bd9Sstevel@tonic-gate 		return;
3126*7c478bd9Sstevel@tonic-gate 	}
3127*7c478bd9Sstevel@tonic-gate 
3128*7c478bd9Sstevel@tonic-gate 	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3129*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nlm_reg_lock);
3130*7c478bd9Sstevel@tonic-gate 
3131*7c478bd9Sstevel@tonic-gate 	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3132*7c478bd9Sstevel@tonic-gate 		/*
3133*7c478bd9Sstevel@tonic-gate 		 * If the NLM server "nlmid" is unknown in the NLM registry,
3134*7c478bd9Sstevel@tonic-gate 		 * add it to the registry in the nlm shutting down state.
3135*7c478bd9Sstevel@tonic-gate 		 */
3136*7c478bd9Sstevel@tonic-gate 		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3137*7c478bd9Sstevel@tonic-gate 			FLK_NLM_SHUTTING_DOWN);
3138*7c478bd9Sstevel@tonic-gate 	} else {
3139*7c478bd9Sstevel@tonic-gate 		/*
3140*7c478bd9Sstevel@tonic-gate 		 * Change the state of the NLM server identified by "nlmid"
3141*7c478bd9Sstevel@tonic-gate 		 * in the NLM registry to the argument "nlm_state."
3142*7c478bd9Sstevel@tonic-gate 		 */
3143*7c478bd9Sstevel@tonic-gate 		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3144*7c478bd9Sstevel@tonic-gate 			nlm_state);
3145*7c478bd9Sstevel@tonic-gate 	}
3146*7c478bd9Sstevel@tonic-gate 
3147*7c478bd9Sstevel@tonic-gate 	/*
3148*7c478bd9Sstevel@tonic-gate 	 *  The reason we must register the NLM server that is shutting down
3149*7c478bd9Sstevel@tonic-gate 	 *  with an LLM that doesn't already know about it (never sent a lock
3150*7c478bd9Sstevel@tonic-gate 	 *  request) is to handle correctly a race between shutdown and a new
3151*7c478bd9Sstevel@tonic-gate 	 *  lock request.  Suppose that a shutdown request from the NLM server
3152*7c478bd9Sstevel@tonic-gate 	 *  invokes this routine at the LLM, and a thread is spawned to
3153*7c478bd9Sstevel@tonic-gate 	 *  service the request. Now suppose a new lock request is in
3154*7c478bd9Sstevel@tonic-gate 	 *  progress and has already passed the first line of defense in
3155*7c478bd9Sstevel@tonic-gate 	 *  reclock(), which denies new locks requests from NLM servers
3156*7c478bd9Sstevel@tonic-gate 	 *  that are not in the NLM_UP state.  After the current routine
3157*7c478bd9Sstevel@tonic-gate 	 *  is invoked for both phases of shutdown, the routine will return,
3158*7c478bd9Sstevel@tonic-gate 	 *  having done nothing, and the lock request will proceed and
3159*7c478bd9Sstevel@tonic-gate 	 *  probably be granted.  The problem is that the shutdown was ignored
3160*7c478bd9Sstevel@tonic-gate 	 *  by the lock request because there was no record of that NLM server
3161*7c478bd9Sstevel@tonic-gate 	 *  shutting down.   We will be in the peculiar position of thinking
3162*7c478bd9Sstevel@tonic-gate 	 *  that we've shutdown the NLM server and all locks at all LLMs have
3163*7c478bd9Sstevel@tonic-gate 	 *  been discarded, but in fact there's still one lock held.
3164*7c478bd9Sstevel@tonic-gate 	 *  The solution is to record the existence of NLM server and change
3165*7c478bd9Sstevel@tonic-gate 	 *  its state immediately to NLM_SHUTTING_DOWN.  The lock request in
3166*7c478bd9Sstevel@tonic-gate 	 *  progress may proceed because the next phase NLM_DOWN will catch
3167*7c478bd9Sstevel@tonic-gate 	 *  this lock and discard it.
3168*7c478bd9Sstevel@tonic-gate 	 */
3169*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nlm_reg_lock);
3170*7c478bd9Sstevel@tonic-gate 
3171*7c478bd9Sstevel@tonic-gate 	switch (nlm_state) {
3172*7c478bd9Sstevel@tonic-gate 	case FLK_NLM_UP:
3173*7c478bd9Sstevel@tonic-gate 		/*
3174*7c478bd9Sstevel@tonic-gate 		 * Change the NLM state of all locks still held on behalf of
3175*7c478bd9Sstevel@tonic-gate 		 * the NLM server identified by "nlmid" to NLM_UP.
3176*7c478bd9Sstevel@tonic-gate 		 */
3177*7c478bd9Sstevel@tonic-gate 		cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3178*7c478bd9Sstevel@tonic-gate 		break;
3179*7c478bd9Sstevel@tonic-gate 
3180*7c478bd9Sstevel@tonic-gate 	case FLK_NLM_SHUTTING_DOWN:
3181*7c478bd9Sstevel@tonic-gate 		/*
3182*7c478bd9Sstevel@tonic-gate 		 * Wake up all sleeping locks for the NLM server identified
3183*7c478bd9Sstevel@tonic-gate 		 * by "nlmid." Note that eventually all woken threads will
3184*7c478bd9Sstevel@tonic-gate 		 * have their lock requests cancelled and descriptors
3185*7c478bd9Sstevel@tonic-gate 		 * removed from the sleeping lock list.  Note that the NLM
3186*7c478bd9Sstevel@tonic-gate 		 * server state associated with each lock descriptor is
3187*7c478bd9Sstevel@tonic-gate 		 * changed to FLK_NLM_SHUTTING_DOWN.
3188*7c478bd9Sstevel@tonic-gate 		 */
3189*7c478bd9Sstevel@tonic-gate 		cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3190*7c478bd9Sstevel@tonic-gate 		break;
3191*7c478bd9Sstevel@tonic-gate 
3192*7c478bd9Sstevel@tonic-gate 	case FLK_NLM_DOWN:
3193*7c478bd9Sstevel@tonic-gate 		/*
3194*7c478bd9Sstevel@tonic-gate 		 * Discard all active, granted locks for this NLM server
3195*7c478bd9Sstevel@tonic-gate 		 * identified by "nlmid."
3196*7c478bd9Sstevel@tonic-gate 		 */
3197*7c478bd9Sstevel@tonic-gate 		cl_flk_unlock_nlm_granted(nlmid);
3198*7c478bd9Sstevel@tonic-gate 		break;
3199*7c478bd9Sstevel@tonic-gate 
3200*7c478bd9Sstevel@tonic-gate 	default:
3201*7c478bd9Sstevel@tonic-gate 		panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3202*7c478bd9Sstevel@tonic-gate 	}
3203*7c478bd9Sstevel@tonic-gate }
3204*7c478bd9Sstevel@tonic-gate 
3205*7c478bd9Sstevel@tonic-gate /*
3206*7c478bd9Sstevel@tonic-gate  * Set the control status for lock manager requests.
3207*7c478bd9Sstevel@tonic-gate  *
3208*7c478bd9Sstevel@tonic-gate  * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3209*7c478bd9Sstevel@tonic-gate  * may be locks requests that have gotten started but not finished.  In
3210*7c478bd9Sstevel@tonic-gate  * particular, there may be blocking requests that are in the callback code
3211*7c478bd9Sstevel@tonic-gate  * before sleeping (so they're not holding the lock for the graph).  If
3212*7c478bd9Sstevel@tonic-gate  * such a thread reacquires the graph's lock (to go to sleep) after
3213*7c478bd9Sstevel@tonic-gate  * flk_lockmgr_status is set to a non-up value, it will notice the status
3214*7c478bd9Sstevel@tonic-gate  * and bail out.  If the request gets granted before the thread can check
3215*7c478bd9Sstevel@tonic-gate  * flk_lockmgr_status, let it continue normally.  It will get flushed when
3216*7c478bd9Sstevel@tonic-gate  * we are called with FLK_LOCKMGR_DOWN.
3217*7c478bd9Sstevel@tonic-gate  */
3218*7c478bd9Sstevel@tonic-gate 
3219*7c478bd9Sstevel@tonic-gate void
3220*7c478bd9Sstevel@tonic-gate flk_set_lockmgr_status(flk_lockmgr_status_t status)
3221*7c478bd9Sstevel@tonic-gate {
3222*7c478bd9Sstevel@tonic-gate 	int i;
3223*7c478bd9Sstevel@tonic-gate 	graph_t *gp;
3224*7c478bd9Sstevel@tonic-gate 	struct flock_globals *fg;
3225*7c478bd9Sstevel@tonic-gate 
3226*7c478bd9Sstevel@tonic-gate 	fg = flk_get_globals();
3227*7c478bd9Sstevel@tonic-gate 	ASSERT(fg != NULL);
3228*7c478bd9Sstevel@tonic-gate 
3229*7c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
3230*7c478bd9Sstevel@tonic-gate 	fg->flk_lockmgr_status = status;
3231*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
3232*7c478bd9Sstevel@tonic-gate 
3233*7c478bd9Sstevel@tonic-gate 	/*
3234*7c478bd9Sstevel@tonic-gate 	 * If the lock manager is coming back up, all that's needed is to
3235*7c478bd9Sstevel@tonic-gate 	 * propagate this information to the graphs.  If the lock manager
3236*7c478bd9Sstevel@tonic-gate 	 * is going down, additional action is required, and each graph's
3237*7c478bd9Sstevel@tonic-gate 	 * copy of the state is updated atomically with this other action.
3238*7c478bd9Sstevel@tonic-gate 	 */
3239*7c478bd9Sstevel@tonic-gate 	switch (status) {
3240*7c478bd9Sstevel@tonic-gate 	case FLK_LOCKMGR_UP:
3241*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < HASH_SIZE; i++) {
3242*7c478bd9Sstevel@tonic-gate 			mutex_enter(&flock_lock);
3243*7c478bd9Sstevel@tonic-gate 			gp = lock_graph[i];
3244*7c478bd9Sstevel@tonic-gate 			mutex_exit(&flock_lock);
3245*7c478bd9Sstevel@tonic-gate 			if (gp == NULL)
3246*7c478bd9Sstevel@tonic-gate 				continue;
3247*7c478bd9Sstevel@tonic-gate 			mutex_enter(&gp->gp_mutex);
3248*7c478bd9Sstevel@tonic-gate 			fg->lockmgr_status[i] = status;
3249*7c478bd9Sstevel@tonic-gate 			mutex_exit(&gp->gp_mutex);
3250*7c478bd9Sstevel@tonic-gate 		}
3251*7c478bd9Sstevel@tonic-gate 		break;
3252*7c478bd9Sstevel@tonic-gate 	case FLK_WAKEUP_SLEEPERS:
3253*7c478bd9Sstevel@tonic-gate 		wakeup_sleeping_lockmgr_locks(fg);
3254*7c478bd9Sstevel@tonic-gate 		break;
3255*7c478bd9Sstevel@tonic-gate 	case FLK_LOCKMGR_DOWN:
3256*7c478bd9Sstevel@tonic-gate 		unlock_lockmgr_granted(fg);
3257*7c478bd9Sstevel@tonic-gate 		break;
3258*7c478bd9Sstevel@tonic-gate 	default:
3259*7c478bd9Sstevel@tonic-gate 		panic("flk_set_lockmgr_status: bad status (%d)", status);
3260*7c478bd9Sstevel@tonic-gate 		break;
3261*7c478bd9Sstevel@tonic-gate 	}
3262*7c478bd9Sstevel@tonic-gate }
3263*7c478bd9Sstevel@tonic-gate 
3264*7c478bd9Sstevel@tonic-gate /*
3265*7c478bd9Sstevel@tonic-gate  * This routine returns all the locks that are active or sleeping and are
3266*7c478bd9Sstevel@tonic-gate  * associated with a particular set of identifiers.  If lock_state != 0, then
3267*7c478bd9Sstevel@tonic-gate  * only locks that match the lock_state are returned. If lock_state == 0, then
3268*7c478bd9Sstevel@tonic-gate  * all locks are returned. If pid == NOPID, the pid is ignored.  If
3269*7c478bd9Sstevel@tonic-gate  * use_sysid is FALSE, then the sysid is ignored.  If vp is NULL, then the
3270*7c478bd9Sstevel@tonic-gate  * vnode pointer is ignored.
3271*7c478bd9Sstevel@tonic-gate  *
3272*7c478bd9Sstevel@tonic-gate  * A list containing the vnode pointer and an flock structure
3273*7c478bd9Sstevel@tonic-gate  * describing the lock is returned.  Each element in the list is
3274*7c478bd9Sstevel@tonic-gate  * dynammically allocated and must be freed by the caller.  The
3275*7c478bd9Sstevel@tonic-gate  * last item in the list is denoted by a NULL value in the ll_next
3276*7c478bd9Sstevel@tonic-gate  * field.
3277*7c478bd9Sstevel@tonic-gate  *
3278*7c478bd9Sstevel@tonic-gate  * The vnode pointers returned are held.  The caller is responsible
3279*7c478bd9Sstevel@tonic-gate  * for releasing these.  Note that the returned list is only a snapshot of
3280*7c478bd9Sstevel@tonic-gate  * the current lock information, and that it is a snapshot of a moving
3281*7c478bd9Sstevel@tonic-gate  * target (only one graph is locked at a time).
3282*7c478bd9Sstevel@tonic-gate  */
3283*7c478bd9Sstevel@tonic-gate 
3284*7c478bd9Sstevel@tonic-gate locklist_t *
3285*7c478bd9Sstevel@tonic-gate get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3286*7c478bd9Sstevel@tonic-gate 		pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3287*7c478bd9Sstevel@tonic-gate {
3288*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
3289*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*graph_head;
3290*7c478bd9Sstevel@tonic-gate 	locklist_t		listhead;
3291*7c478bd9Sstevel@tonic-gate 	locklist_t		*llheadp;
3292*7c478bd9Sstevel@tonic-gate 	locklist_t		*llp;
3293*7c478bd9Sstevel@tonic-gate 	locklist_t		*lltp;
3294*7c478bd9Sstevel@tonic-gate 	graph_t			*gp;
3295*7c478bd9Sstevel@tonic-gate 	int			i;
3296*7c478bd9Sstevel@tonic-gate 	int			first_index; /* graph index */
3297*7c478bd9Sstevel@tonic-gate 	int			num_indexes; /* graph index */
3298*7c478bd9Sstevel@tonic-gate 
3299*7c478bd9Sstevel@tonic-gate 	ASSERT((list_type == FLK_ACTIVE_STATE) ||
3300*7c478bd9Sstevel@tonic-gate 	    (list_type == FLK_SLEEPING_STATE));
3301*7c478bd9Sstevel@tonic-gate 
3302*7c478bd9Sstevel@tonic-gate 	/*
3303*7c478bd9Sstevel@tonic-gate 	 * Get a pointer to something to use as a list head while building
3304*7c478bd9Sstevel@tonic-gate 	 * the rest of the list.
3305*7c478bd9Sstevel@tonic-gate 	 */
3306*7c478bd9Sstevel@tonic-gate 	llheadp = &listhead;
3307*7c478bd9Sstevel@tonic-gate 	lltp = llheadp;
3308*7c478bd9Sstevel@tonic-gate 	llheadp->ll_next = (locklist_t *)NULL;
3309*7c478bd9Sstevel@tonic-gate 
3310*7c478bd9Sstevel@tonic-gate 	/* Figure out which graphs we want to look at. */
3311*7c478bd9Sstevel@tonic-gate 	if (vp == NULL) {
3312*7c478bd9Sstevel@tonic-gate 		first_index = 0;
3313*7c478bd9Sstevel@tonic-gate 		num_indexes = HASH_SIZE;
3314*7c478bd9Sstevel@tonic-gate 	} else {
3315*7c478bd9Sstevel@tonic-gate 		first_index = HASH_INDEX(vp);
3316*7c478bd9Sstevel@tonic-gate 		num_indexes = 1;
3317*7c478bd9Sstevel@tonic-gate 	}
3318*7c478bd9Sstevel@tonic-gate 
3319*7c478bd9Sstevel@tonic-gate 	for (i = first_index; i < first_index + num_indexes; i++) {
3320*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
3321*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
3322*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
3323*7c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
3324*7c478bd9Sstevel@tonic-gate 			continue;
3325*7c478bd9Sstevel@tonic-gate 		}
3326*7c478bd9Sstevel@tonic-gate 
3327*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
3328*7c478bd9Sstevel@tonic-gate 		graph_head = (list_type == FLK_ACTIVE_STATE) ?
3329*7c478bd9Sstevel@tonic-gate 			ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3330*7c478bd9Sstevel@tonic-gate 		for (lock = graph_head->l_next;
3331*7c478bd9Sstevel@tonic-gate 		    lock != graph_head;
3332*7c478bd9Sstevel@tonic-gate 		    lock = lock->l_next) {
3333*7c478bd9Sstevel@tonic-gate 			if (use_sysid && lock->l_flock.l_sysid != sysid)
3334*7c478bd9Sstevel@tonic-gate 				continue;
3335*7c478bd9Sstevel@tonic-gate 			if (pid != NOPID && lock->l_flock.l_pid != pid)
3336*7c478bd9Sstevel@tonic-gate 				continue;
3337*7c478bd9Sstevel@tonic-gate 			if (vp != NULL && lock->l_vnode != vp)
3338*7c478bd9Sstevel@tonic-gate 				continue;
3339*7c478bd9Sstevel@tonic-gate 			if (lock_state && !(lock_state & lock->l_state))
3340*7c478bd9Sstevel@tonic-gate 				continue;
3341*7c478bd9Sstevel@tonic-gate 			if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3342*7c478bd9Sstevel@tonic-gate 				continue;
3343*7c478bd9Sstevel@tonic-gate 			/*
3344*7c478bd9Sstevel@tonic-gate 			 * A matching lock was found.  Allocate
3345*7c478bd9Sstevel@tonic-gate 			 * space for a new locklist entry and fill
3346*7c478bd9Sstevel@tonic-gate 			 * it in.
3347*7c478bd9Sstevel@tonic-gate 			 */
3348*7c478bd9Sstevel@tonic-gate 			llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3349*7c478bd9Sstevel@tonic-gate 			lltp->ll_next = llp;
3350*7c478bd9Sstevel@tonic-gate 			VN_HOLD(lock->l_vnode);
3351*7c478bd9Sstevel@tonic-gate 			llp->ll_vp = lock->l_vnode;
3352*7c478bd9Sstevel@tonic-gate 			create_flock(lock, &(llp->ll_flock));
3353*7c478bd9Sstevel@tonic-gate 			llp->ll_next = (locklist_t *)NULL;
3354*7c478bd9Sstevel@tonic-gate 			lltp = llp;
3355*7c478bd9Sstevel@tonic-gate 		}
3356*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
3357*7c478bd9Sstevel@tonic-gate 	}
3358*7c478bd9Sstevel@tonic-gate 
3359*7c478bd9Sstevel@tonic-gate 	llp = llheadp->ll_next;
3360*7c478bd9Sstevel@tonic-gate 	return (llp);
3361*7c478bd9Sstevel@tonic-gate }
3362*7c478bd9Sstevel@tonic-gate 
3363*7c478bd9Sstevel@tonic-gate /*
3364*7c478bd9Sstevel@tonic-gate  * These two functions are simply interfaces to get_lock_list.  They return
3365*7c478bd9Sstevel@tonic-gate  * a list of sleeping or active locks for the given sysid and pid.  See
3366*7c478bd9Sstevel@tonic-gate  * get_lock_list for details.
3367*7c478bd9Sstevel@tonic-gate  *
3368*7c478bd9Sstevel@tonic-gate  * In either case we don't particularly care to specify the zone of interest;
3369*7c478bd9Sstevel@tonic-gate  * the sysid-space is global across zones, so the sysid will map to exactly one
3370*7c478bd9Sstevel@tonic-gate  * zone, and we'll return information for that zone.
3371*7c478bd9Sstevel@tonic-gate  */
3372*7c478bd9Sstevel@tonic-gate 
3373*7c478bd9Sstevel@tonic-gate locklist_t *
3374*7c478bd9Sstevel@tonic-gate flk_get_sleeping_locks(int sysid, pid_t pid)
3375*7c478bd9Sstevel@tonic-gate {
3376*7c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3377*7c478bd9Sstevel@tonic-gate 		    ALL_ZONES));
3378*7c478bd9Sstevel@tonic-gate }
3379*7c478bd9Sstevel@tonic-gate 
3380*7c478bd9Sstevel@tonic-gate locklist_t *
3381*7c478bd9Sstevel@tonic-gate flk_get_active_locks(int sysid, pid_t pid)
3382*7c478bd9Sstevel@tonic-gate {
3383*7c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3384*7c478bd9Sstevel@tonic-gate 		    ALL_ZONES));
3385*7c478bd9Sstevel@tonic-gate }
3386*7c478bd9Sstevel@tonic-gate 
3387*7c478bd9Sstevel@tonic-gate /*
3388*7c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
3389*7c478bd9Sstevel@tonic-gate  * locks for a given vnode.  Again, see get_lock_list for details.
3390*7c478bd9Sstevel@tonic-gate  *
3391*7c478bd9Sstevel@tonic-gate  * We don't need to specify which zone's locks we're interested in.  The matter
3392*7c478bd9Sstevel@tonic-gate  * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3393*7c478bd9Sstevel@tonic-gate  * be used by multiple zones, so the list of locks will all be from the right
3394*7c478bd9Sstevel@tonic-gate  * zone.
3395*7c478bd9Sstevel@tonic-gate  */
3396*7c478bd9Sstevel@tonic-gate 
3397*7c478bd9Sstevel@tonic-gate locklist_t *
3398*7c478bd9Sstevel@tonic-gate flk_active_locks_for_vp(const vnode_t *vp)
3399*7c478bd9Sstevel@tonic-gate {
3400*7c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3401*7c478bd9Sstevel@tonic-gate 		    ALL_ZONES));
3402*7c478bd9Sstevel@tonic-gate }
3403*7c478bd9Sstevel@tonic-gate 
3404*7c478bd9Sstevel@tonic-gate /*
3405*7c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
3406*7c478bd9Sstevel@tonic-gate  * nbmand locks for a given vnode.  Again, see get_lock_list for details.
3407*7c478bd9Sstevel@tonic-gate  *
3408*7c478bd9Sstevel@tonic-gate  * See the comment for flk_active_locks_for_vp() for why we don't care to
3409*7c478bd9Sstevel@tonic-gate  * specify the particular zone of interest.
3410*7c478bd9Sstevel@tonic-gate  */
3411*7c478bd9Sstevel@tonic-gate locklist_t *
3412*7c478bd9Sstevel@tonic-gate flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3413*7c478bd9Sstevel@tonic-gate {
3414*7c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3415*7c478bd9Sstevel@tonic-gate 				NOPID, vp, ALL_ZONES));
3416*7c478bd9Sstevel@tonic-gate }
3417*7c478bd9Sstevel@tonic-gate 
3418*7c478bd9Sstevel@tonic-gate /*
3419*7c478bd9Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
3420*7c478bd9Sstevel@tonic-gate  * nbmand locks for a given pid.  Again, see get_lock_list for details.
3421*7c478bd9Sstevel@tonic-gate  *
3422*7c478bd9Sstevel@tonic-gate  * The zone doesn't need to be specified here; the locks held by a
3423*7c478bd9Sstevel@tonic-gate  * particular process will either be local (ie, non-NFS) or from the zone
3424*7c478bd9Sstevel@tonic-gate  * the process is executing in.  This is because other parts of the system
3425*7c478bd9Sstevel@tonic-gate  * ensure that an NFS vnode can't be used in a zone other than that in
3426*7c478bd9Sstevel@tonic-gate  * which it was opened.
3427*7c478bd9Sstevel@tonic-gate  */
3428*7c478bd9Sstevel@tonic-gate locklist_t *
3429*7c478bd9Sstevel@tonic-gate flk_active_nbmand_locks(pid_t pid)
3430*7c478bd9Sstevel@tonic-gate {
3431*7c478bd9Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3432*7c478bd9Sstevel@tonic-gate 				pid, NULL, ALL_ZONES));
3433*7c478bd9Sstevel@tonic-gate }
3434*7c478bd9Sstevel@tonic-gate 
3435*7c478bd9Sstevel@tonic-gate /*
3436*7c478bd9Sstevel@tonic-gate  * Free up all entries in the locklist.
3437*7c478bd9Sstevel@tonic-gate  */
3438*7c478bd9Sstevel@tonic-gate void
3439*7c478bd9Sstevel@tonic-gate flk_free_locklist(locklist_t *llp)
3440*7c478bd9Sstevel@tonic-gate {
3441*7c478bd9Sstevel@tonic-gate 	locklist_t *next_llp;
3442*7c478bd9Sstevel@tonic-gate 
3443*7c478bd9Sstevel@tonic-gate 	while (llp) {
3444*7c478bd9Sstevel@tonic-gate 		next_llp = llp->ll_next;
3445*7c478bd9Sstevel@tonic-gate 		VN_RELE(llp->ll_vp);
3446*7c478bd9Sstevel@tonic-gate 		kmem_free(llp, sizeof (*llp));
3447*7c478bd9Sstevel@tonic-gate 		llp = next_llp;
3448*7c478bd9Sstevel@tonic-gate 	}
3449*7c478bd9Sstevel@tonic-gate }
3450*7c478bd9Sstevel@tonic-gate 
/*
 * For the cluster NLM server identified by "nlmid", set the NLM state
 * of every lock manager lock (sleeping and active) that belongs to that
 * server to "nlm_state".  Walks every graph in the lock_graph hash
 * table.
 */
static void
cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * For each graph "lg" in the hash table lock_graph do
	 * a.  Get the list of sleeping locks
	 * b.  For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 * c.  Get the list of active locks
	 * d.  For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 */

	int			i;
	graph_t			*gp;			/* lock graph */
	lock_descriptor_t	*lock;			/* lock */
	lock_descriptor_t	*nlock = NULL;		/* next lock */
	int			lock_nlmid;		/* NLM id of lock's sysid */

	for (i = 0; i < HASH_SIZE; i++) {
		/* lock_graph[] entries are read under flock_lock */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		/* Get list of sleeping locks in current lock graph. */
		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			/* capture successor before touching "lock" */
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				SET_NLM_STATE(lock, nlm_state);
			}
		}

		/* Get list of active locks in current lock graph. */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			/* capture successor before touching "lock" */
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				ASSERT(IS_ACTIVE(lock));
				SET_NLM_STATE(lock, nlm_state);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
3524*7c478bd9Sstevel@tonic-gate 
3525*7c478bd9Sstevel@tonic-gate /*
3526*7c478bd9Sstevel@tonic-gate  * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3527*7c478bd9Sstevel@tonic-gate  * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3528*7c478bd9Sstevel@tonic-gate  *   identified by "nlmid." Poke those lock requests.
3529*7c478bd9Sstevel@tonic-gate  */
static void
cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;	/* NLM id of the lock's sysid */

	for (i = 0; i < HASH_SIZE; i++) {
		/* lock_graph[] entries are read under flock_lock */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			/* capture successor first; waking may alter the list */
			nlock = lock->l_next;
			/*
			 * If NLM server request _and_ nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
			 * request.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid =
					GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					SET_NLM_STATE(lock,
						FLK_NLM_SHUTTING_DOWN);
					INTERRUPT_WAKEUP(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
3572*7c478bd9Sstevel@tonic-gate 
3573*7c478bd9Sstevel@tonic-gate /*
3574*7c478bd9Sstevel@tonic-gate  * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3575*7c478bd9Sstevel@tonic-gate  * Effects:  Find all active (granted) lock manager locks _only_ for the
3576*7c478bd9Sstevel@tonic-gate  *   NLM server identified by "nlmid" and release them.
3577*7c478bd9Sstevel@tonic-gate  */
static void
cl_flk_unlock_nlm_granted(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;	/* NLM id of the lock's sysid */

	for (i = 0; i < HASH_SIZE; i++) {
		/* lock_graph[] entries are read under flock_lock */
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			/* capture successor first; "lock" may be freed below */
			nlock = lock->l_next;
			ASSERT(IS_ACTIVE(lock));

			/*
			 * If it's an  NLM server request _and_ nlmid of
			 * the lock matches nlmid of argument, then
			 * remove the active lock from the list, wake up
			 * blocked threads, and free the storage for the
			 * lock.  Note that there's no need to mark the
			 * NLM state of this lock to NLM_DOWN because the
			 * lock will be deleted anyway and its storage
			 * freed.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
3624*7c478bd9Sstevel@tonic-gate 
3625*7c478bd9Sstevel@tonic-gate /*
3626*7c478bd9Sstevel@tonic-gate  * Find all sleeping lock manager requests and poke them.
3627*7c478bd9Sstevel@tonic-gate  */
3628*7c478bd9Sstevel@tonic-gate static void
3629*7c478bd9Sstevel@tonic-gate wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3630*7c478bd9Sstevel@tonic-gate {
3631*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
3632*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *nlock = NULL; /* next lock */
3633*7c478bd9Sstevel@tonic-gate 	int i;
3634*7c478bd9Sstevel@tonic-gate 	graph_t *gp;
3635*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
3636*7c478bd9Sstevel@tonic-gate 
3637*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
3638*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
3639*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
3640*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
3641*7c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
3642*7c478bd9Sstevel@tonic-gate 			continue;
3643*7c478bd9Sstevel@tonic-gate 		}
3644*7c478bd9Sstevel@tonic-gate 
3645*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
3646*7c478bd9Sstevel@tonic-gate 		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3647*7c478bd9Sstevel@tonic-gate 		for (lock = SLEEPING_HEAD(gp)->l_next;
3648*7c478bd9Sstevel@tonic-gate 		    lock != SLEEPING_HEAD(gp);
3649*7c478bd9Sstevel@tonic-gate 		    lock = nlock) {
3650*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
3651*7c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3652*7c478bd9Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
3653*7c478bd9Sstevel@tonic-gate 			}
3654*7c478bd9Sstevel@tonic-gate 		}
3655*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
3656*7c478bd9Sstevel@tonic-gate 	}
3657*7c478bd9Sstevel@tonic-gate }
3658*7c478bd9Sstevel@tonic-gate 
3659*7c478bd9Sstevel@tonic-gate 
3660*7c478bd9Sstevel@tonic-gate /*
3661*7c478bd9Sstevel@tonic-gate  * Find all active (granted) lock manager locks and release them.
3662*7c478bd9Sstevel@tonic-gate  */
3663*7c478bd9Sstevel@tonic-gate static void
3664*7c478bd9Sstevel@tonic-gate unlock_lockmgr_granted(struct flock_globals *fg)
3665*7c478bd9Sstevel@tonic-gate {
3666*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
3667*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *nlock = NULL; /* next lock */
3668*7c478bd9Sstevel@tonic-gate 	int i;
3669*7c478bd9Sstevel@tonic-gate 	graph_t *gp;
3670*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
3671*7c478bd9Sstevel@tonic-gate 
3672*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
3673*7c478bd9Sstevel@tonic-gate 		mutex_enter(&flock_lock);
3674*7c478bd9Sstevel@tonic-gate 		gp = lock_graph[i];
3675*7c478bd9Sstevel@tonic-gate 		mutex_exit(&flock_lock);
3676*7c478bd9Sstevel@tonic-gate 		if (gp == NULL) {
3677*7c478bd9Sstevel@tonic-gate 			continue;
3678*7c478bd9Sstevel@tonic-gate 		}
3679*7c478bd9Sstevel@tonic-gate 
3680*7c478bd9Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
3681*7c478bd9Sstevel@tonic-gate 		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3682*7c478bd9Sstevel@tonic-gate 		for (lock = ACTIVE_HEAD(gp)->l_next;
3683*7c478bd9Sstevel@tonic-gate 		    lock != ACTIVE_HEAD(gp);
3684*7c478bd9Sstevel@tonic-gate 		    lock = nlock) {
3685*7c478bd9Sstevel@tonic-gate 			nlock = lock->l_next;
3686*7c478bd9Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3687*7c478bd9Sstevel@tonic-gate 				ASSERT(IS_ACTIVE(lock));
3688*7c478bd9Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
3689*7c478bd9Sstevel@tonic-gate 				flk_wakeup(lock, 1);
3690*7c478bd9Sstevel@tonic-gate 				flk_free_lock(lock);
3691*7c478bd9Sstevel@tonic-gate 			}
3692*7c478bd9Sstevel@tonic-gate 		}
3693*7c478bd9Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
3694*7c478bd9Sstevel@tonic-gate 	}
3695*7c478bd9Sstevel@tonic-gate }
3696*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
3697*7c478bd9Sstevel@tonic-gate 
3698*7c478bd9Sstevel@tonic-gate 
3699*7c478bd9Sstevel@tonic-gate /*
3700*7c478bd9Sstevel@tonic-gate  * Wait until a lock is granted, cancelled, or interrupted.
3701*7c478bd9Sstevel@tonic-gate  */
3702*7c478bd9Sstevel@tonic-gate 
3703*7c478bd9Sstevel@tonic-gate static void
3704*7c478bd9Sstevel@tonic-gate wait_for_lock(lock_descriptor_t *request)
3705*7c478bd9Sstevel@tonic-gate {
3706*7c478bd9Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
3707*7c478bd9Sstevel@tonic-gate 
3708*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
3709*7c478bd9Sstevel@tonic-gate 
3710*7c478bd9Sstevel@tonic-gate 	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3711*7c478bd9Sstevel@tonic-gate 	    !(IS_INTERRUPTED(request))) {
3712*7c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3713*7c478bd9Sstevel@tonic-gate 			flk_set_state(request, FLK_INTERRUPTED_STATE);
3714*7c478bd9Sstevel@tonic-gate 			request->l_state |= INTERRUPTED_LOCK;
3715*7c478bd9Sstevel@tonic-gate 		}
3716*7c478bd9Sstevel@tonic-gate 	}
3717*7c478bd9Sstevel@tonic-gate }
3718*7c478bd9Sstevel@tonic-gate 
3719*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
3720*7c478bd9Sstevel@tonic-gate /*
3721*7c478bd9Sstevel@tonic-gate  * Create an flock structure from the existing lock information
3722*7c478bd9Sstevel@tonic-gate  *
3723*7c478bd9Sstevel@tonic-gate  * This routine is used to create flock structures for the lock manager
 * to use in a reclaim request.  Since the lock originated on this
3725*7c478bd9Sstevel@tonic-gate  * host, it must be conforming to UNIX semantics, so no checking is
3726*7c478bd9Sstevel@tonic-gate  * done to make sure it falls within the lower half of the 32-bit range.
3727*7c478bd9Sstevel@tonic-gate  */
3728*7c478bd9Sstevel@tonic-gate 
3729*7c478bd9Sstevel@tonic-gate static void
3730*7c478bd9Sstevel@tonic-gate create_flock(lock_descriptor_t *lp, flock64_t *flp)
3731*7c478bd9Sstevel@tonic-gate {
3732*7c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3733*7c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_end >= lp->l_start);
3734*7c478bd9Sstevel@tonic-gate 
3735*7c478bd9Sstevel@tonic-gate 	flp->l_type = lp->l_type;
3736*7c478bd9Sstevel@tonic-gate 	flp->l_whence = 0;
3737*7c478bd9Sstevel@tonic-gate 	flp->l_start = lp->l_start;
3738*7c478bd9Sstevel@tonic-gate 	flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3739*7c478bd9Sstevel@tonic-gate 		(lp->l_end - lp->l_start + 1);
3740*7c478bd9Sstevel@tonic-gate 	flp->l_sysid = lp->l_flock.l_sysid;
3741*7c478bd9Sstevel@tonic-gate 	flp->l_pid = lp->l_flock.l_pid;
3742*7c478bd9Sstevel@tonic-gate }
3743*7c478bd9Sstevel@tonic-gate 
3744*7c478bd9Sstevel@tonic-gate /*
3745*7c478bd9Sstevel@tonic-gate  * Convert flock_t data describing a lock range into unsigned long starting
3746*7c478bd9Sstevel@tonic-gate  * and ending points, which are put into lock_request.  Returns 0 or an
3747*7c478bd9Sstevel@tonic-gate  * errno value.
3748*7c478bd9Sstevel@tonic-gate  * Large Files: max is passed by the caller and we return EOVERFLOW
3749*7c478bd9Sstevel@tonic-gate  * as defined by LFS API.
3750*7c478bd9Sstevel@tonic-gate  */
3751*7c478bd9Sstevel@tonic-gate 
3752*7c478bd9Sstevel@tonic-gate int
3753*7c478bd9Sstevel@tonic-gate flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3754*7c478bd9Sstevel@tonic-gate     u_offset_t *start, u_offset_t *end, offset_t offset)
3755*7c478bd9Sstevel@tonic-gate {
3756*7c478bd9Sstevel@tonic-gate 	struct vattr	vattr;
3757*7c478bd9Sstevel@tonic-gate 	int	error;
3758*7c478bd9Sstevel@tonic-gate 
3759*7c478bd9Sstevel@tonic-gate 	/*
3760*7c478bd9Sstevel@tonic-gate 	 * Determine the starting point of the request
3761*7c478bd9Sstevel@tonic-gate 	 */
3762*7c478bd9Sstevel@tonic-gate 	switch (flp->l_whence) {
3763*7c478bd9Sstevel@tonic-gate 	case 0:		/* SEEK_SET */
3764*7c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)flp->l_start;
3765*7c478bd9Sstevel@tonic-gate 		break;
3766*7c478bd9Sstevel@tonic-gate 	case 1:		/* SEEK_CUR */
3767*7c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)(flp->l_start + offset);
3768*7c478bd9Sstevel@tonic-gate 		break;
3769*7c478bd9Sstevel@tonic-gate 	case 2:		/* SEEK_END */
3770*7c478bd9Sstevel@tonic-gate 		vattr.va_mask = AT_SIZE;
3771*7c478bd9Sstevel@tonic-gate 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
3772*7c478bd9Sstevel@tonic-gate 			return (error);
3773*7c478bd9Sstevel@tonic-gate 		*start = (u_offset_t)(flp->l_start + vattr.va_size);
3774*7c478bd9Sstevel@tonic-gate 		break;
3775*7c478bd9Sstevel@tonic-gate 	default:
3776*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3777*7c478bd9Sstevel@tonic-gate 	}
3778*7c478bd9Sstevel@tonic-gate 
3779*7c478bd9Sstevel@tonic-gate 	/*
3780*7c478bd9Sstevel@tonic-gate 	 * Determine the range covered by the request.
3781*7c478bd9Sstevel@tonic-gate 	 */
3782*7c478bd9Sstevel@tonic-gate 	if (flp->l_len == 0)
3783*7c478bd9Sstevel@tonic-gate 		*end = MAX_U_OFFSET_T;
3784*7c478bd9Sstevel@tonic-gate 	else if ((offset_t)flp->l_len > 0) {
3785*7c478bd9Sstevel@tonic-gate 		*end = (u_offset_t)(*start + (flp->l_len - 1));
3786*7c478bd9Sstevel@tonic-gate 	} else {
3787*7c478bd9Sstevel@tonic-gate 		/*
3788*7c478bd9Sstevel@tonic-gate 		 * Negative length; why do we even allow this ?
3789*7c478bd9Sstevel@tonic-gate 		 * Because this allows easy specification of
3790*7c478bd9Sstevel@tonic-gate 		 * the last n bytes of the file.
3791*7c478bd9Sstevel@tonic-gate 		 */
3792*7c478bd9Sstevel@tonic-gate 		*end = *start;
3793*7c478bd9Sstevel@tonic-gate 		*start += (u_offset_t)flp->l_len;
3794*7c478bd9Sstevel@tonic-gate 		(*start)++;
3795*7c478bd9Sstevel@tonic-gate 	}
3796*7c478bd9Sstevel@tonic-gate 	return (0);
3797*7c478bd9Sstevel@tonic-gate }
3798*7c478bd9Sstevel@tonic-gate 
3799*7c478bd9Sstevel@tonic-gate /*
 * Check the validity of lock data.  This can be used by the NFS
3801*7c478bd9Sstevel@tonic-gate  * frlock routines to check data before contacting the server.  The
3802*7c478bd9Sstevel@tonic-gate  * server must support semantics that aren't as restrictive as
3803*7c478bd9Sstevel@tonic-gate  * the UNIX API, so the NFS client is required to check.
3804*7c478bd9Sstevel@tonic-gate  * The maximum is now passed in by the caller.
3805*7c478bd9Sstevel@tonic-gate  */
3806*7c478bd9Sstevel@tonic-gate 
3807*7c478bd9Sstevel@tonic-gate int
3808*7c478bd9Sstevel@tonic-gate flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3809*7c478bd9Sstevel@tonic-gate {
3810*7c478bd9Sstevel@tonic-gate 	/*
3811*7c478bd9Sstevel@tonic-gate 	 * The end (length) for local locking should never be greater
3812*7c478bd9Sstevel@tonic-gate 	 * than MAXEND. However, the representation for
3813*7c478bd9Sstevel@tonic-gate 	 * the entire file is MAX_U_OFFSET_T.
3814*7c478bd9Sstevel@tonic-gate 	 */
3815*7c478bd9Sstevel@tonic-gate 	if ((start > max) ||
3816*7c478bd9Sstevel@tonic-gate 	    ((end > max) && (end != MAX_U_OFFSET_T))) {
3817*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3818*7c478bd9Sstevel@tonic-gate 	}
3819*7c478bd9Sstevel@tonic-gate 	if (start > end) {
3820*7c478bd9Sstevel@tonic-gate 	    return (EINVAL);
3821*7c478bd9Sstevel@tonic-gate 	}
3822*7c478bd9Sstevel@tonic-gate 	return (0);
3823*7c478bd9Sstevel@tonic-gate }
3824*7c478bd9Sstevel@tonic-gate 
3825*7c478bd9Sstevel@tonic-gate /*
3826*7c478bd9Sstevel@tonic-gate  * Fill in request->l_flock with information about the lock blocking the
3827*7c478bd9Sstevel@tonic-gate  * request.  The complexity here is that lock manager requests are allowed
3828*7c478bd9Sstevel@tonic-gate  * to see into the upper part of the 32-bit address range, whereas local
3829*7c478bd9Sstevel@tonic-gate  * requests are only allowed to see signed values.
3830*7c478bd9Sstevel@tonic-gate  *
3831*7c478bd9Sstevel@tonic-gate  * What should be done when "blocker" is a lock manager lock that uses the
3832*7c478bd9Sstevel@tonic-gate  * upper portion of the 32-bit range, but "request" is local?  Since the
3833*7c478bd9Sstevel@tonic-gate  * request has already been determined to have been blocked by the blocker,
3834*7c478bd9Sstevel@tonic-gate  * at least some portion of "blocker" must be in the range of the request,
3835*7c478bd9Sstevel@tonic-gate  * or the request extends to the end of file.  For the first case, the
3836*7c478bd9Sstevel@tonic-gate  * portion in the lower range is returned with the indication that it goes
3837*7c478bd9Sstevel@tonic-gate  * "to EOF."  For the second case, the last byte of the lower range is
3838*7c478bd9Sstevel@tonic-gate  * returned with the indication that it goes "to EOF."
3839*7c478bd9Sstevel@tonic-gate  */
3840*7c478bd9Sstevel@tonic-gate 
3841*7c478bd9Sstevel@tonic-gate static void
3842*7c478bd9Sstevel@tonic-gate report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3843*7c478bd9Sstevel@tonic-gate {
3844*7c478bd9Sstevel@tonic-gate 	flock64_t *flrp;			/* l_flock portion of request */
3845*7c478bd9Sstevel@tonic-gate 
3846*7c478bd9Sstevel@tonic-gate 	ASSERT(blocker != NULL);
3847*7c478bd9Sstevel@tonic-gate 
3848*7c478bd9Sstevel@tonic-gate 	flrp = &request->l_flock;
3849*7c478bd9Sstevel@tonic-gate 	flrp->l_whence = 0;
3850*7c478bd9Sstevel@tonic-gate 	flrp->l_type = blocker->l_type;
3851*7c478bd9Sstevel@tonic-gate 	flrp->l_pid = blocker->l_flock.l_pid;
3852*7c478bd9Sstevel@tonic-gate 	flrp->l_sysid = blocker->l_flock.l_sysid;
3853*7c478bd9Sstevel@tonic-gate 
3854*7c478bd9Sstevel@tonic-gate 	if (IS_LOCKMGR(request)) {
3855*7c478bd9Sstevel@tonic-gate 		flrp->l_start = blocker->l_start;
3856*7c478bd9Sstevel@tonic-gate 		if (blocker->l_end == MAX_U_OFFSET_T)
3857*7c478bd9Sstevel@tonic-gate 			flrp->l_len = 0;
3858*7c478bd9Sstevel@tonic-gate 		else
3859*7c478bd9Sstevel@tonic-gate 			flrp->l_len = blocker->l_end - blocker->l_start + 1;
3860*7c478bd9Sstevel@tonic-gate 	} else {
3861*7c478bd9Sstevel@tonic-gate 		if (blocker->l_start > MAXEND) {
3862*7c478bd9Sstevel@tonic-gate 			flrp->l_start = MAXEND;
3863*7c478bd9Sstevel@tonic-gate 			flrp->l_len = 0;
3864*7c478bd9Sstevel@tonic-gate 		} else {
3865*7c478bd9Sstevel@tonic-gate 			flrp->l_start = blocker->l_start;
3866*7c478bd9Sstevel@tonic-gate 			if (blocker->l_end == MAX_U_OFFSET_T)
3867*7c478bd9Sstevel@tonic-gate 				flrp->l_len = 0;
3868*7c478bd9Sstevel@tonic-gate 			else
3869*7c478bd9Sstevel@tonic-gate 				flrp->l_len = blocker->l_end -
3870*7c478bd9Sstevel@tonic-gate 					blocker->l_start + 1;
3871*7c478bd9Sstevel@tonic-gate 		}
3872*7c478bd9Sstevel@tonic-gate 	}
3873*7c478bd9Sstevel@tonic-gate }
3874*7c478bd9Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
3875*7c478bd9Sstevel@tonic-gate 
3876*7c478bd9Sstevel@tonic-gate /*
3877*7c478bd9Sstevel@tonic-gate  * PSARC case 1997/292
3878*7c478bd9Sstevel@tonic-gate  */
3879*7c478bd9Sstevel@tonic-gate /*
3880*7c478bd9Sstevel@tonic-gate  * This is the public routine exported by flock.h.
3881*7c478bd9Sstevel@tonic-gate  */
3882*7c478bd9Sstevel@tonic-gate void
3883*7c478bd9Sstevel@tonic-gate cl_flk_change_nlm_state_to_unknown(int nlmid)
3884*7c478bd9Sstevel@tonic-gate {
3885*7c478bd9Sstevel@tonic-gate 	/*
3886*7c478bd9Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
3887*7c478bd9Sstevel@tonic-gate 	 */
3888*7c478bd9Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3889*7c478bd9Sstevel@tonic-gate 		return;
3890*7c478bd9Sstevel@tonic-gate 	}
3891*7c478bd9Sstevel@tonic-gate 
3892*7c478bd9Sstevel@tonic-gate 	/*
3893*7c478bd9Sstevel@tonic-gate 	 * See comment in cl_flk_set_nlm_status().
3894*7c478bd9Sstevel@tonic-gate 	 */
3895*7c478bd9Sstevel@tonic-gate 	if (nlm_reg_status == NULL) {
3896*7c478bd9Sstevel@tonic-gate 		return;
3897*7c478bd9Sstevel@tonic-gate 	}
3898*7c478bd9Sstevel@tonic-gate 
3899*7c478bd9Sstevel@tonic-gate 	/*
3900*7c478bd9Sstevel@tonic-gate 	 * protect NLM registry state with a mutex.
3901*7c478bd9Sstevel@tonic-gate 	 */
3902*7c478bd9Sstevel@tonic-gate 	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3903*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nlm_reg_lock);
3904*7c478bd9Sstevel@tonic-gate 	FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3905*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nlm_reg_lock);
3906*7c478bd9Sstevel@tonic-gate }
3907*7c478bd9Sstevel@tonic-gate 
3908*7c478bd9Sstevel@tonic-gate /*
3909*7c478bd9Sstevel@tonic-gate  * Return non-zero if the given I/O request conflicts with an active NBMAND
3910*7c478bd9Sstevel@tonic-gate  * lock.
3911*7c478bd9Sstevel@tonic-gate  * If svmand is non-zero, it means look at all active locks, not just NBMAND
3912*7c478bd9Sstevel@tonic-gate  * locks.
3913*7c478bd9Sstevel@tonic-gate  */
3914*7c478bd9Sstevel@tonic-gate 
3915*7c478bd9Sstevel@tonic-gate int
3916*7c478bd9Sstevel@tonic-gate nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3917*7c478bd9Sstevel@tonic-gate 		ssize_t length, int svmand)
3918*7c478bd9Sstevel@tonic-gate {
3919*7c478bd9Sstevel@tonic-gate 	int conflict = 0;
3920*7c478bd9Sstevel@tonic-gate 	graph_t			*gp;
3921*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*lock;
3922*7c478bd9Sstevel@tonic-gate 
3923*7c478bd9Sstevel@tonic-gate 	mutex_enter(&flock_lock);
3924*7c478bd9Sstevel@tonic-gate 	gp = lock_graph[HASH_INDEX(vp)];
3925*7c478bd9Sstevel@tonic-gate 	mutex_exit(&flock_lock);
3926*7c478bd9Sstevel@tonic-gate 	if (gp == NULL)
3927*7c478bd9Sstevel@tonic-gate 		return (0);
3928*7c478bd9Sstevel@tonic-gate 
3929*7c478bd9Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
3930*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3931*7c478bd9Sstevel@tonic-gate 
3932*7c478bd9Sstevel@tonic-gate 	for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3933*7c478bd9Sstevel@tonic-gate 		if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3934*7c478bd9Sstevel@tonic-gate 		    lock->l_flock.l_sysid == 0 &&
3935*7c478bd9Sstevel@tonic-gate 		    lock->l_flock.l_pid != curproc->p_pid &&
3936*7c478bd9Sstevel@tonic-gate 		    lock_blocks_io(op, offset, length,
3937*7c478bd9Sstevel@tonic-gate 				lock->l_type, lock->l_start, lock->l_end)) {
3938*7c478bd9Sstevel@tonic-gate 			conflict = 1;
3939*7c478bd9Sstevel@tonic-gate 			break;
3940*7c478bd9Sstevel@tonic-gate 		}
3941*7c478bd9Sstevel@tonic-gate 	}
3942*7c478bd9Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
3943*7c478bd9Sstevel@tonic-gate 
3944*7c478bd9Sstevel@tonic-gate 	return (conflict);
3945*7c478bd9Sstevel@tonic-gate }
3946*7c478bd9Sstevel@tonic-gate 
3947*7c478bd9Sstevel@tonic-gate /*
3948*7c478bd9Sstevel@tonic-gate  * Return non-zero if the given I/O request conflicts with the given lock.
3949*7c478bd9Sstevel@tonic-gate  */
3950*7c478bd9Sstevel@tonic-gate 
3951*7c478bd9Sstevel@tonic-gate static int
3952*7c478bd9Sstevel@tonic-gate lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
3953*7c478bd9Sstevel@tonic-gate 	    int lock_type, u_offset_t lock_start, u_offset_t lock_end)
3954*7c478bd9Sstevel@tonic-gate {
3955*7c478bd9Sstevel@tonic-gate 	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
3956*7c478bd9Sstevel@tonic-gate 	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
3957*7c478bd9Sstevel@tonic-gate 
3958*7c478bd9Sstevel@tonic-gate 	if (op == NBL_READ && lock_type == F_RDLCK)
3959*7c478bd9Sstevel@tonic-gate 		return (0);
3960*7c478bd9Sstevel@tonic-gate 
3961*7c478bd9Sstevel@tonic-gate 	if (offset <= lock_start && lock_start < offset + length)
3962*7c478bd9Sstevel@tonic-gate 		return (1);
3963*7c478bd9Sstevel@tonic-gate 	if (lock_start <= offset && offset <= lock_end)
3964*7c478bd9Sstevel@tonic-gate 		return (1);
3965*7c478bd9Sstevel@tonic-gate 
3966*7c478bd9Sstevel@tonic-gate 	return (0);
3967*7c478bd9Sstevel@tonic-gate }
3968*7c478bd9Sstevel@tonic-gate 
3969*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3970*7c478bd9Sstevel@tonic-gate static void
3971*7c478bd9Sstevel@tonic-gate check_active_locks(graph_t *gp)
3972*7c478bd9Sstevel@tonic-gate {
3973*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock, *lock1;
3974*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
3975*7c478bd9Sstevel@tonic-gate 
3976*7c478bd9Sstevel@tonic-gate 	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
3977*7c478bd9Sstevel@tonic-gate 						lock = lock->l_next) {
3978*7c478bd9Sstevel@tonic-gate 		ASSERT(IS_ACTIVE(lock));
3979*7c478bd9Sstevel@tonic-gate 		ASSERT(NOT_BLOCKED(lock));
3980*7c478bd9Sstevel@tonic-gate 		ASSERT(!IS_BARRIER(lock));
3981*7c478bd9Sstevel@tonic-gate 
3982*7c478bd9Sstevel@tonic-gate 		ep = FIRST_IN(lock);
3983*7c478bd9Sstevel@tonic-gate 
3984*7c478bd9Sstevel@tonic-gate 		while (ep != HEAD(lock)) {
3985*7c478bd9Sstevel@tonic-gate 			ASSERT(IS_SLEEPING(ep->from_vertex));
3986*7c478bd9Sstevel@tonic-gate 			ASSERT(!NOT_BLOCKED(ep->from_vertex));
3987*7c478bd9Sstevel@tonic-gate 			ep = NEXT_IN(ep);
3988*7c478bd9Sstevel@tonic-gate 		}
3989*7c478bd9Sstevel@tonic-gate 
3990*7c478bd9Sstevel@tonic-gate 		for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
3991*7c478bd9Sstevel@tonic-gate 					lock1 = lock1->l_next) {
3992*7c478bd9Sstevel@tonic-gate 			if (lock1->l_vnode == lock->l_vnode) {
3993*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock1, lock)) {
3994*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
3995*7c478bd9Sstevel@tonic-gate 				    "active lock %p blocks %p",
3996*7c478bd9Sstevel@tonic-gate 				    (void *)lock1, (void *)lock);
3997*7c478bd9Sstevel@tonic-gate 			} else if (BLOCKS(lock, lock1)) {
3998*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
3999*7c478bd9Sstevel@tonic-gate 				    "active lock %p blocks %p",
4000*7c478bd9Sstevel@tonic-gate 				    (void *)lock, (void *)lock1);
4001*7c478bd9Sstevel@tonic-gate 			}
4002*7c478bd9Sstevel@tonic-gate 			}
4003*7c478bd9Sstevel@tonic-gate 		}
4004*7c478bd9Sstevel@tonic-gate 	}
4005*7c478bd9Sstevel@tonic-gate }
4006*7c478bd9Sstevel@tonic-gate 
4007*7c478bd9Sstevel@tonic-gate /*
4008*7c478bd9Sstevel@tonic-gate  * Effect: This functions checks to see if the transition from 'old_state' to
4009*7c478bd9Sstevel@tonic-gate  *	'new_state' is a valid one.  It returns 0 if the transition is valid
4010*7c478bd9Sstevel@tonic-gate  *	and 1 if it is not.
4011*7c478bd9Sstevel@tonic-gate  *	For a map of valid transitions, see sys/flock_impl.h
4012*7c478bd9Sstevel@tonic-gate  */
4013*7c478bd9Sstevel@tonic-gate static int
4014*7c478bd9Sstevel@tonic-gate check_lock_transition(int old_state, int new_state)
4015*7c478bd9Sstevel@tonic-gate {
4016*7c478bd9Sstevel@tonic-gate 	switch (old_state) {
4017*7c478bd9Sstevel@tonic-gate 	case FLK_INITIAL_STATE:
4018*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_START_STATE) ||
4019*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_SLEEPING_STATE) ||
4020*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_ACTIVE_STATE) ||
4021*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
4022*7c478bd9Sstevel@tonic-gate 			return (0);
4023*7c478bd9Sstevel@tonic-gate 		} else {
4024*7c478bd9Sstevel@tonic-gate 			return (1);
4025*7c478bd9Sstevel@tonic-gate 		}
4026*7c478bd9Sstevel@tonic-gate 	case FLK_START_STATE:
4027*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_ACTIVE_STATE) ||
4028*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
4029*7c478bd9Sstevel@tonic-gate 			return (0);
4030*7c478bd9Sstevel@tonic-gate 		} else {
4031*7c478bd9Sstevel@tonic-gate 			return (1);
4032*7c478bd9Sstevel@tonic-gate 		}
4033*7c478bd9Sstevel@tonic-gate 	case FLK_ACTIVE_STATE:
4034*7c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
4035*7c478bd9Sstevel@tonic-gate 			return (0);
4036*7c478bd9Sstevel@tonic-gate 		} else {
4037*7c478bd9Sstevel@tonic-gate 			return (1);
4038*7c478bd9Sstevel@tonic-gate 		}
4039*7c478bd9Sstevel@tonic-gate 	case FLK_SLEEPING_STATE:
4040*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_GRANTED_STATE) ||
4041*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_INTERRUPTED_STATE) ||
4042*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_CANCELLED_STATE)) {
4043*7c478bd9Sstevel@tonic-gate 			return (0);
4044*7c478bd9Sstevel@tonic-gate 		} else {
4045*7c478bd9Sstevel@tonic-gate 			return (1);
4046*7c478bd9Sstevel@tonic-gate 		}
4047*7c478bd9Sstevel@tonic-gate 	case FLK_GRANTED_STATE:
4048*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_START_STATE) ||
4049*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_INTERRUPTED_STATE) ||
4050*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_CANCELLED_STATE)) {
4051*7c478bd9Sstevel@tonic-gate 			return (0);
4052*7c478bd9Sstevel@tonic-gate 		} else {
4053*7c478bd9Sstevel@tonic-gate 			return (1);
4054*7c478bd9Sstevel@tonic-gate 		}
4055*7c478bd9Sstevel@tonic-gate 	case FLK_CANCELLED_STATE:
4056*7c478bd9Sstevel@tonic-gate 		if ((new_state == FLK_INTERRUPTED_STATE) ||
4057*7c478bd9Sstevel@tonic-gate 		    (new_state == FLK_DEAD_STATE)) {
4058*7c478bd9Sstevel@tonic-gate 			return (0);
4059*7c478bd9Sstevel@tonic-gate 		} else {
4060*7c478bd9Sstevel@tonic-gate 			return (1);
4061*7c478bd9Sstevel@tonic-gate 		}
4062*7c478bd9Sstevel@tonic-gate 	case FLK_INTERRUPTED_STATE:
4063*7c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
4064*7c478bd9Sstevel@tonic-gate 			return (0);
4065*7c478bd9Sstevel@tonic-gate 		} else {
4066*7c478bd9Sstevel@tonic-gate 			return (1);
4067*7c478bd9Sstevel@tonic-gate 		}
4068*7c478bd9Sstevel@tonic-gate 	case FLK_DEAD_STATE:
4069*7c478bd9Sstevel@tonic-gate 		/* May be set more than once */
4070*7c478bd9Sstevel@tonic-gate 		if (new_state == FLK_DEAD_STATE) {
4071*7c478bd9Sstevel@tonic-gate 			return (0);
4072*7c478bd9Sstevel@tonic-gate 		} else {
4073*7c478bd9Sstevel@tonic-gate 			return (1);
4074*7c478bd9Sstevel@tonic-gate 		}
4075*7c478bd9Sstevel@tonic-gate 	default:
4076*7c478bd9Sstevel@tonic-gate 		return (1);
4077*7c478bd9Sstevel@tonic-gate 	}
4078*7c478bd9Sstevel@tonic-gate }
4079*7c478bd9Sstevel@tonic-gate 
4080*7c478bd9Sstevel@tonic-gate static void
4081*7c478bd9Sstevel@tonic-gate check_sleeping_locks(graph_t *gp)
4082*7c478bd9Sstevel@tonic-gate {
4083*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock1, *lock2;
4084*7c478bd9Sstevel@tonic-gate 	edge_t *ep;
4085*7c478bd9Sstevel@tonic-gate 	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4086*7c478bd9Sstevel@tonic-gate 				lock1 = lock1->l_next) {
4087*7c478bd9Sstevel@tonic-gate 				ASSERT(!IS_BARRIER(lock1));
4088*7c478bd9Sstevel@tonic-gate 	for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4089*7c478bd9Sstevel@tonic-gate 				lock2 = lock2->l_next) {
4090*7c478bd9Sstevel@tonic-gate 		if (lock1->l_vnode == lock2->l_vnode) {
4091*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock2, lock1)) {
4092*7c478bd9Sstevel@tonic-gate 				ASSERT(!IS_GRANTED(lock1));
4093*7c478bd9Sstevel@tonic-gate 				ASSERT(!NOT_BLOCKED(lock1));
4094*7c478bd9Sstevel@tonic-gate 				path(lock1, lock2);
4095*7c478bd9Sstevel@tonic-gate 			}
4096*7c478bd9Sstevel@tonic-gate 		}
4097*7c478bd9Sstevel@tonic-gate 	}
4098*7c478bd9Sstevel@tonic-gate 
4099*7c478bd9Sstevel@tonic-gate 	for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4100*7c478bd9Sstevel@tonic-gate 					lock2 = lock2->l_next) {
4101*7c478bd9Sstevel@tonic-gate 				ASSERT(!IS_BARRIER(lock1));
4102*7c478bd9Sstevel@tonic-gate 		if (lock1->l_vnode == lock2->l_vnode) {
4103*7c478bd9Sstevel@tonic-gate 			if (BLOCKS(lock2, lock1)) {
4104*7c478bd9Sstevel@tonic-gate 				ASSERT(!IS_GRANTED(lock1));
4105*7c478bd9Sstevel@tonic-gate 				ASSERT(!NOT_BLOCKED(lock1));
4106*7c478bd9Sstevel@tonic-gate 				path(lock1, lock2);
4107*7c478bd9Sstevel@tonic-gate 			}
4108*7c478bd9Sstevel@tonic-gate 		}
4109*7c478bd9Sstevel@tonic-gate 	}
4110*7c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock1);
4111*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
4112*7c478bd9Sstevel@tonic-gate 		ASSERT(BLOCKS(ep->to_vertex, lock1));
4113*7c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
4114*7c478bd9Sstevel@tonic-gate 	}
4115*7c478bd9Sstevel@tonic-gate 	}
4116*7c478bd9Sstevel@tonic-gate }
4117*7c478bd9Sstevel@tonic-gate 
4118*7c478bd9Sstevel@tonic-gate static int
4119*7c478bd9Sstevel@tonic-gate level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4120*7c478bd9Sstevel@tonic-gate {
4121*7c478bd9Sstevel@tonic-gate 	edge_t	*ep;
4122*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t	*vertex;
4123*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
4124*7c478bd9Sstevel@tonic-gate 
4125*7c478bd9Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
4126*7c478bd9Sstevel@tonic-gate 
4127*7c478bd9Sstevel@tonic-gate 	flk_graph_uncolor(lock1->l_graph);
4128*7c478bd9Sstevel@tonic-gate 	ep = FIRST_ADJ(lock1);
4129*7c478bd9Sstevel@tonic-gate 	ASSERT(ep != HEAD(lock1));
4130*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
4131*7c478bd9Sstevel@tonic-gate 		if (no_path)
4132*7c478bd9Sstevel@tonic-gate 			ASSERT(ep->to_vertex != lock2);
4133*7c478bd9Sstevel@tonic-gate 		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4134*7c478bd9Sstevel@tonic-gate 		COLOR(ep->to_vertex);
4135*7c478bd9Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
4136*7c478bd9Sstevel@tonic-gate 	}
4137*7c478bd9Sstevel@tonic-gate 
4138*7c478bd9Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4139*7c478bd9Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_dstack);
4140*7c478bd9Sstevel@tonic-gate 		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4141*7c478bd9Sstevel@tonic-gate 						ep = NEXT_ADJ(ep)) {
4142*7c478bd9Sstevel@tonic-gate 			if (COLORED(ep->to_vertex))
4143*7c478bd9Sstevel@tonic-gate 				continue;
4144*7c478bd9Sstevel@tonic-gate 			COLOR(ep->to_vertex);
4145*7c478bd9Sstevel@tonic-gate 			if (ep->to_vertex == lock2)
4146*7c478bd9Sstevel@tonic-gate 				return (1);
4147*7c478bd9Sstevel@tonic-gate 
4148*7c478bd9Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4149*7c478bd9Sstevel@tonic-gate 		}
4150*7c478bd9Sstevel@tonic-gate 	}
4151*7c478bd9Sstevel@tonic-gate 	return (0);
4152*7c478bd9Sstevel@tonic-gate }
4153*7c478bd9Sstevel@tonic-gate 
4154*7c478bd9Sstevel@tonic-gate static void
4155*7c478bd9Sstevel@tonic-gate check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4156*7c478bd9Sstevel@tonic-gate {
4157*7c478bd9Sstevel@tonic-gate 	lock_descriptor_t *lock;
4158*7c478bd9Sstevel@tonic-gate 
4159*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4160*7c478bd9Sstevel@tonic-gate 
4161*7c478bd9Sstevel@tonic-gate 	if (lock) {
4162*7c478bd9Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4163*7c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_pid == pid &&
4164*7c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid)
4165*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
4166*7c478bd9Sstevel@tonic-gate 				    "owner pid %d's lock %p in active queue",
4167*7c478bd9Sstevel@tonic-gate 				    pid, (void *)lock);
4168*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
4169*7c478bd9Sstevel@tonic-gate 		}
4170*7c478bd9Sstevel@tonic-gate 	}
4171*7c478bd9Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4172*7c478bd9Sstevel@tonic-gate 
4173*7c478bd9Sstevel@tonic-gate 	if (lock) {
4174*7c478bd9Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4175*7c478bd9Sstevel@tonic-gate 			if (lock->l_flock.l_pid == pid &&
4176*7c478bd9Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid)
4177*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
4178*7c478bd9Sstevel@tonic-gate 				    "owner pid %d's lock %p in sleep queue",
4179*7c478bd9Sstevel@tonic-gate 				    pid, (void *)lock);
4180*7c478bd9Sstevel@tonic-gate 			lock = lock->l_next;
4181*7c478bd9Sstevel@tonic-gate 		}
4182*7c478bd9Sstevel@tonic-gate 	}
4183*7c478bd9Sstevel@tonic-gate }
4184*7c478bd9Sstevel@tonic-gate 
4185*7c478bd9Sstevel@tonic-gate static int
4186*7c478bd9Sstevel@tonic-gate level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4187*7c478bd9Sstevel@tonic-gate {
4188*7c478bd9Sstevel@tonic-gate 	edge_t *ep = FIRST_ADJ(lock1);
4189*7c478bd9Sstevel@tonic-gate 
4190*7c478bd9Sstevel@tonic-gate 	while (ep != HEAD(lock1)) {
4191*7c478bd9Sstevel@tonic-gate 		if (ep->to_vertex == lock2)
4192*7c478bd9Sstevel@tonic-gate 			return (1);
4193*7c478bd9Sstevel@tonic-gate 		else
4194*7c478bd9Sstevel@tonic-gate 			ep = NEXT_ADJ(ep);
4195*7c478bd9Sstevel@tonic-gate 	}
4196*7c478bd9Sstevel@tonic-gate 	return (0);
4197*7c478bd9Sstevel@tonic-gate }
4198*7c478bd9Sstevel@tonic-gate 
4199*7c478bd9Sstevel@tonic-gate static int
4200*7c478bd9Sstevel@tonic-gate no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4201*7c478bd9Sstevel@tonic-gate {
4202*7c478bd9Sstevel@tonic-gate 	return (!level_two_path(lock1, lock2, 1));
4203*7c478bd9Sstevel@tonic-gate }
4204*7c478bd9Sstevel@tonic-gate 
4205*7c478bd9Sstevel@tonic-gate static void
4206*7c478bd9Sstevel@tonic-gate path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4207*7c478bd9Sstevel@tonic-gate {
4208*7c478bd9Sstevel@tonic-gate 	if (level_one_path(lock1, lock2)) {
4209*7c478bd9Sstevel@tonic-gate 		if (level_two_path(lock1, lock2, 0) != 0) {
4210*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
4211*7c478bd9Sstevel@tonic-gate 			    "one edge one path from lock1 %p lock2 %p",
4212*7c478bd9Sstevel@tonic-gate 			    (void *)lock1, (void *)lock2);
4213*7c478bd9Sstevel@tonic-gate 		}
4214*7c478bd9Sstevel@tonic-gate 	} else if (no_path(lock1, lock2)) {
4215*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC,
4216*7c478bd9Sstevel@tonic-gate 		    "No path from  lock1 %p to lock2 %p",
4217*7c478bd9Sstevel@tonic-gate 		    (void *)lock1, (void *)lock2);
4218*7c478bd9Sstevel@tonic-gate 	}
4219*7c478bd9Sstevel@tonic-gate }
4220*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4221