xref: /titanic_51/usr/src/uts/common/klm/nlm_impl.c (revision 17ad7f9fd28ceea21aea94421cb8ada963285765)
1 /*
2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3  * Authors: Doug Rabson <dfr@rabson.org>
4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
30  * Copyright (c) 2012 by Delphix. All rights reserved.
31  */
32 
33 /*
34  * NFS LockManager, start/stop, support functions, etc.
35  * Most of the interesting code is here.
36  *
37  * Source code derived from FreeBSD nlm_prot_impl.c
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/thread.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/mount.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/share.h>
49 #include <sys/socket.h>
50 #include <sys/syscall.h>
51 #include <sys/syslog.h>
52 #include <sys/systm.h>
53 #include <sys/class.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 #include <sys/vfs.h>
57 #include <sys/queue.h>
58 #include <sys/bitmap.h>
59 #include <sys/sdt.h>
60 #include <netinet/in.h>
61 
62 #include <rpc/rpc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/pmap_prot.h>
65 #include <rpc/pmap_clnt.h>
66 #include <rpc/rpcb_prot.h>
67 
68 #include <rpcsvc/nlm_prot.h>
69 #include <rpcsvc/sm_inter.h>
70 #include <rpcsvc/nsm_addr.h>
71 
72 #include <nfs/nfs.h>
73 #include <nfs/nfs_clnt.h>
74 #include <nfs/export.h>
75 #include <nfs/rnode.h>
76 #include <nfs/lm.h>
77 
78 #include "nlm_impl.h"
79 
/*
 * Pairs a kernel network configuration with the textual netid
 * that names it. Instances live in the nlm_netconfigs table.
 */
struct nlm_knc {
	struct knetconfig	n_knc;		/* transport description */
	const char		*n_netid;	/* netid string, e.g. "udp" */
};
84 
/*
 * Number of attempts NLM makes to obtain the RPC binding
 * of local statd.
 */
#define	NLM_NSM_RPCBIND_RETRIES 10

/*
 * Timeout (in seconds) NLM waits before making another
 * attempt to obtain the RPC binding of local statd.
 */
#define	NLM_NSM_RPCBIND_TIMEOUT 5

/*
 * Total number of sysids in NLM sysid bitmap
 */
#define	NLM_BMAP_NITEMS	(LM_SYSID_MAX + 1)

/*
 * Number of ulong_t words in the bitmap that is used
 * for allocation of sysid numbers.
 *
 * The division is rounded up so that the bitmap always has room
 * for NLM_BMAP_NITEMS bits, even when that count is not a
 * multiple of BT_NBIPUL. When it is a multiple, the result is
 * the same as that of a plain division.
 */
#define	NLM_BMAP_WORDS	\
	((NLM_BMAP_NITEMS + BT_NBIPUL - 1) / BT_NBIPUL)

/*
 * Given an integer x, the macro returns
 * -1 if x is negative,
 *  0 if x is zero
 *  1 if x is positive
 *
 * NOTE: x is evaluated twice, so it must not have side effects.
 */
#define	SIGN(x) (((x) > 0) - ((x) < 0))

/*
 * Number of elements in a true array (not a pointer).
 */
#define	ARRSIZE(arr)	(sizeof (arr) / sizeof ((arr)[0]))

/*
 * Number of entries in the nlm_netconfigs table.
 */
#define	NLM_KNCS	ARRSIZE(nlm_netconfigs)
118 
/*
 * Reader/writer lock held by nlm_globals_register(),
 * nlm_globals_unregister() and nlm_kmem_reclaim() while
 * they touch nlm_zones_list.
 * NOTE(review): the place where this lock is initialized is
 * not visible in this part of the file.
 */
krwlock_t lm_lck;

/*
 * Zero timeout for asynchronous NLM RPC operations
 */
static const struct timeval nlm_rpctv_zero = { 0,  0 };

/*
 * List of all per-zone nlm_globals instances
 * linked together.
 */
static struct nlm_globals_list nlm_zones_list; /* (g) */

/*
 * NLM kmem caches (created in nlm_init())
 */
static struct kmem_cache *nlm_hosts_cache = NULL;
static struct kmem_cache *nlm_vhold_cache = NULL;

/*
 * A bitmap for allocation of new sysids.
 * Sysid is a unique number between LM_SYSID
 * and LM_SYSID_MAX. Sysid represents a unique remote
 * host that does file locks on the given host.
 */
static ulong_t	nlm_sysid_bmap[NLM_BMAP_WORDS];	/* (g) */
static int	nlm_sysid_nidx;			/* (g) */
146 
/*
 * RPC service registration for all transports.
 *
 * Judging by the per-entry comments, each entry is presumably
 * { program, lowest version, highest version, dispatch routine }
 * -- TODO confirm against the SVC_CALLOUT definition.
 */
static SVC_CALLOUT nlm_svcs[] = {
	{ NLM_PROG, 4, 4, nlm_prog_4 },	/* NLM4_VERS */
	{ NLM_PROG, 1, 3, nlm_prog_3 }	/* NLM_VERS - NLM_VERSX */
};

/*
 * Callout table wrapping nlm_svcs: entry count, a FALSE flag
 * and the entries themselves.
 * NOTE(review): the meaning of the FALSE flag is defined by
 * SVC_CALLOUT_TABLE, which is not visible here.
 */
static SVC_CALLOUT_TABLE nlm_sct = {
	ARRSIZE(nlm_svcs),
	FALSE,
	nlm_svcs
};
160 
/*
 * Static table of all netid/knetconfig pairs the network
 * lock manager can work with. The nlm_netconfigs table
 * is used when we need to get a valid knetconfig by
 * netid and vice versa.
 *
 * Knetconfigs are activated either by the call from the
 * user-space lockd daemon (server side) or by taking the
 * knetconfig from NFS mountinfo (client side).
 *
 * Every entry starts out with NODEV in its last knetconfig
 * slot; presumably the real device is filled in on
 * activation -- TODO confirm.
 */
static struct nlm_knc nlm_netconfigs[] = { /* (g) */
	/* UDP */
	{
		{ NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
		"udp",
	},
	/* TCP */
	{
		{ NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
		"tcp",
	},
	/* UDP over IPv6 */
	{
		{ NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
		"udp6",
	},
	/* TCP over IPv6 */
	{
		{ NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
		"tcp6",
	},
	/* ticlts (connectionless loopback transport) */
	{
		{ NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticlts",
	},
	/* ticotsord (connection-oriented, orderly-release loopback) */
	{
		{ NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticotsord",
	},
};
203 
/*
 * NLM misc. functions
 */
static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
static void nlm_kmem_reclaim(void *);
static void nlm_pool_shutdown(void);
static void nlm_suspend_zone(struct nlm_globals *);
static void nlm_resume_zone(struct nlm_globals *);
static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);

/*
 * NLM thread functions
 */
static void nlm_gc(struct nlm_globals *);
static void nlm_reclaimer(struct nlm_host *);

/*
 * NLM NSM functions
 */
static int nlm_init_local_knc(struct knetconfig *);
static int nlm_nsm_init_local(struct nlm_nsm *);
static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
static void nlm_nsm_fini(struct nlm_nsm *);
static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);

/*
 * NLM host functions
 */
static int nlm_host_ctor(void *, void *, int);
static void nlm_host_dtor(void *, void *);
static void nlm_host_destroy(struct nlm_host *);
static struct nlm_host *nlm_host_create(char *, const char *,
    struct knetconfig *, struct netbuf *);
static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
    const char *, struct netbuf *, avl_index_t *);
static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
static void nlm_host_gc_vholds(struct nlm_host *);
static bool_t nlm_host_has_srv_locks(struct nlm_host *);
static bool_t nlm_host_has_cli_locks(struct nlm_host *);
static bool_t nlm_host_has_locks(struct nlm_host *);

/*
 * NLM vhold functions
 */
static int nlm_vhold_ctor(void *, void *, int);
static void nlm_vhold_dtor(void *, void *);
static void nlm_vhold_destroy(struct nlm_host *,
    struct nlm_vhold *);
static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
static void nlm_vhold_clean(struct nlm_vhold *, int);

/*
 * NLM client/server sleeping locks/share reservation functions
 *
 * NOTE: unlike its neighbours, nlm_slreq_find_locked() is
 * declared without "static".
 */
struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
    struct nlm_vhold *, struct flock64 *);
static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
static void nlm_shres_destroy_item(struct nlm_shres *);
static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
268 
269 /*
270  * NLM initialization functions.
271  */
272 void
273 nlm_init(void)
274 {
275 	nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
276 	    sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
277 	    nlm_kmem_reclaim, NULL, NULL, 0);
278 
279 	nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
280 	    sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
281 	    NULL, NULL, NULL, 0);
282 
283 	nlm_rpc_init();
284 	TAILQ_INIT(&nlm_zones_list);
285 
286 	/* initialize sysids bitmap */
287 	bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
288 	nlm_sysid_nidx = 1;
289 
290 	/*
291 	 * Reserv the sysid #0, because it's associated
292 	 * with local locks only. Don't let to allocate
293 	 * it for remote locks.
294 	 */
295 	BT_SET(nlm_sysid_bmap, 0);
296 }
297 
298 void
299 nlm_globals_register(struct nlm_globals *g)
300 {
301 	rw_enter(&lm_lck, RW_WRITER);
302 	TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
303 	rw_exit(&lm_lck);
304 }
305 
306 void
307 nlm_globals_unregister(struct nlm_globals *g)
308 {
309 	rw_enter(&lm_lck, RW_WRITER);
310 	TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
311 	rw_exit(&lm_lck);
312 }
313 
314 /* ARGSUSED */
315 static void
316 nlm_kmem_reclaim(void *cdrarg)
317 {
318 	struct nlm_globals *g;
319 
320 	rw_enter(&lm_lck, RW_READER);
321 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
322 		cv_broadcast(&g->nlm_gc_sched_cv);
323 
324 	rw_exit(&lm_lck);
325 }
326 
327 /*
328  * NLM garbage collector thread (GC).
329  *
330  * NLM GC periodically checks whether there're any host objects
331  * that can be cleaned up. It also releases stale vnodes that
332  * live on the server side (under protection of vhold objects).
333  *
334  * NLM host objects are cleaned up from GC thread because
335  * operations helping us to determine whether given host has
336  * any locks can be quite expensive and it's not good to call
337  * them every time the very last reference to the host is dropped.
338  * Thus we use "lazy" approach for hosts cleanup.
339  *
340  * The work of GC is to release stale vnodes on the server side
341  * and destroy hosts that haven't any locks and any activity for
342  * some time (i.e. idle hosts).
343  */
344 static void
345 nlm_gc(struct nlm_globals *g)
346 {
347 	struct nlm_host *hostp;
348 	clock_t now, idle_period;
349 
350 	idle_period = SEC_TO_TICK(g->cn_idle_tmo);
351 	mutex_enter(&g->lock);
352 	for (;;) {
353 		/*
354 		 * GC thread can be explicitly scheduled from
355 		 * memory reclamation function.
356 		 */
357 		(void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
358 		    ddi_get_lbolt() + idle_period);
359 
360 		/*
361 		 * NLM is shutting down, time to die.
362 		 */
363 		if (g->run_status == NLM_ST_STOPPING)
364 			break;
365 
366 		now = ddi_get_lbolt();
367 		DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
368 		    clock_t, now);
369 
370 		/*
371 		 * Find all obviously unused vholds and destroy them.
372 		 */
373 		for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
374 		    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
375 			struct nlm_vhold *nvp;
376 
377 			mutex_enter(&hostp->nh_lock);
378 
379 			nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
380 			while (nvp != NULL) {
381 				struct nlm_vhold *new_nvp;
382 
383 				new_nvp = TAILQ_NEXT(nvp, nv_link);
384 
385 				/*
386 				 * If these conditions are met, the vhold is
387 				 * obviously unused and we will destroy it.  In
388 				 * a case either v_filocks and/or v_shrlocks is
389 				 * non-NULL the vhold might still be unused by
390 				 * the host, but it is expensive to check that.
391 				 * We defer such check until the host is idle.
392 				 * The expensive check is done below without
393 				 * the global lock held.
394 				 */
395 				if (nvp->nv_refcnt == 0 &&
396 				    nvp->nv_vp->v_filocks == NULL &&
397 				    nvp->nv_vp->v_shrlocks == NULL) {
398 					nlm_vhold_destroy(hostp, nvp);
399 				}
400 
401 				nvp = new_nvp;
402 			}
403 
404 			mutex_exit(&hostp->nh_lock);
405 		}
406 
407 		/*
408 		 * Handle all hosts that are unused at the moment
409 		 * until we meet one with idle timeout in future.
410 		 */
411 		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
412 			bool_t has_locks;
413 
414 			if (hostp->nh_idle_timeout > now)
415 				break;
416 
417 			/*
418 			 * Drop global lock while doing expensive work
419 			 * on this host. We'll re-check any conditions
420 			 * that might change after retaking the global
421 			 * lock.
422 			 */
423 			mutex_exit(&g->lock);
424 			mutex_enter(&hostp->nh_lock);
425 
426 			/*
427 			 * nlm_globals lock was dropped earlier because
428 			 * garbage collecting of vholds and checking whether
429 			 * host has any locks/shares are expensive operations.
430 			 */
431 			nlm_host_gc_vholds(hostp);
432 			has_locks = nlm_host_has_locks(hostp);
433 
434 			mutex_exit(&hostp->nh_lock);
435 			mutex_enter(&g->lock);
436 
437 			/*
438 			 * While we were doing expensive operations
439 			 * outside of nlm_globals critical section,
440 			 * somebody could take the host and remove it
441 			 * from the idle list.  Whether its been
442 			 * reinserted or not, our information about
443 			 * the host is outdated, and we should take no
444 			 * further action.
445 			 */
446 			if ((hostp->nh_flags & NLM_NH_INIDLE) == 0 ||
447 			    hostp->nh_idle_timeout > now)
448 				continue;
449 
450 			/*
451 			 * If the host has locks we have to renew the
452 			 * host's timeout and put it at the end of LRU
453 			 * list.
454 			 */
455 			if (has_locks) {
456 				TAILQ_REMOVE(&g->nlm_idle_hosts,
457 				    hostp, nh_link);
458 				hostp->nh_idle_timeout = now + idle_period;
459 				TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
460 				    hostp, nh_link);
461 				continue;
462 			}
463 
464 			/*
465 			 * We're here if all the following conditions hold:
466 			 * 1) Host hasn't any locks or share reservations
467 			 * 2) Host is unused
468 			 * 3) Host wasn't touched by anyone at least for
469 			 *    g->cn_idle_tmo seconds.
470 			 *
471 			 * So, now we can destroy it.
472 			 */
473 			nlm_host_unregister(g, hostp);
474 			mutex_exit(&g->lock);
475 
476 			nlm_host_unmonitor(g, hostp);
477 			nlm_host_destroy(hostp);
478 			mutex_enter(&g->lock);
479 			if (g->run_status == NLM_ST_STOPPING)
480 				break;
481 
482 		}
483 
484 		DTRACE_PROBE(gc__end);
485 	}
486 
487 	DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
488 
489 	/* Let others know that GC has died */
490 	g->nlm_gc_thread = NULL;
491 	mutex_exit(&g->lock);
492 
493 	cv_broadcast(&g->nlm_gc_finish_cv);
494 	zthread_exit();
495 }
496 
497 /*
498  * Thread reclaim locks/shares acquired by the client side
499  * on the given server represented by hostp.
500  */
501 static void
502 nlm_reclaimer(struct nlm_host *hostp)
503 {
504 	struct nlm_globals *g;
505 
506 	mutex_enter(&hostp->nh_lock);
507 	hostp->nh_reclaimer = curthread;
508 	mutex_exit(&hostp->nh_lock);
509 
510 	g = zone_getspecific(nlm_zone_key, curzone);
511 	nlm_reclaim_client(g, hostp);
512 
513 	mutex_enter(&hostp->nh_lock);
514 	hostp->nh_flags &= ~NLM_NH_RECLAIM;
515 	hostp->nh_reclaimer = NULL;
516 	cv_broadcast(&hostp->nh_recl_cv);
517 	mutex_exit(&hostp->nh_lock);
518 
519 	/*
520 	 * Host was explicitly referenced before
521 	 * nlm_reclaim() was called, release it
522 	 * here.
523 	 */
524 	nlm_host_release(g, hostp);
525 	zthread_exit();
526 }
527 
528 /*
529  * Copy a struct netobj.  (see xdr.h)
530  */
531 void
532 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
533 {
534 	dst->n_len = src->n_len;
535 	dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
536 	bcopy(src->n_bytes, dst->n_bytes, src->n_len);
537 }
538 
539 /*
540  * An NLM specificw replacement for clnt_call().
541  * nlm_clnt_call() is used by all RPC functions generated
542  * from nlm_prot.x specification. The function is aware
543  * about some pitfalls of NLM RPC procedures and has a logic
544  * that handles them properly.
545  */
546 enum clnt_stat
547 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
548     caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
549 {
550 	k_sigset_t oldmask;
551 	enum clnt_stat stat;
552 	bool_t sig_blocked = FALSE;
553 
554 	/*
555 	 * If NLM RPC procnum is one of the NLM _RES procedures
556 	 * that are used to reply to asynchronous NLM RPC
557 	 * (MSG calls), explicitly set RPC timeout to zero.
558 	 * Client doesn't send a reply to RES procedures, so
559 	 * we don't need to wait anything.
560 	 *
561 	 * NOTE: we ignore NLM4_*_RES procnums because they are
562 	 * equal to NLM_*_RES numbers.
563 	 */
564 	if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
565 		wait = nlm_rpctv_zero;
566 
567 	/*
568 	 * We need to block signals in case of NLM_CANCEL RPC
569 	 * in order to prevent interruption of network RPC
570 	 * calls.
571 	 */
572 	if (procnum == NLM_CANCEL) {
573 		k_sigset_t newmask;
574 
575 		sigfillset(&newmask);
576 		sigreplace(&newmask, &oldmask);
577 		sig_blocked = TRUE;
578 	}
579 
580 	stat = clnt_call(clnt, procnum, xdr_args,
581 	    argsp, xdr_result, resultp, wait);
582 
583 	/*
584 	 * Restore signal mask back if signals were blocked
585 	 */
586 	if (sig_blocked)
587 		sigreplace(&oldmask, (k_sigset_t *)NULL);
588 
589 	return (stat);
590 }
591 
592 /*
593  * Suspend NLM client/server in the given zone.
594  *
595  * During suspend operation we mark those hosts
596  * that have any locks with NLM_NH_SUSPEND flags,
597  * so that they can be checked later, when resume
598  * operation occurs.
599  */
600 static void
601 nlm_suspend_zone(struct nlm_globals *g)
602 {
603 	struct nlm_host *hostp;
604 	struct nlm_host_list all_hosts;
605 
606 	/*
607 	 * Note that while we're doing suspend, GC thread is active
608 	 * and it can destroy some hosts while we're walking through
609 	 * the hosts tree. To prevent that and make suspend logic
610 	 * a bit more simple we put all hosts to local "all_hosts"
611 	 * list and increment reference counter of each host.
612 	 * This guaranties that no hosts will be released while
613 	 * we're doing suspend.
614 	 * NOTE: reference of each host must be dropped during
615 	 * resume operation.
616 	 */
617 	TAILQ_INIT(&all_hosts);
618 	mutex_enter(&g->lock);
619 	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
620 	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
621 		/*
622 		 * If host is idle, remove it from idle list and
623 		 * clear idle flag. That is done to prevent GC
624 		 * from touching this host.
625 		 */
626 		if (hostp->nh_flags & NLM_NH_INIDLE) {
627 			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
628 			hostp->nh_flags &= ~NLM_NH_INIDLE;
629 		}
630 
631 		hostp->nh_refs++;
632 		TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
633 	}
634 
635 	/*
636 	 * Now we can walk through all hosts on the system
637 	 * with zone globals lock released. The fact the
638 	 * we have taken a reference to each host guaranties
639 	 * that no hosts can be destroyed during that process.
640 	 */
641 	mutex_exit(&g->lock);
642 	while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
643 		mutex_enter(&hostp->nh_lock);
644 		if (nlm_host_has_locks(hostp))
645 			hostp->nh_flags |= NLM_NH_SUSPEND;
646 
647 		mutex_exit(&hostp->nh_lock);
648 		TAILQ_REMOVE(&all_hosts, hostp, nh_link);
649 	}
650 }
651 
652 /*
653  * Resume NLM hosts for the given zone.
654  *
655  * nlm_resume_zone() is called after hosts were suspended
656  * (see nlm_suspend_zone) and its main purpose to check
657  * whether remote locks owned by hosts are still in consistent
658  * state. If they aren't, resume function tries to reclaim
659  * locks (for client side hosts) and clean locks (for
660  * server side hosts).
661  */
662 static void
663 nlm_resume_zone(struct nlm_globals *g)
664 {
665 	struct nlm_host *hostp, *h_next;
666 
667 	mutex_enter(&g->lock);
668 	hostp = avl_first(&g->nlm_hosts_tree);
669 
670 	/*
671 	 * In nlm_suspend_zone() the reference counter of each
672 	 * host was incremented, so we can safely iterate through
673 	 * all hosts without worrying that any host we touch will
674 	 * be removed at the moment.
675 	 */
676 	while (hostp != NULL) {
677 		struct nlm_nsm nsm;
678 		enum clnt_stat stat;
679 		int32_t sm_state;
680 		int error;
681 		bool_t resume_failed = FALSE;
682 
683 		h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
684 		mutex_exit(&g->lock);
685 
686 		DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
687 
688 		/*
689 		 * Suspend operation marked that the host doesn't
690 		 * have any locks. Skip it.
691 		 */
692 		if (!(hostp->nh_flags & NLM_NH_SUSPEND))
693 			goto cycle_end;
694 
695 		error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
696 		if (error != 0) {
697 			NLM_ERR("Resume: Failed to contact to NSM of host %s "
698 			    "[error=%d]\n", hostp->nh_name, error);
699 			resume_failed = TRUE;
700 			goto cycle_end;
701 		}
702 
703 		stat = nlm_nsm_stat(&nsm, &sm_state);
704 		if (stat != RPC_SUCCESS) {
705 			NLM_ERR("Resume: Failed to call SM_STAT operation for "
706 			    "host %s [stat=%d]\n", hostp->nh_name, stat);
707 			resume_failed = TRUE;
708 			nlm_nsm_fini(&nsm);
709 			goto cycle_end;
710 		}
711 
712 		if (sm_state != hostp->nh_state) {
713 			/*
714 			 * Current SM state of the host isn't equal
715 			 * to the one host had when it was suspended.
716 			 * Probably it was rebooted. Try to reclaim
717 			 * locks if the host has any on its client side.
718 			 * Also try to clean up its server side locks
719 			 * (if the host has any).
720 			 */
721 			nlm_host_notify_client(hostp, sm_state);
722 			nlm_host_notify_server(hostp, sm_state);
723 		}
724 
725 		nlm_nsm_fini(&nsm);
726 
727 cycle_end:
728 		if (resume_failed) {
729 			/*
730 			 * Resume failed for the given host.
731 			 * Just clean up all resources it owns.
732 			 */
733 			nlm_host_notify_server(hostp, 0);
734 			nlm_client_cancel_all(g, hostp);
735 		}
736 
737 		hostp->nh_flags &= ~NLM_NH_SUSPEND;
738 		nlm_host_release(g, hostp);
739 		hostp = h_next;
740 		mutex_enter(&g->lock);
741 	}
742 
743 	mutex_exit(&g->lock);
744 }
745 
746 /*
747  * NLM functions responsible for operations on NSM handle.
748  */
749 
750 /*
751  * Initialize knetconfig that is used for communication
752  * with local statd via loopback interface.
753  */
754 static int
755 nlm_init_local_knc(struct knetconfig *knc)
756 {
757 	int error;
758 	vnode_t *vp;
759 
760 	bzero(knc, sizeof (*knc));
761 	error = lookupname("/dev/tcp", UIO_SYSSPACE,
762 	    FOLLOW, NULLVPP, &vp);
763 	if (error != 0)
764 		return (error);
765 
766 	knc->knc_semantics = NC_TPI_COTS;
767 	knc->knc_protofmly = NC_INET;
768 	knc->knc_proto = NC_TCP;
769 	knc->knc_rdev = vp->v_rdev;
770 	VN_RELE(vp);
771 
772 
773 	return (0);
774 }
775 
776 /*
777  * Initialize NSM handle that will be used to talk
778  * to local statd via loopback interface.
779  */
780 static int
781 nlm_nsm_init_local(struct nlm_nsm *nsm)
782 {
783 	int error;
784 	struct knetconfig knc;
785 	struct sockaddr_in sin;
786 	struct netbuf nb;
787 
788 	error = nlm_init_local_knc(&knc);
789 	if (error != 0)
790 		return (error);
791 
792 	bzero(&sin, sizeof (sin));
793 	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
794 	sin.sin_family = AF_INET;
795 
796 	nb.buf = (char *)&sin;
797 	nb.len = nb.maxlen = sizeof (sin);
798 
799 	return (nlm_nsm_init(nsm, &knc, &nb));
800 }
801 
802 /*
803  * Initialize NSM handle used for talking to statd
804  */
805 static int
806 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
807 {
808 	enum clnt_stat stat;
809 	int error, retries;
810 
811 	bzero(nsm, sizeof (*nsm));
812 	nsm->ns_knc = *knc;
813 	nlm_copy_netbuf(&nsm->ns_addr, nb);
814 
815 	/*
816 	 * Try several times to get the port of statd service,
817 	 * If rpcbind_getaddr returns  RPC_PROGNOTREGISTERED,
818 	 * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
819 	 * seconds berofore.
820 	 */
821 	for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
822 		stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
823 		    SM_VERS, &nsm->ns_addr);
824 		if (stat != RPC_SUCCESS) {
825 			if (stat == RPC_PROGNOTREGISTERED) {
826 				delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
827 				continue;
828 			}
829 		}
830 
831 		break;
832 	}
833 
834 	if (stat != RPC_SUCCESS) {
835 		DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
836 		    int, retries);
837 		error = ENOENT;
838 		goto error;
839 	}
840 
841 	/*
842 	 * Create an RPC handle that'll be used for communication with local
843 	 * statd using the status monitor protocol.
844 	 */
845 	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
846 	    0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
847 	if (error != 0)
848 		goto error;
849 
850 	/*
851 	 * Create an RPC handle that'll be used for communication with the
852 	 * local statd using the address registration protocol.
853 	 */
854 	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
855 	    NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
856 	if (error != 0)
857 		goto error;
858 
859 	sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
860 	return (0);
861 
862 error:
863 	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
864 	if (nsm->ns_handle)
865 		CLNT_DESTROY(nsm->ns_handle);
866 
867 	return (error);
868 }
869 
870 static void
871 nlm_nsm_fini(struct nlm_nsm *nsm)
872 {
873 	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
874 	CLNT_DESTROY(nsm->ns_addr_handle);
875 	nsm->ns_addr_handle = NULL;
876 	CLNT_DESTROY(nsm->ns_handle);
877 	nsm->ns_handle = NULL;
878 	sema_destroy(&nsm->ns_sem);
879 }
880 
881 static enum clnt_stat
882 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
883 {
884 	enum clnt_stat stat;
885 
886 	sema_p(&nsm->ns_sem);
887 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
888 	stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
889 	sema_v(&nsm->ns_sem);
890 
891 	return (stat);
892 }
893 
894 static enum clnt_stat
895 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
896 {
897 	struct sm_name args;
898 	struct sm_stat_res res;
899 	enum clnt_stat stat;
900 
901 	args.mon_name = uts_nodename();
902 	bzero(&res, sizeof (res));
903 
904 	sema_p(&nsm->ns_sem);
905 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
906 	stat = sm_stat_1(&args, &res, nsm->ns_handle);
907 	sema_v(&nsm->ns_sem);
908 
909 	if (stat == RPC_SUCCESS)
910 		*out_stat = res.state;
911 
912 	return (stat);
913 }
914 
915 static enum clnt_stat
916 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
917 {
918 	struct mon args;
919 	struct sm_stat_res res;
920 	enum clnt_stat stat;
921 
922 	bzero(&args, sizeof (args));
923 	bzero(&res, sizeof (res));
924 
925 	args.mon_id.mon_name = hostname;
926 	args.mon_id.my_id.my_name = uts_nodename();
927 	args.mon_id.my_id.my_prog = NLM_PROG;
928 	args.mon_id.my_id.my_vers = NLM_SM;
929 	args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
930 	bcopy(&priv, args.priv, sizeof (priv));
931 
932 	sema_p(&nsm->ns_sem);
933 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
934 	stat = sm_mon_1(&args, &res, nsm->ns_handle);
935 	sema_v(&nsm->ns_sem);
936 
937 	return (stat);
938 }
939 
940 static enum clnt_stat
941 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
942 {
943 	struct mon_id args;
944 	struct sm_stat res;
945 	enum clnt_stat stat;
946 
947 	bzero(&args, sizeof (args));
948 	bzero(&res, sizeof (res));
949 
950 	args.mon_name = hostname;
951 	args.my_id.my_name = uts_nodename();
952 	args.my_id.my_prog = NLM_PROG;
953 	args.my_id.my_vers = NLM_SM;
954 	args.my_id.my_proc = NLM_SM_NOTIFY1;
955 
956 	sema_p(&nsm->ns_sem);
957 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
958 	stat = sm_unmon_1(&args, &res, nsm->ns_handle);
959 	sema_v(&nsm->ns_sem);
960 
961 	return (stat);
962 }
963 
964 static enum clnt_stat
965 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
966 {
967 	struct reg1args args = { 0 };
968 	struct reg1res res = { 0 };
969 	enum clnt_stat stat;
970 
971 	args.family = family;
972 	args.name = name;
973 	args.address = *address;
974 
975 	sema_p(&nsm->ns_sem);
976 	nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
977 	stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
978 	sema_v(&nsm->ns_sem);
979 
980 	return (stat);
981 }
982 
983 /*
984  * Get NLM vhold object corresponding to vnode "vp".
985  * If no such object was found, create a new one.
986  *
987  * The purpose of this function is to associate vhold
988  * object with given vnode, so that:
989  * 1) vnode is hold (VN_HOLD) while vhold object is alive.
990  * 2) host has a track of all vnodes it touched by lock
991  *    or share operations. These vnodes are accessible
992  *    via collection of vhold objects.
993  */
994 struct nlm_vhold *
995 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
996 {
997 	struct nlm_vhold *nvp, *new_nvp = NULL;
998 
999 	mutex_enter(&hostp->nh_lock);
1000 	nvp = nlm_vhold_find_locked(hostp, vp);
1001 	if (nvp != NULL)
1002 		goto out;
1003 
1004 	/* nlm_vhold wasn't found, then create a new one */
1005 	mutex_exit(&hostp->nh_lock);
1006 	new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
1007 
1008 	/*
1009 	 * Check if another thread has already
1010 	 * created the same nlm_vhold.
1011 	 */
1012 	mutex_enter(&hostp->nh_lock);
1013 	nvp = nlm_vhold_find_locked(hostp, vp);
1014 	if (nvp == NULL) {
1015 		nvp = new_nvp;
1016 		new_nvp = NULL;
1017 
1018 		TAILQ_INIT(&nvp->nv_slreqs);
1019 		nvp->nv_vp = vp;
1020 		nvp->nv_refcnt = 1;
1021 		VN_HOLD(nvp->nv_vp);
1022 
1023 		VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
1024 		    (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
1025 		TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
1026 	}
1027 
1028 out:
1029 	mutex_exit(&hostp->nh_lock);
1030 	if (new_nvp != NULL)
1031 		kmem_cache_free(nlm_vhold_cache, new_nvp);
1032 
1033 	return (nvp);
1034 }
1035 
1036 /*
1037  * Drop a reference to vhold object nvp.
1038  */
1039 void
1040 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1041 {
1042 	if (nvp == NULL)
1043 		return;
1044 
1045 	mutex_enter(&hostp->nh_lock);
1046 	ASSERT(nvp->nv_refcnt > 0);
1047 	nvp->nv_refcnt--;
1048 
1049 	/*
1050 	 * If these conditions are met, the vhold is obviously unused and we
1051 	 * will destroy it.  In a case either v_filocks and/or v_shrlocks is
1052 	 * non-NULL the vhold might still be unused by the host, but it is
1053 	 * expensive to check that.  We defer such check until the host is
1054 	 * idle.  The expensive check is done in the NLM garbage collector.
1055 	 */
1056 	if (nvp->nv_refcnt == 0 &&
1057 	    nvp->nv_vp->v_filocks == NULL &&
1058 	    nvp->nv_vp->v_shrlocks == NULL) {
1059 		nlm_vhold_destroy(hostp, nvp);
1060 	}
1061 
1062 	mutex_exit(&hostp->nh_lock);
1063 }
1064 
1065 /*
1066  * Clean all locks and share reservations on the
1067  * given vhold object that were acquired by the
1068  * given sysid
1069  */
1070 static void
1071 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1072 {
1073 	cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1074 	cleanshares_by_sysid(nvp->nv_vp, sysid);
1075 }
1076 
1077 static void
1078 nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1079 {
1080 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1081 
1082 	ASSERT(nvp->nv_refcnt == 0);
1083 	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1084 
1085 	VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
1086 	    (mod_hash_key_t)nvp->nv_vp,
1087 	    (mod_hash_val_t)&nvp) == 0);
1088 
1089 	TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
1090 	VN_RELE(nvp->nv_vp);
1091 	nvp->nv_vp = NULL;
1092 
1093 	kmem_cache_free(nlm_vhold_cache, nvp);
1094 }
1095 
1096 /*
1097  * Return TRUE if the given vhold is busy.
1098  * Vhold object is considered to be "busy" when
1099  * all the following conditions hold:
1100  * 1) No one uses it at the moment;
1101  * 2) It hasn't any locks;
1102  * 3) It hasn't any share reservations;
1103  */
1104 static bool_t
1105 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1106 {
1107 	vnode_t *vp;
1108 	int sysid;
1109 
1110 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1111 
1112 	if (nvp->nv_refcnt > 0)
1113 		return (TRUE);
1114 
1115 	vp = nvp->nv_vp;
1116 	sysid = hostp->nh_sysid;
1117 	if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1118 	    shr_has_remote_shares(vp, sysid))
1119 		return (TRUE);
1120 
1121 	return (FALSE);
1122 }
1123 
1124 /* ARGSUSED */
1125 static int
1126 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1127 {
1128 	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1129 
1130 	bzero(nvp, sizeof (*nvp));
1131 	return (0);
1132 }
1133 
1134 /* ARGSUSED */
1135 static void
1136 nlm_vhold_dtor(void *datap, void *cdrarg)
1137 {
1138 	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1139 
1140 	ASSERT(nvp->nv_refcnt == 0);
1141 	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1142 	ASSERT(nvp->nv_vp == NULL);
1143 }
1144 
1145 struct nlm_vhold *
1146 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1147 {
1148 	struct nlm_vhold *nvp = NULL;
1149 
1150 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1151 	(void) mod_hash_find(hostp->nh_vholds_by_vp,
1152 	    (mod_hash_key_t)vp,
1153 	    (mod_hash_val_t)&nvp);
1154 
1155 	if (nvp != NULL)
1156 		nvp->nv_refcnt++;
1157 
1158 	return (nvp);
1159 }
1160 
1161 /*
1162  * NLM host functions
1163  */
1164 static void
1165 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1166 {
1167 	ASSERT(src->len <= src->maxlen);
1168 
1169 	dst->maxlen = src->maxlen;
1170 	dst->len = src->len;
1171 	dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1172 	bcopy(src->buf, dst->buf, src->len);
1173 }
1174 
1175 /* ARGSUSED */
1176 static int
1177 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1178 {
1179 	struct nlm_host *hostp = (struct nlm_host *)datap;
1180 
1181 	bzero(hostp, sizeof (*hostp));
1182 	return (0);
1183 }
1184 
1185 /* ARGSUSED */
1186 static void
1187 nlm_host_dtor(void *datap, void *cdrarg)
1188 {
1189 	struct nlm_host *hostp = (struct nlm_host *)datap;
1190 	ASSERT(hostp->nh_refs == 0);
1191 }
1192 
1193 static void
1194 nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
1195 {
1196 	ASSERT(hostp->nh_refs == 0);
1197 	ASSERT(hostp->nh_flags & NLM_NH_INIDLE);
1198 
1199 	avl_remove(&g->nlm_hosts_tree, hostp);
1200 	VERIFY(mod_hash_remove(g->nlm_hosts_hash,
1201 	    (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
1202 	    (mod_hash_val_t)&hostp) == 0);
1203 	TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1204 	hostp->nh_flags &= ~NLM_NH_INIDLE;
1205 }
1206 
1207 /*
1208  * Free resources used by a host. This is called after the reference
1209  * count has reached zero so it doesn't need to worry about locks.
1210  */
1211 static void
1212 nlm_host_destroy(struct nlm_host *hostp)
1213 {
1214 	ASSERT(hostp->nh_name != NULL);
1215 	ASSERT(hostp->nh_netid != NULL);
1216 	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1217 
1218 	strfree(hostp->nh_name);
1219 	strfree(hostp->nh_netid);
1220 	kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1221 
1222 	if (hostp->nh_sysid != LM_NOSYSID)
1223 		nlm_sysid_free(hostp->nh_sysid);
1224 
1225 	nlm_rpc_cache_destroy(hostp);
1226 
1227 	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1228 	mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1229 
1230 	mutex_destroy(&hostp->nh_lock);
1231 	cv_destroy(&hostp->nh_rpcb_cv);
1232 	cv_destroy(&hostp->nh_recl_cv);
1233 
1234 	kmem_cache_free(nlm_hosts_cache, hostp);
1235 }
1236 
1237 /*
1238  * Cleanup SERVER-side state after a client restarts,
1239  * or becomes unresponsive, or whatever.
1240  *
1241  * We unlock any active locks owned by the host.
1242  * When rpc.lockd is shutting down,
1243  * this function is called with newstate set to zero
1244  * which allows us to cancel any pending async locks
1245  * and clear the locking state.
1246  *
1247  * When "state" is 0, we don't update host's state,
1248  * but cleanup all remote locks on the host.
1249  * It's useful to call this function for resources
1250  * cleanup.
1251  */
1252 void
1253 nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1254 {
1255 	struct nlm_vhold *nvp;
1256 	struct nlm_slreq *slr;
1257 	struct nlm_slreq_list slreqs2free;
1258 
1259 	TAILQ_INIT(&slreqs2free);
1260 	mutex_enter(&hostp->nh_lock);
1261 	if (state != 0)
1262 		hostp->nh_state = state;
1263 
1264 	TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1265 
1266 		/* cleanup sleeping requests at first */
1267 		while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1268 			TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1269 
1270 			/*
1271 			 * Instead of freeing cancelled sleeping request
1272 			 * here, we add it to the linked list created
1273 			 * on the stack in order to do all frees outside
1274 			 * the critical section.
1275 			 */
1276 			TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1277 		}
1278 
1279 		nvp->nv_refcnt++;
1280 		mutex_exit(&hostp->nh_lock);
1281 
1282 		nlm_vhold_clean(nvp, hostp->nh_sysid);
1283 
1284 		mutex_enter(&hostp->nh_lock);
1285 		nvp->nv_refcnt--;
1286 	}
1287 
1288 	mutex_exit(&hostp->nh_lock);
1289 	while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1290 		TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1291 		kmem_free(slr, sizeof (*slr));
1292 	}
1293 }
1294 
1295 /*
1296  * Cleanup CLIENT-side state after a server restarts,
1297  * or becomes unresponsive, or whatever.
1298  *
1299  * This is called by the local NFS statd when we receive a
1300  * host state change notification.  (also nlm_svc_stopping)
1301  *
1302  * Deal with a server restart.  If we are stopping the
1303  * NLM service, we'll have newstate == 0, and will just
1304  * cancel all our client-side lock requests.  Otherwise,
1305  * start the "recovery" process to reclaim any locks
1306  * we hold on this server.
1307  */
1308 void
1309 nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1310 {
1311 	mutex_enter(&hostp->nh_lock);
1312 	hostp->nh_state = state;
1313 	if (hostp->nh_flags & NLM_NH_RECLAIM) {
1314 		/*
1315 		 * Either host's state is up to date or
1316 		 * host is already in recovery.
1317 		 */
1318 		mutex_exit(&hostp->nh_lock);
1319 		return;
1320 	}
1321 
1322 	hostp->nh_flags |= NLM_NH_RECLAIM;
1323 
1324 	/*
1325 	 * Host will be released by the recovery thread,
1326 	 * thus we need to increment refcount.
1327 	 */
1328 	hostp->nh_refs++;
1329 	mutex_exit(&hostp->nh_lock);
1330 
1331 	(void) zthread_create(NULL, 0, nlm_reclaimer,
1332 	    hostp, 0, minclsyspri);
1333 }
1334 
1335 /*
1336  * The function is called when NLM client detects that
1337  * server has entered in grace period and client needs
1338  * to wait until reclamation process (if any) does
1339  * its job.
1340  */
1341 int
1342 nlm_host_wait_grace(struct nlm_host *hostp)
1343 {
1344 	struct nlm_globals *g;
1345 	int error = 0;
1346 
1347 	g = zone_getspecific(nlm_zone_key, curzone);
1348 	mutex_enter(&hostp->nh_lock);
1349 
1350 	do {
1351 		int rc;
1352 
1353 		rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1354 		    &hostp->nh_lock, ddi_get_lbolt() +
1355 		    SEC_TO_TICK(g->retrans_tmo));
1356 
1357 		if (rc == 0) {
1358 			error = EINTR;
1359 			break;
1360 		}
1361 	} while (hostp->nh_flags & NLM_NH_RECLAIM);
1362 
1363 	mutex_exit(&hostp->nh_lock);
1364 	return (error);
1365 }
1366 
1367 /*
1368  * Create a new NLM host.
1369  *
1370  * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1371  * which needs both a knetconfig and an address when creating
1372  * endpoints. Thus host object stores both knetconfig and
1373  * netid.
1374  */
1375 static struct nlm_host *
1376 nlm_host_create(char *name, const char *netid,
1377     struct knetconfig *knc, struct netbuf *naddr)
1378 {
1379 	struct nlm_host *host;
1380 
1381 	host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1382 
1383 	mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1384 	cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1385 	cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1386 
1387 	host->nh_sysid = LM_NOSYSID;
1388 	host->nh_refs = 1;
1389 	host->nh_name = strdup(name);
1390 	host->nh_netid = strdup(netid);
1391 	host->nh_knc = *knc;
1392 	nlm_copy_netbuf(&host->nh_addr, naddr);
1393 
1394 	host->nh_state = 0;
1395 	host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1396 	host->nh_flags = 0;
1397 
1398 	host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1399 	    32, mod_hash_null_valdtor, sizeof (vnode_t));
1400 
1401 	TAILQ_INIT(&host->nh_vholds_list);
1402 	TAILQ_INIT(&host->nh_rpchc);
1403 
1404 	return (host);
1405 }
1406 
1407 /*
1408  * Cancel all client side sleeping locks owned by given host.
1409  */
1410 void
1411 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1412 {
1413 	struct nlm_slock *nslp;
1414 
1415 	mutex_enter(&g->lock);
1416 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1417 		if (nslp->nsl_host == hostp) {
1418 			nslp->nsl_state = NLM_SL_CANCELLED;
1419 			cv_broadcast(&nslp->nsl_cond);
1420 		}
1421 	}
1422 
1423 	mutex_exit(&g->lock);
1424 }
1425 
1426 /*
1427  * Garbage collect stale vhold objects.
1428  *
1429  * In other words check whether vnodes that are
1430  * held by vhold objects still have any locks
1431  * or shares or still in use. If they aren't,
1432  * just destroy them.
1433  */
1434 static void
1435 nlm_host_gc_vholds(struct nlm_host *hostp)
1436 {
1437 	struct nlm_vhold *nvp;
1438 
1439 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1440 
1441 	nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1442 	while (nvp != NULL) {
1443 		struct nlm_vhold *nvp_tmp;
1444 
1445 		if (nlm_vhold_busy(hostp, nvp)) {
1446 			nvp = TAILQ_NEXT(nvp, nv_link);
1447 			continue;
1448 		}
1449 
1450 		nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1451 		nlm_vhold_destroy(hostp, nvp);
1452 		nvp = nvp_tmp;
1453 	}
1454 }
1455 
1456 /*
1457  * Check whether the given host has any
1458  * server side locks or share reservations.
1459  */
1460 static bool_t
1461 nlm_host_has_srv_locks(struct nlm_host *hostp)
1462 {
1463 	/*
1464 	 * It's cheap and simple: if server has
1465 	 * any locks/shares there must be vhold
1466 	 * object storing the affected vnode.
1467 	 *
1468 	 * NOTE: We don't need to check sleeping
1469 	 * locks on the server side, because if
1470 	 * server side sleeping lock is alive,
1471 	 * there must be a vhold object corresponding
1472 	 * to target vnode.
1473 	 */
1474 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1475 	if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1476 		return (TRUE);
1477 
1478 	return (FALSE);
1479 }
1480 
1481 /*
1482  * Check whether the given host has any client side
1483  * locks or share reservations.
1484  */
1485 static bool_t
1486 nlm_host_has_cli_locks(struct nlm_host *hostp)
1487 {
1488 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1489 
1490 	/*
1491 	 * XXX: It's not the way I'd like to do the check,
1492 	 * because flk_sysid_has_locks() can be very
1493 	 * expensive by design. Unfortunatelly it iterates
1494 	 * through all locks on the system, doesn't matter
1495 	 * were they made on remote system via NLM or
1496 	 * on local system via reclock. To understand the
1497 	 * problem, consider that there're dozens of thousands
1498 	 * of locks that are made on some ZFS dataset. And there's
1499 	 * another dataset shared by NFS where NLM client had locks
1500 	 * some time ago, but doesn't have them now.
1501 	 * In this case flk_sysid_has_locks() will iterate
1502 	 * thrught dozens of thousands locks until it returns us
1503 	 * FALSE.
1504 	 * Oh, I hope that in shiny future somebody will make
1505 	 * local lock manager (os/flock.c) better, so that
1506 	 * it'd be more friedly to remote locks and
1507 	 * flk_sysid_has_locks() wouldn't be so expensive.
1508 	 */
1509 	if (flk_sysid_has_locks(hostp->nh_sysid |
1510 	    LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1511 		return (TRUE);
1512 
1513 	/*
1514 	 * Check whether host has any share reservations
1515 	 * registered on the client side.
1516 	 */
1517 	if (hostp->nh_shrlist != NULL)
1518 		return (TRUE);
1519 
1520 	return (FALSE);
1521 }
1522 
1523 /*
1524  * Determine whether the given host owns any
1525  * locks or share reservations.
1526  */
1527 static bool_t
1528 nlm_host_has_locks(struct nlm_host *hostp)
1529 {
1530 	if (nlm_host_has_srv_locks(hostp))
1531 		return (TRUE);
1532 
1533 	return (nlm_host_has_cli_locks(hostp));
1534 }
1535 
1536 /*
1537  * This function compares only addresses of two netbufs
1538  * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1539  * Port part of netbuf is ignored.
1540  *
1541  * Return values:
1542  *  -1: nb1's address is "smaller" than nb2's
1543  *   0: addresses are equal
1544  *   1: nb1's address is "greater" than nb2's
1545  */
1546 static int
1547 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1548 {
1549 	union nlm_addr {
1550 		struct sockaddr sa;
1551 		struct sockaddr_in sin;
1552 		struct sockaddr_in6 sin6;
1553 	} *na1, *na2;
1554 	int res;
1555 
1556 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1557 	na1 = (union nlm_addr *)nb1->buf;
1558 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1559 	na2 = (union nlm_addr *)nb2->buf;
1560 
1561 	if (na1->sa.sa_family < na2->sa.sa_family)
1562 		return (-1);
1563 	if (na1->sa.sa_family > na2->sa.sa_family)
1564 		return (1);
1565 
1566 	switch (na1->sa.sa_family) {
1567 	case AF_INET:
1568 		res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1569 		    sizeof (na1->sin.sin_addr));
1570 		break;
1571 	case AF_INET6:
1572 		res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1573 		    sizeof (na1->sin6.sin6_addr));
1574 		break;
1575 	default:
1576 		VERIFY(0);
1577 		return (0);
1578 	}
1579 
1580 	return (SIGN(res));
1581 }
1582 
1583 /*
1584  * Compare two nlm hosts.
1585  * Return values:
1586  * -1: host1 is "smaller" than host2
1587  *  0: host1 is equal to host2
1588  *  1: host1 is "greater" than host2
1589  */
1590 int
1591 nlm_host_cmp(const void *p1, const void *p2)
1592 {
1593 	struct nlm_host *h1 = (struct nlm_host *)p1;
1594 	struct nlm_host *h2 = (struct nlm_host *)p2;
1595 	int res;
1596 
1597 	res = strcmp(h1->nh_netid, h2->nh_netid);
1598 	if (res != 0)
1599 		return (SIGN(res));
1600 
1601 	res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1602 	return (res);
1603 }
1604 
1605 /*
1606  * Find the host specified by...  (see below)
1607  * If found, increment the ref count.
1608  */
1609 static struct nlm_host *
1610 nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1611     struct netbuf *naddr, avl_index_t *wherep)
1612 {
1613 	struct nlm_host *hostp, key;
1614 	avl_index_t pos;
1615 
1616 	ASSERT(MUTEX_HELD(&g->lock));
1617 
1618 	key.nh_netid = (char *)netid;
1619 	key.nh_addr.buf = naddr->buf;
1620 	key.nh_addr.len = naddr->len;
1621 	key.nh_addr.maxlen = naddr->maxlen;
1622 
1623 	hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1624 
1625 	if (hostp != NULL) {
1626 		/*
1627 		 * Host is inuse now. Remove it from idle
1628 		 * hosts list if needed.
1629 		 */
1630 		if (hostp->nh_flags & NLM_NH_INIDLE) {
1631 			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1632 			hostp->nh_flags &= ~NLM_NH_INIDLE;
1633 		}
1634 
1635 		hostp->nh_refs++;
1636 	}
1637 	if (wherep != NULL)
1638 		*wherep = pos;
1639 
1640 	return (hostp);
1641 }
1642 
1643 /*
1644  * Find NLM host for the given name and address.
1645  */
1646 struct nlm_host *
1647 nlm_host_find(struct nlm_globals *g, const char *netid,
1648     struct netbuf *addr)
1649 {
1650 	struct nlm_host *hostp = NULL;
1651 
1652 	mutex_enter(&g->lock);
1653 	if (g->run_status != NLM_ST_UP)
1654 		goto out;
1655 
1656 	hostp = nlm_host_find_locked(g, netid, addr, NULL);
1657 
1658 out:
1659 	mutex_exit(&g->lock);
1660 	return (hostp);
1661 }
1662 
1663 
1664 /*
1665  * Find or create an NLM host for the given name and address.
1666  *
1667  * The remote host is determined by all of: name, netid, address.
1668  * Note that the netid is whatever nlm_svc_add_ep() gave to
1669  * svc_tli_kcreate() for the service binding.  If any of these
1670  * are different, allocate a new host (new sysid).
1671  */
1672 struct nlm_host *
1673 nlm_host_findcreate(struct nlm_globals *g, char *name,
1674     const char *netid, struct netbuf *addr)
1675 {
1676 	int err;
1677 	struct nlm_host *host, *newhost = NULL;
1678 	struct knetconfig knc;
1679 	avl_index_t where;
1680 
1681 	mutex_enter(&g->lock);
1682 	if (g->run_status != NLM_ST_UP) {
1683 		mutex_exit(&g->lock);
1684 		return (NULL);
1685 	}
1686 
1687 	host = nlm_host_find_locked(g, netid, addr, NULL);
1688 	mutex_exit(&g->lock);
1689 	if (host != NULL)
1690 		return (host);
1691 
1692 	err = nlm_knc_from_netid(netid, &knc);
1693 	if (err != 0)
1694 		return (NULL);
1695 	/*
1696 	 * Do allocations (etc.) outside of mutex,
1697 	 * and then check again before inserting.
1698 	 */
1699 	newhost = nlm_host_create(name, netid, &knc, addr);
1700 	newhost->nh_sysid = nlm_sysid_alloc();
1701 	if (newhost->nh_sysid == LM_NOSYSID)
1702 		goto out;
1703 
1704 	mutex_enter(&g->lock);
1705 	host = nlm_host_find_locked(g, netid, addr, &where);
1706 	if (host == NULL) {
1707 		host = newhost;
1708 		newhost = NULL;
1709 
1710 		/*
1711 		 * Insert host to the hosts AVL tree that is
1712 		 * used to lookup by <netid, address> pair.
1713 		 */
1714 		avl_insert(&g->nlm_hosts_tree, host, where);
1715 
1716 		/*
1717 		 * Insert host to the hosts hash table that is
1718 		 * used to lookup host by sysid.
1719 		 */
1720 		VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1721 		    (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1722 		    (mod_hash_val_t)host) == 0);
1723 	}
1724 
1725 	mutex_exit(&g->lock);
1726 
1727 out:
1728 	if (newhost != NULL) {
1729 		/*
1730 		 * We do not need the preallocated nlm_host
1731 		 * so decrement the reference counter
1732 		 * and destroy it.
1733 		 */
1734 		newhost->nh_refs--;
1735 		nlm_host_destroy(newhost);
1736 	}
1737 
1738 	return (host);
1739 }
1740 
1741 /*
1742  * Find the NLM host that matches the value of 'sysid'.
1743  * If found, return it with a new ref,
1744  * else return NULL.
1745  */
1746 struct nlm_host *
1747 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1748 {
1749 	struct nlm_host *hostp = NULL;
1750 
1751 	mutex_enter(&g->lock);
1752 	if (g->run_status != NLM_ST_UP)
1753 		goto out;
1754 
1755 	(void) mod_hash_find(g->nlm_hosts_hash,
1756 	    (mod_hash_key_t)(uintptr_t)sysid,
1757 	    (mod_hash_val_t)&hostp);
1758 
1759 	if (hostp == NULL)
1760 		goto out;
1761 
1762 	/*
1763 	 * Host is inuse now. Remove it
1764 	 * from idle hosts list if needed.
1765 	 */
1766 	if (hostp->nh_flags & NLM_NH_INIDLE) {
1767 		TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1768 		hostp->nh_flags &= ~NLM_NH_INIDLE;
1769 	}
1770 
1771 	hostp->nh_refs++;
1772 
1773 out:
1774 	mutex_exit(&g->lock);
1775 	return (hostp);
1776 }
1777 
1778 /*
1779  * Release the given host.
1780  * I.e. drop a reference that was taken earlier by one of
1781  * the following functions: nlm_host_findcreate(), nlm_host_find(),
1782  * nlm_host_find_by_sysid().
1783  *
1784  * When the very last reference is dropped, host is moved to
1785  * so-called "idle state". All hosts that are in idle state
1786  * have an idle timeout. If timeout is expired, GC thread
1787  * checks whether hosts have any locks and if they heven't
1788  * any, it removes them.
1789  * NOTE: only unused hosts can be in idle state.
1790  */
1791 void
1792 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1793 {
1794 	if (hostp == NULL)
1795 		return;
1796 
1797 	mutex_enter(&g->lock);
1798 	ASSERT(hostp->nh_refs > 0);
1799 
1800 	hostp->nh_refs--;
1801 	if (hostp->nh_refs != 0) {
1802 		mutex_exit(&g->lock);
1803 		return;
1804 	}
1805 
1806 	/*
1807 	 * The very last reference to the host was dropped,
1808 	 * thus host is unused now. Set its idle timeout
1809 	 * and move it to the idle hosts LRU list.
1810 	 */
1811 	hostp->nh_idle_timeout = ddi_get_lbolt() +
1812 	    SEC_TO_TICK(g->cn_idle_tmo);
1813 
1814 	ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1815 	TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1816 	hostp->nh_flags |= NLM_NH_INIDLE;
1817 	mutex_exit(&g->lock);
1818 }
1819 
1820 /*
1821  * Unregister this NLM host (NFS client) with the local statd
1822  * due to idleness (no locks held for a while).
1823  */
1824 void
1825 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1826 {
1827 	enum clnt_stat stat;
1828 
1829 	VERIFY(host->nh_refs == 0);
1830 	if (!(host->nh_flags & NLM_NH_MONITORED))
1831 		return;
1832 
1833 	host->nh_flags &= ~NLM_NH_MONITORED;
1834 	stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1835 	if (stat != RPC_SUCCESS) {
1836 		NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1837 		return;
1838 	}
1839 }
1840 
1841 /*
1842  * Ask the local NFS statd to begin monitoring this host.
1843  * It will call us back when that host restarts, using the
1844  * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1845  * which is handled in nlm_do_notify1().
1846  */
1847 void
1848 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1849 {
1850 	int family;
1851 	netobj obj;
1852 	enum clnt_stat stat;
1853 
1854 	if (state != 0 && host->nh_state == 0) {
1855 		/*
1856 		 * This is the first time we have seen an NSM state
1857 		 * Value for this host. We record it here to help
1858 		 * detect host reboots.
1859 		 */
1860 		host->nh_state = state;
1861 	}
1862 
1863 	mutex_enter(&host->nh_lock);
1864 	if (host->nh_flags & NLM_NH_MONITORED) {
1865 		mutex_exit(&host->nh_lock);
1866 		return;
1867 	}
1868 
1869 	host->nh_flags |= NLM_NH_MONITORED;
1870 	mutex_exit(&host->nh_lock);
1871 
1872 	/*
1873 	 * Before we begin monitoring the host register the network address
1874 	 * associated with this hostname.
1875 	 */
1876 	nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1877 	stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1878 	if (stat != RPC_SUCCESS) {
1879 		NLM_WARN("Failed to register address, stat=%d\n", stat);
1880 		mutex_enter(&g->lock);
1881 		host->nh_flags &= ~NLM_NH_MONITORED;
1882 		mutex_exit(&g->lock);
1883 
1884 		return;
1885 	}
1886 
1887 	/*
1888 	 * Tell statd how to call us with status updates for
1889 	 * this host. Updates arrive via nlm_do_notify1().
1890 	 *
1891 	 * We put our assigned system ID value in the priv field to
1892 	 * make it simpler to find the host if we are notified of a
1893 	 * host restart.
1894 	 */
1895 	stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1896 	if (stat != RPC_SUCCESS) {
1897 		NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1898 		mutex_enter(&g->lock);
1899 		host->nh_flags &= ~NLM_NH_MONITORED;
1900 		mutex_exit(&g->lock);
1901 
1902 		return;
1903 	}
1904 }
1905 
1906 int
1907 nlm_host_get_state(struct nlm_host *hostp)
1908 {
1909 
1910 	return (hostp->nh_state);
1911 }
1912 
1913 /*
1914  * NLM client/server sleeping locks
1915  */
1916 
1917 /*
1918  * Register client side sleeping lock.
1919  *
1920  * Our client code calls this to keep information
1921  * about sleeping lock somewhere. When it receives
1922  * grant callback from server or when it just
1923  * needs to remove all sleeping locks from vnode,
1924  * it uses this information for remove/apply lock
1925  * properly.
1926  */
1927 struct nlm_slock *
1928 nlm_slock_register(
1929 	struct nlm_globals *g,
1930 	struct nlm_host *host,
1931 	struct nlm4_lock *lock,
1932 	struct vnode *vp)
1933 {
1934 	struct nlm_slock *nslp;
1935 
1936 	nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1937 	cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1938 	nslp->nsl_lock = *lock;
1939 	nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1940 	nslp->nsl_state = NLM_SL_BLOCKED;
1941 	nslp->nsl_host = host;
1942 	nslp->nsl_vp = vp;
1943 
1944 	mutex_enter(&g->lock);
1945 	TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1946 	mutex_exit(&g->lock);
1947 
1948 	return (nslp);
1949 }
1950 
1951 /*
1952  * Remove this lock from the wait list and destroy it.
1953  */
1954 void
1955 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1956 {
1957 	mutex_enter(&g->lock);
1958 	TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1959 	mutex_exit(&g->lock);
1960 
1961 	kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1962 	cv_destroy(&nslp->nsl_cond);
1963 	kmem_free(nslp, sizeof (*nslp));
1964 }
1965 
1966 /*
1967  * Wait for a granted callback or cancellation event
1968  * for a sleeping lock.
1969  *
1970  * If a signal interrupted the wait or if the lock
1971  * was cancelled, return EINTR - the caller must arrange to send
1972  * a cancellation to the server.
1973  *
1974  * If timeout occurred, return ETIMEDOUT - the caller must
1975  * resend the lock request to the server.
1976  *
1977  * On success return 0.
1978  */
1979 int
1980 nlm_slock_wait(struct nlm_globals *g,
1981     struct nlm_slock *nslp, uint_t timeo_secs)
1982 {
1983 	clock_t timeo_ticks;
1984 	int cv_res, error;
1985 
1986 	/*
1987 	 * If the granted message arrived before we got here,
1988 	 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
1989 	 */
1990 	cv_res = 1;
1991 	timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
1992 
1993 	mutex_enter(&g->lock);
1994 	while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
1995 		cv_res = cv_timedwait_sig(&nslp->nsl_cond,
1996 		    &g->lock, timeo_ticks);
1997 	}
1998 
1999 	/*
2000 	 * No matter why we wake up, if the lock was
2001 	 * cancelled, let the function caller to know
2002 	 * about it by returning EINTR.
2003 	 */
2004 	if (nslp->nsl_state == NLM_SL_CANCELLED) {
2005 		error = EINTR;
2006 		goto out;
2007 	}
2008 
2009 	if (cv_res <= 0) {
2010 		/* We were woken up either by timeout or by interrupt */
2011 		error = (cv_res < 0) ? ETIMEDOUT : EINTR;
2012 
2013 		/*
2014 		 * The granted message may arrive after the
2015 		 * interrupt/timeout but before we manage to lock the
2016 		 * mutex. Detect this by examining nslp.
2017 		 */
2018 		if (nslp->nsl_state == NLM_SL_GRANTED)
2019 			error = 0;
2020 	} else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
2021 		error = 0;
2022 		VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
2023 	}
2024 
2025 out:
2026 	mutex_exit(&g->lock);
2027 	return (error);
2028 }
2029 
2030 /*
2031  * Mark client side sleeping lock as granted
2032  * and wake up a process blocked on the lock.
2033  * Called from server side NLM_GRANT handler.
2034  *
2035  * If sleeping lock is found return 0, otherwise
2036  * return ENOENT.
2037  */
2038 int
2039 nlm_slock_grant(struct nlm_globals *g,
2040     struct nlm_host *hostp, struct nlm4_lock *alock)
2041 {
2042 	struct nlm_slock *nslp;
2043 	int error = ENOENT;
2044 
2045 	mutex_enter(&g->lock);
2046 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
2047 		if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
2048 		    (nslp->nsl_host != hostp))
2049 			continue;
2050 
2051 		if (alock->svid		== nslp->nsl_lock.svid &&
2052 		    alock->l_offset	== nslp->nsl_lock.l_offset &&
2053 		    alock->l_len	== nslp->nsl_lock.l_len &&
2054 		    alock->fh.n_len	== nslp->nsl_lock.fh.n_len &&
2055 		    bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2056 		    nslp->nsl_lock.fh.n_len) == 0) {
2057 			nslp->nsl_state = NLM_SL_GRANTED;
2058 			cv_broadcast(&nslp->nsl_cond);
2059 			error = 0;
2060 			break;
2061 		}
2062 	}
2063 
2064 	mutex_exit(&g->lock);
2065 	return (error);
2066 }
2067 
2068 /*
2069  * Register sleeping lock request corresponding to
2070  * flp on the given vhold object.
2071  * On success function returns 0, otherwise (if
2072  * lock request with the same flp is already
2073  * registered) function returns EEXIST.
2074  */
2075 int
2076 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2077 	struct flock64 *flp)
2078 {
2079 	struct nlm_slreq *slr, *new_slr = NULL;
2080 	int ret = EEXIST;
2081 
2082 	mutex_enter(&hostp->nh_lock);
2083 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2084 	if (slr != NULL)
2085 		goto out;
2086 
2087 	mutex_exit(&hostp->nh_lock);
2088 	new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2089 	bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2090 
2091 	mutex_enter(&hostp->nh_lock);
2092 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2093 	if (slr == NULL) {
2094 		slr = new_slr;
2095 		new_slr = NULL;
2096 		ret = 0;
2097 
2098 		TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2099 	}
2100 
2101 out:
2102 	mutex_exit(&hostp->nh_lock);
2103 	if (new_slr != NULL)
2104 		kmem_free(new_slr, sizeof (*new_slr));
2105 
2106 	return (ret);
2107 }
2108 
2109 /*
2110  * Unregister sleeping lock request corresponding
2111  * to flp from the given vhold object.
2112  * On success function returns 0, otherwise (if
2113  * lock request corresponding to flp isn't found
2114  * on the given vhold) function returns ENOENT.
2115  */
2116 int
2117 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2118 	struct flock64 *flp)
2119 {
2120 	struct nlm_slreq *slr;
2121 
2122 	mutex_enter(&hostp->nh_lock);
2123 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2124 	if (slr == NULL) {
2125 		mutex_exit(&hostp->nh_lock);
2126 		return (ENOENT);
2127 	}
2128 
2129 	TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2130 	mutex_exit(&hostp->nh_lock);
2131 
2132 	kmem_free(slr, sizeof (*slr));
2133 	return (0);
2134 }
2135 
2136 /*
2137  * Find sleeping lock request on the given vhold object by flp.
2138  */
2139 struct nlm_slreq *
2140 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2141     struct flock64 *flp)
2142 {
2143 	struct nlm_slreq *slr = NULL;
2144 
2145 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
2146 	TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2147 		if (slr->nsr_fl.l_start		== flp->l_start	&&
2148 		    slr->nsr_fl.l_len		== flp->l_len	&&
2149 		    slr->nsr_fl.l_pid		== flp->l_pid	&&
2150 		    slr->nsr_fl.l_type		== flp->l_type)
2151 			break;
2152 	}
2153 
2154 	return (slr);
2155 }
2156 
/*
 * NLM tracks active share reservations made on the client side.
 * It needs to keep track of share reservations for two purposes:
 * 1) to determine if nlm_host is busy (if it has active locks and/or
 *    share reservations, it is)
 * 2) to recover active share reservations when NLM server reports
 *    that it has rebooted.
 *
 * Unfortunately the Illumos local share reservations manager (see
 * os/share.c) has no way to look up or enumerate all reservations on
 * the system by sysid (as the local lock manager can).
 * It tracks reservations per vnode and is only able to get/look them up
 * on a particular vnode. That is not what NLM needs, hence this ugly
 * share reservations tracking scheme.
 */
2172 
2173 void
2174 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2175 {
2176 	struct nlm_shres *nsp, *nsp_new;
2177 
2178 	/*
2179 	 * NFS code must fill the s_owner, so that
2180 	 * s_own_len is never 0.
2181 	 */
2182 	ASSERT(shrp->s_own_len > 0);
2183 	nsp_new = nlm_shres_create_item(shrp, vp);
2184 
2185 	mutex_enter(&hostp->nh_lock);
2186 	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2187 		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2188 			break;
2189 
2190 	if (nsp != NULL) {
2191 		/*
2192 		 * Found a duplicate. Do nothing.
2193 		 */
2194 
2195 		goto out;
2196 	}
2197 
2198 	nsp = nsp_new;
2199 	nsp_new = NULL;
2200 	nsp->ns_next = hostp->nh_shrlist;
2201 	hostp->nh_shrlist = nsp;
2202 
2203 out:
2204 	mutex_exit(&hostp->nh_lock);
2205 	if (nsp_new != NULL)
2206 		nlm_shres_destroy_item(nsp_new);
2207 }
2208 
2209 void
2210 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2211 {
2212 	struct nlm_shres *nsp, *nsp_prev = NULL;
2213 
2214 	mutex_enter(&hostp->nh_lock);
2215 	nsp = hostp->nh_shrlist;
2216 	while (nsp != NULL) {
2217 		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2218 			struct nlm_shres *nsp_del;
2219 
2220 			nsp_del = nsp;
2221 			nsp = nsp->ns_next;
2222 			if (nsp_prev != NULL)
2223 				nsp_prev->ns_next = nsp;
2224 			else
2225 				hostp->nh_shrlist = nsp;
2226 
2227 			nlm_shres_destroy_item(nsp_del);
2228 			continue;
2229 		}
2230 
2231 		nsp_prev = nsp;
2232 		nsp = nsp->ns_next;
2233 	}
2234 
2235 	mutex_exit(&hostp->nh_lock);
2236 }
2237 
2238 /*
2239  * Get a _copy_ of the list of all active share reservations
2240  * made by the given host.
2241  * NOTE: the list function returns _must_ be released using
2242  *       nlm_free_shrlist().
2243  */
2244 struct nlm_shres *
2245 nlm_get_active_shres(struct nlm_host *hostp)
2246 {
2247 	struct nlm_shres *nsp, *nslist = NULL;
2248 
2249 	mutex_enter(&hostp->nh_lock);
2250 	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2251 		struct nlm_shres *nsp_new;
2252 
2253 		nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2254 		nsp_new->ns_next = nslist;
2255 		nslist = nsp_new;
2256 	}
2257 
2258 	mutex_exit(&hostp->nh_lock);
2259 	return (nslist);
2260 }
2261 
2262 /*
2263  * Free memory allocated for the active share reservations
2264  * list created by nlm_get_active_shres() function.
2265  */
2266 void
2267 nlm_free_shrlist(struct nlm_shres *nslist)
2268 {
2269 	struct nlm_shres *nsp;
2270 
2271 	while (nslist != NULL) {
2272 		nsp =  nslist;
2273 		nslist = nslist->ns_next;
2274 
2275 		nlm_shres_destroy_item(nsp);
2276 	}
2277 }
2278 
2279 static bool_t
2280 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2281 {
2282 	if (shrp1->s_sysid	== shrp2->s_sysid	&&
2283 	    shrp1->s_pid	== shrp2->s_pid		&&
2284 	    shrp1->s_own_len	== shrp2->s_own_len	&&
2285 	    bcmp(shrp1->s_owner, shrp2->s_owner,
2286 	    shrp1->s_own_len) == 0)
2287 		return (TRUE);
2288 
2289 	return (FALSE);
2290 }
2291 
2292 static struct nlm_shres *
2293 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2294 {
2295 	struct nlm_shres *nsp;
2296 
2297 	nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2298 	nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2299 	bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2300 	nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2301 	bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2302 	nsp->ns_vp = vp;
2303 
2304 	return (nsp);
2305 }
2306 
2307 static void
2308 nlm_shres_destroy_item(struct nlm_shres *nsp)
2309 {
2310 	kmem_free(nsp->ns_shr->s_owner,
2311 	    nsp->ns_shr->s_own_len);
2312 	kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2313 	kmem_free(nsp, sizeof (*nsp));
2314 }
2315 
2316 /*
2317  * Called by klmmod.c when lockd adds a network endpoint
2318  * on which we should begin RPC services.
2319  */
2320 int
2321 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2322 {
2323 	SVCMASTERXPRT *xprt = NULL;
2324 	int error;
2325 
2326 	error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2327 	    &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2328 	if (error != 0)
2329 		return (error);
2330 
2331 	(void) nlm_knc_to_netid(knc);
2332 	return (0);
2333 }
2334 
/*
 * Start NLM service.
 *
 * Must be called with g->run_status == NLM_ST_STARTING and with
 * no GC thread running (both are VERIFY'ed).
 *
 * On success g->run_status becomes NLM_ST_UP and 0 is returned.
 * If the NSM handle can't be initialized, g->run_status is set to
 * NLM_ST_DOWN and the error is returned. Any later failure switches
 * NLM to NLM_ST_STOPPING, runs nlm_svc_stopping() and returns
 * an error (EIO for statd RPC failures, otherwise the error from
 * nlm_svc_add_ep()).
 */
int
nlm_svc_starting(struct nlm_globals *g, struct file *fp,
    const char *netid, struct knetconfig *knc)
{
	int error;
	enum clnt_stat stat;

	VERIFY(g->run_status == NLM_ST_STARTING);
	VERIFY(g->nlm_gc_thread == NULL);

	error = nlm_nsm_init_local(&g->nlm_nsm);
	if (error != 0) {
		NLM_ERR("Failed to initialize NSM handler "
		    "(error=%d)\n", error);
		g->run_status = NLM_ST_DOWN;
		return (error);
	}

	/*
	 * Default error for the RPC failures below, which
	 * report an rpc status rather than an errno.
	 */
	error = EIO;

	/*
	 * Create an NLM garbage collector thread that will
	 * clean up stale vholds and hosts objects.
	 */
	g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
	    g, 0, minclsyspri);

	/*
	 * Send SIMU_CRASH to local statd to report that
	 * NLM started, so that statd can notify other hosts
	 * about the NLM state change.
	 */

	stat = nlm_nsm_simu_crash(&g->nlm_nsm);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to connect to local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}

	/* Fetch the current statd state into g->nsm_state. */
	stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to get the status of local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}

	/* The grace period ends g->grace_period seconds from now. */
	g->grace_threshold = ddi_get_lbolt() +
	    SEC_TO_TICK(g->grace_period);

	/* Register endpoint used for communications with local NLM */
	error = nlm_svc_add_ep(fp, netid, knc);
	if (error != 0)
		goto shutdown_lm;

	/* Have nlm_pool_shutdown() called when the service pool goes away. */
	(void) svc_pool_control(NLM_SVCPOOL_ID,
	    SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
	g->run_status = NLM_ST_UP;
	return (0);

shutdown_lm:
	/*
	 * nlm_svc_stopping() asserts NLM_ST_STOPPING state,
	 * so switch to it before the call.
	 */
	mutex_enter(&g->lock);
	g->run_status = NLM_ST_STOPPING;
	mutex_exit(&g->lock);

	nlm_svc_stopping(g);
	return (error);
}
2406 
/*
 * Called when the server pool is destroyed, i.e. when
 * all transports are closed and no server threads
 * exist any more.
 *
 * Just call lm_shutdown() to shut NLM down properly;
 * its return value is ignored.
 */
static void
nlm_pool_shutdown(void)
{
	(void) lm_shutdown();
}
2419 
/*
 * Stop NLM service, cleanup all resources
 * NLM owns at the moment.
 *
 * Must be called with g->run_status == NLM_ST_STOPPING.
 * On return g->run_status is NLM_ST_DOWN.
 *
 * NOTE: NFS code can call NLM while it's
 * stopping or even if it's shut down. Any attempt
 * to lock file either on client or on the server
 * will fail if NLM isn't in NLM_ST_UP state.
 */
void
nlm_svc_stopping(struct nlm_globals *g)
{
	mutex_enter(&g->lock);
	ASSERT(g->run_status == NLM_ST_STOPPING);

	/*
	 * Ask NLM GC thread to exit and wait until it dies.
	 */
	cv_signal(&g->nlm_gc_sched_cv);
	while (g->nlm_gc_thread != NULL)
		cv_wait(&g->nlm_gc_finish_cv, &g->lock);

	mutex_exit(&g->lock);

	/*
	 * Cleanup locks owned by NLM hosts.
	 * NOTE: New hosts won't be created while
	 * NLM is stopping.
	 * NOTE(review): the hosts tree is walked below without
	 * g->lock held; presumably that is safe because of the
	 * note above - confirm.
	 */
	while (!avl_is_empty(&g->nlm_hosts_tree)) {
		struct nlm_host *hostp;
		int busy_hosts = 0;

		/*
		 * Iterate through all NLM hosts in the system
		 * and drop the locks they own by force.
		 */
		hostp = avl_first(&g->nlm_hosts_tree);
		while (hostp != NULL) {
			/* Cleanup all client and server side locks */
			nlm_client_cancel_all(g, hostp);
			nlm_host_notify_server(hostp, 0);

			mutex_enter(&hostp->nh_lock);
			nlm_host_gc_vholds(hostp);
			if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
				/*
				 * Oh, it seems the host is still busy, give
				 * it some time to release and go to the
				 * next one.
				 */

				mutex_exit(&hostp->nh_lock);
				hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
				busy_hosts++;
				continue;
			}

			/*
			 * The host is not busy. Nothing is destroyed
			 * here; that is done through nlm_idle_hosts below.
			 */
			mutex_exit(&hostp->nh_lock);
			hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
		}

		/*
		 * All hosts go to nlm_idle_hosts list after
		 * all locks they own are cleaned up and last references
		 * were dropped. Just destroy all hosts in nlm_idle_hosts
		 * list, they can not be removed from there while we're
		 * in stopping state.
		 */
		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
			nlm_host_unregister(g, hostp);
			nlm_host_destroy(hostp);
		}

		if (busy_hosts > 0) {
			/*
			 * There are some hosts that weren't cleaned
			 * up. Probably they're in resource cleanup
			 * process. Give them some time to drop
			 * references before the next pass.
			 */
			delay(MSEC_TO_TICK(500));
		}
	}

	ASSERT(TAILQ_EMPTY(&g->nlm_slocks));

	nlm_nsm_fini(&g->nlm_nsm);
	g->lockd_pid = 0;
	g->run_status = NLM_ST_DOWN;
}
2511 
2512 /*
2513  * Returns TRUE if the given vnode has
2514  * any active or sleeping locks.
2515  */
2516 int
2517 nlm_vp_active(const vnode_t *vp)
2518 {
2519 	struct nlm_globals *g;
2520 	struct nlm_host *hostp;
2521 	struct nlm_vhold *nvp;
2522 	int active = 0;
2523 
2524 	g = zone_getspecific(nlm_zone_key, curzone);
2525 
2526 	/*
2527 	 * Server side NLM has locks on the given vnode
2528 	 * if there exist a vhold object that holds
2529 	 * the given vnode "vp" in one of NLM hosts.
2530 	 */
2531 	mutex_enter(&g->lock);
2532 	hostp = avl_first(&g->nlm_hosts_tree);
2533 	while (hostp != NULL) {
2534 		mutex_enter(&hostp->nh_lock);
2535 		nvp = nlm_vhold_find_locked(hostp, vp);
2536 		mutex_exit(&hostp->nh_lock);
2537 		if (nvp != NULL) {
2538 			active = 1;
2539 			break;
2540 		}
2541 
2542 		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2543 	}
2544 
2545 	mutex_exit(&g->lock);
2546 	return (active);
2547 }
2548 
/*
 * Called right before NFS export is going to
 * disappear. The function walks the vholds of every
 * NLM host in the current zone, picks those whose vnode
 * belongs to the same file system as the given export
 * (compared by fsid) and cleans them with nlm_vhold_clean().
 */
void
nlm_unexport(struct exportinfo *exi)
{
	struct nlm_globals *g;
	struct nlm_host *hostp;

	g = zone_getspecific(nlm_zone_key, curzone);

	mutex_enter(&g->lock);
	hostp = avl_first(&g->nlm_hosts_tree);
	while (hostp != NULL) {
		struct nlm_vhold *nvp;

		mutex_enter(&hostp->nh_lock);
		TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
			vnode_t *vp;

			/*
			 * nh_lock is dropped while the vhold is being
			 * examined and cleaned.
			 * NOTE(review): presumably the extra nv_refcnt
			 * reference keeps the vhold alive and on the list
			 * in the meantime - confirm.
			 */
			nvp->nv_refcnt++;
			mutex_exit(&hostp->nh_lock);

			vp = nvp->nv_vp;

			/* Skip vnodes that live on some other file system. */
			if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
				goto next_iter;

			/*
			 * Ok, we found out that vnode vp is under
			 * control of the exportinfo exi, now we need
			 * to drop all locks from this vnode, let's
			 * do it.
			 */
			nlm_vhold_clean(nvp, hostp->nh_sysid);

		next_iter:
			/* Retake nh_lock before moving to the next vhold. */
			mutex_enter(&hostp->nh_lock);
			nvp->nv_refcnt--;
		}

		mutex_exit(&hostp->nh_lock);
		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
	}

	mutex_exit(&g->lock);
}
2600 
2601 /*
2602  * Allocate new unique sysid.
2603  * In case of failure (no available sysids)
2604  * return LM_NOSYSID.
2605  */
2606 sysid_t
2607 nlm_sysid_alloc(void)
2608 {
2609 	sysid_t ret_sysid = LM_NOSYSID;
2610 
2611 	rw_enter(&lm_lck, RW_WRITER);
2612 	if (nlm_sysid_nidx > LM_SYSID_MAX)
2613 		nlm_sysid_nidx = LM_SYSID;
2614 
2615 	if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2616 		BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2617 		ret_sysid = nlm_sysid_nidx++;
2618 	} else {
2619 		index_t id;
2620 
2621 		id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2622 		if (id > 0) {
2623 			nlm_sysid_nidx = id + 1;
2624 			ret_sysid = id;
2625 			BT_SET(nlm_sysid_bmap, id);
2626 		}
2627 	}
2628 
2629 	rw_exit(&lm_lck);
2630 	return (ret_sysid);
2631 }
2632 
/*
 * Release the given sysid by clearing its bit in nlm_sysid_bmap.
 * The sysid must be in the [LM_SYSID, LM_SYSID_MAX] range and
 * must be marked busy in the bitmap (both are ASSERT'ed).
 */
void
nlm_sysid_free(sysid_t sysid)
{
	ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);

	/* The bitmap is modified under lm_lck held as writer. */
	rw_enter(&lm_lck, RW_WRITER);
	ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
	BT_CLEAR(nlm_sysid_bmap, sysid);
	rw_exit(&lm_lck);
}
2643 
2644 /*
2645  * Return true if the request came from a local caller.
2646  * By necessity, this "knows" the netid names invented
2647  * in lm_svc() and nlm_netid_from_knetconfig().
2648  */
2649 bool_t
2650 nlm_caller_is_local(SVCXPRT *transp)
2651 {
2652 	char *netid;
2653 	struct netbuf *rtaddr;
2654 
2655 	netid = svc_getnetid(transp);
2656 	rtaddr = svc_getrpccaller(transp);
2657 
2658 	if (netid == NULL)
2659 		return (FALSE);
2660 
2661 	if (strcmp(netid, "ticlts") == 0 ||
2662 	    strcmp(netid, "ticotsord") == 0)
2663 		return (TRUE);
2664 
2665 	if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2666 		struct sockaddr_in *sin = (void *)rtaddr->buf;
2667 		if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2668 			return (TRUE);
2669 	}
2670 	if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2671 		struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2672 		if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2673 			return (TRUE);
2674 	}
2675 
2676 	return (FALSE); /* unknown transport */
2677 }
2678 
2679 /*
2680  * Get netid string correspondig to the given knetconfig.
2681  * If not done already, save knc->knc_rdev in our table.
2682  */
2683 const char *
2684 nlm_knc_to_netid(struct knetconfig *knc)
2685 {
2686 	int i;
2687 	dev_t rdev;
2688 	struct nlm_knc *nc;
2689 	const char *netid = NULL;
2690 
2691 	rw_enter(&lm_lck, RW_READER);
2692 	for (i = 0; i < NLM_KNCS; i++) {
2693 		nc = &nlm_netconfigs[i];
2694 
2695 		if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2696 		    strcmp(nc->n_knc.knc_protofmly,
2697 		    knc->knc_protofmly) == 0) {
2698 			netid = nc->n_netid;
2699 			rdev = nc->n_knc.knc_rdev;
2700 			break;
2701 		}
2702 	}
2703 	rw_exit(&lm_lck);
2704 
2705 	if (netid != NULL && rdev == NODEV) {
2706 		rw_enter(&lm_lck, RW_WRITER);
2707 		if (nc->n_knc.knc_rdev == NODEV)
2708 			nc->n_knc.knc_rdev = knc->knc_rdev;
2709 		rw_exit(&lm_lck);
2710 	}
2711 
2712 	return (netid);
2713 }
2714 
2715 /*
2716  * Get a knetconfig corresponding to the given netid.
2717  * If there's no knetconfig for this netid, ENOENT
2718  * is returned.
2719  */
2720 int
2721 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2722 {
2723 	int i, ret;
2724 
2725 	ret = ENOENT;
2726 	for (i = 0; i < NLM_KNCS; i++) {
2727 		struct nlm_knc *nknc;
2728 
2729 		nknc = &nlm_netconfigs[i];
2730 		if (strcmp(netid, nknc->n_netid) == 0 &&
2731 		    nknc->n_knc.knc_rdev != NODEV) {
2732 			*knc = nknc->n_knc;
2733 			ret = 0;
2734 			break;
2735 		}
2736 	}
2737 
2738 	return (ret);
2739 }
2740 
2741 void
2742 nlm_cprsuspend(void)
2743 {
2744 	struct nlm_globals *g;
2745 
2746 	rw_enter(&lm_lck, RW_READER);
2747 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2748 		nlm_suspend_zone(g);
2749 
2750 	rw_exit(&lm_lck);
2751 }
2752 
2753 void
2754 nlm_cprresume(void)
2755 {
2756 	struct nlm_globals *g;
2757 
2758 	rw_enter(&lm_lck, RW_READER);
2759 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2760 		nlm_resume_zone(g);
2761 
2762 	rw_exit(&lm_lck);
2763 }
2764 
/*
 * Initialize the RPC client handle "clnt" with clnt_tli_kinit(),
 * using the knetconfig (ns_knc) and address (ns_addr) stored in
 * "nsm", NLM_RPC_RETRIES retries and kcred credentials.
 * The result of clnt_tli_kinit() is ignored.
 */
static void
nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
{
	(void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
	    NLM_RPC_RETRIES, kcred);
}
2771 
2772 static void
2773 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2774 {
2775 	/* LINTED pointer alignment */
2776 	struct sockaddr *sa = (struct sockaddr *)addr->buf;
2777 
2778 	*family = sa->sa_family;
2779 
2780 	switch (sa->sa_family) {
2781 	case AF_INET: {
2782 		/* LINTED pointer alignment */
2783 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2784 
2785 		obj->n_len = sizeof (sin->sin_addr);
2786 		obj->n_bytes = (char *)&sin->sin_addr;
2787 		break;
2788 	}
2789 
2790 	case AF_INET6: {
2791 		/* LINTED pointer alignment */
2792 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2793 
2794 		obj->n_len = sizeof (sin6->sin6_addr);
2795 		obj->n_bytes = (char *)&sin6->sin6_addr;
2796 		break;
2797 	}
2798 
2799 	default:
2800 		VERIFY(0);
2801 		break;
2802 	}
2803 }
2804