xref: /titanic_44/usr/src/uts/common/klm/nlm_impl.c (revision 096e63b2c66f47e2a2d213edc199cdb082d8b2d6)
1 /*
2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3  * Authors: Doug Rabson <dfr@rabson.org>
4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright (c) 2012 by Delphix. All rights reserved.
30  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
31  */
32 
33 /*
34  * NFS LockManager, start/stop, support functions, etc.
35  * Most of the interesting code is here.
36  *
37  * Source code derived from FreeBSD nlm_prot_impl.c
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/thread.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/mount.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/share.h>
49 #include <sys/socket.h>
50 #include <sys/syscall.h>
51 #include <sys/syslog.h>
52 #include <sys/systm.h>
53 #include <sys/class.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 #include <sys/vfs.h>
57 #include <sys/queue.h>
58 #include <sys/bitmap.h>
59 #include <sys/sdt.h>
60 #include <netinet/in.h>
61 
62 #include <rpc/rpc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/pmap_prot.h>
65 #include <rpc/pmap_clnt.h>
66 #include <rpc/rpcb_prot.h>
67 
68 #include <rpcsvc/nlm_prot.h>
69 #include <rpcsvc/sm_inter.h>
70 #include <rpcsvc/nsm_addr.h>
71 
72 #include <nfs/nfs.h>
73 #include <nfs/nfs_clnt.h>
74 #include <nfs/export.h>
75 #include <nfs/rnode.h>
76 #include <nfs/lm.h>
77 
78 #include "nlm_impl.h"
79 
80 struct nlm_knc {
81 	struct knetconfig	n_knc;
82 	const char		*n_netid;
83 };
84 
85 /*
86  * Number of attempts NLM tries to obtain RPC binding
87  * of local statd.
88  */
89 #define	NLM_NSM_RPCBIND_RETRIES 10
90 
91 /*
92  * Timeout (in seconds) NLM waits before making another
93  * attempt to obtain RPC binding of local statd.
94  */
95 #define	NLM_NSM_RPCBIND_TIMEOUT 5
96 
97 /*
98  * Total number of sysids in NLM sysid bitmap
99  */
100 #define	NLM_BMAP_NITEMS	(LM_SYSID_MAX + 1)
101 
/*
 * Number of ulong_t words in bitmap that is used
 * for allocation of sysid numbers.
 *
 * BT_BITOUL() rounds up, so the bitmap always has room for
 * all NLM_BMAP_NITEMS bits, even if that number is not an
 * exact multiple of BT_NBIPUL. When it is a multiple, the
 * result is the same as a plain division.
 */
#define	NLM_BMAP_WORDS	BT_BITOUL(NLM_BMAP_NITEMS)
107 
108 /*
109  * Given an integer x, the macro returns
110  * -1 if x is negative,
111  *  0 if x is zero
112  *  1 if x is positive
113  */
114 #define	SIGN(x) (((x) > 0) - ((x) < 0))
115 
116 #define	ARRSIZE(arr)	(sizeof (arr) / sizeof ((arr)[0]))
117 #define	NLM_KNCS	ARRSIZE(nlm_netconfigs)
118 
119 krwlock_t lm_lck;
120 
121 /*
122  * Zero timeout for asynchronous NLM RPC operations
123  */
124 static const struct timeval nlm_rpctv_zero = { 0,  0 };
125 
126 /*
127  * List of all Zone globals nlm_globals instances
128  * linked together.
129  */
130 static struct nlm_globals_list nlm_zones_list; /* (g) */
131 
132 /*
133  * NLM kmem caches
134  */
135 static struct kmem_cache *nlm_hosts_cache = NULL;
136 static struct kmem_cache *nlm_vhold_cache = NULL;
137 
138 /*
139  * A bitmap for allocation of new sysids.
140  * Sysid is a unique number between LM_SYSID
141  * and LM_SYSID_MAX. Sysid represents unique remote
142  * host that does file locks on the given host.
143  */
144 static ulong_t	nlm_sysid_bmap[NLM_BMAP_WORDS];	/* (g) */
145 static int	nlm_sysid_nidx;			/* (g) */
146 
147 /*
148  * RPC service registration for all transports
149  */
150 static SVC_CALLOUT nlm_svcs[] = {
151 	{ NLM_PROG, 4, 4, nlm_prog_4 },	/* NLM4_VERS */
152 	{ NLM_PROG, 1, 3, nlm_prog_3 }	/* NLM_VERS - NLM_VERSX */
153 };
154 
155 static SVC_CALLOUT_TABLE nlm_sct = {
156 	ARRSIZE(nlm_svcs),
157 	FALSE,
158 	nlm_svcs
159 };
160 
161 /*
162  * Static table of all netid/knetconfig network
163  * lock manager can work with. nlm_netconfigs table
164  * is used when we need to get valid knetconfig by
165  * netid and vice versa.
166  *
167  * Knetconfigs are activated either by the call from
168  * user-space lockd daemon (server side) or by taking
169  * knetconfig from NFS mountinfo (client side)
170  */
171 static struct nlm_knc nlm_netconfigs[] = { /* (g) */
172 	/* UDP */
173 	{
174 		{ NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
175 		"udp",
176 	},
177 	/* TCP */
178 	{
179 		{ NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
180 		"tcp",
181 	},
182 	/* UDP over IPv6 */
183 	{
184 		{ NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
185 		"udp6",
186 	},
187 	/* TCP over IPv6 */
188 	{
189 		{ NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
190 		"tcp6",
191 	},
192 	/* ticlts (loopback over UDP) */
193 	{
194 		{ NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
195 		"ticlts",
196 	},
197 	/* ticotsord (loopback over TCP) */
198 	{
199 		{ NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
200 		"ticotsord",
201 	},
202 };
203 
204 /*
205  * NLM misc. function
206  */
207 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
208 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
209 static void nlm_kmem_reclaim(void *);
210 static void nlm_pool_shutdown(void);
211 static void nlm_suspend_zone(struct nlm_globals *);
212 static void nlm_resume_zone(struct nlm_globals *);
213 static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
214 static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
215 
216 /*
217  * NLM thread functions
218  */
219 static void nlm_gc(struct nlm_globals *);
220 static void nlm_reclaimer(struct nlm_host *);
221 
222 /*
223  * NLM NSM functions
224  */
225 static int nlm_init_local_knc(struct knetconfig *);
226 static int nlm_nsm_init_local(struct nlm_nsm *);
227 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
228 static void nlm_nsm_fini(struct nlm_nsm *);
229 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
230 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
231 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
232 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
233 
234 /*
235  * NLM host functions
236  */
237 static int nlm_host_ctor(void *, void *, int);
238 static void nlm_host_dtor(void *, void *);
239 static void nlm_host_destroy(struct nlm_host *);
240 static struct nlm_host *nlm_host_create(char *, const char *,
241     struct knetconfig *, struct netbuf *);
242 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
243     const char *, struct netbuf *, avl_index_t *);
244 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
245 static void nlm_host_gc_vholds(struct nlm_host *);
246 static bool_t nlm_host_has_srv_locks(struct nlm_host *);
247 static bool_t nlm_host_has_cli_locks(struct nlm_host *);
248 static bool_t nlm_host_has_locks(struct nlm_host *);
249 
250 /*
251  * NLM vhold functions
252  */
253 static int nlm_vhold_ctor(void *, void *, int);
254 static void nlm_vhold_dtor(void *, void *);
255 static void nlm_vhold_destroy(struct nlm_host *,
256     struct nlm_vhold *);
257 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
258 static void nlm_vhold_clean(struct nlm_vhold *, int);
259 
260 /*
261  * NLM client/server sleeping locks/share reservation functions
262  */
263 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
264     struct nlm_vhold *, struct flock64 *);
265 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
266 static void nlm_shres_destroy_item(struct nlm_shres *);
267 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
268 
269 /*
270  * NLM initialization functions.
271  */
272 void
nlm_init(void)273 nlm_init(void)
274 {
275 	nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
276 	    sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
277 	    nlm_kmem_reclaim, NULL, NULL, 0);
278 
279 	nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
280 	    sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
281 	    NULL, NULL, NULL, 0);
282 
283 	nlm_rpc_init();
284 	TAILQ_INIT(&nlm_zones_list);
285 
286 	/* initialize sysids bitmap */
287 	bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
288 	nlm_sysid_nidx = 1;
289 
290 	/*
291 	 * Reserv the sysid #0, because it's associated
292 	 * with local locks only. Don't let to allocate
293 	 * it for remote locks.
294 	 */
295 	BT_SET(nlm_sysid_bmap, 0);
296 }
297 
298 void
nlm_globals_register(struct nlm_globals * g)299 nlm_globals_register(struct nlm_globals *g)
300 {
301 	rw_enter(&lm_lck, RW_WRITER);
302 	TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
303 	rw_exit(&lm_lck);
304 }
305 
306 void
nlm_globals_unregister(struct nlm_globals * g)307 nlm_globals_unregister(struct nlm_globals *g)
308 {
309 	rw_enter(&lm_lck, RW_WRITER);
310 	TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
311 	rw_exit(&lm_lck);
312 }
313 
314 /* ARGSUSED */
315 static void
nlm_kmem_reclaim(void * cdrarg)316 nlm_kmem_reclaim(void *cdrarg)
317 {
318 	struct nlm_globals *g;
319 
320 	rw_enter(&lm_lck, RW_READER);
321 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
322 		cv_broadcast(&g->nlm_gc_sched_cv);
323 
324 	rw_exit(&lm_lck);
325 }
326 
327 /*
328  * NLM garbage collector thread (GC).
329  *
330  * NLM GC periodically checks whether there're any host objects
331  * that can be cleaned up. It also releases stale vnodes that
332  * live on the server side (under protection of vhold objects).
333  *
334  * NLM host objects are cleaned up from GC thread because
335  * operations helping us to determine whether given host has
336  * any locks can be quite expensive and it's not good to call
337  * them every time the very last reference to the host is dropped.
338  * Thus we use "lazy" approach for hosts cleanup.
339  *
340  * The work of GC is to release stale vnodes on the server side
341  * and destroy hosts that haven't any locks and any activity for
342  * some time (i.e. idle hosts).
343  */
344 static void
nlm_gc(struct nlm_globals * g)345 nlm_gc(struct nlm_globals *g)
346 {
347 	struct nlm_host *hostp;
348 	clock_t now, idle_period;
349 
350 	idle_period = SEC_TO_TICK(g->cn_idle_tmo);
351 	mutex_enter(&g->lock);
352 	for (;;) {
353 		/*
354 		 * GC thread can be explicitly scheduled from
355 		 * memory reclamation function.
356 		 */
357 		(void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
358 		    ddi_get_lbolt() + idle_period);
359 
360 		/*
361 		 * NLM is shutting down, time to die.
362 		 */
363 		if (g->run_status == NLM_ST_STOPPING)
364 			break;
365 
366 		now = ddi_get_lbolt();
367 		DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
368 		    clock_t, now);
369 
370 		/*
371 		 * Handle all hosts that are unused at the moment
372 		 * until we meet one with idle timeout in future.
373 		 */
374 		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
375 			bool_t has_locks = FALSE;
376 
377 			if (hostp->nh_idle_timeout > now)
378 				break;
379 
380 			/*
381 			 * Drop global lock while doing expensive work
382 			 * on this host. We'll re-check any conditions
383 			 * that might change after retaking the global
384 			 * lock.
385 			 */
386 			mutex_exit(&g->lock);
387 			mutex_enter(&hostp->nh_lock);
388 
389 			/*
390 			 * nlm_globals lock was dropped earlier because
391 			 * garbage collecting of vholds and checking whether
392 			 * host has any locks/shares are expensive operations.
393 			 */
394 			nlm_host_gc_vholds(hostp);
395 			has_locks = nlm_host_has_locks(hostp);
396 
397 			mutex_exit(&hostp->nh_lock);
398 			mutex_enter(&g->lock);
399 
400 			/*
401 			 * While we were doing expensive operations
402 			 * outside of nlm_globals critical section,
403 			 * somebody could take the host and remove it
404 			 * from the idle list.  Whether its been
405 			 * reinserted or not, our information about
406 			 * the host is outdated, and we should take no
407 			 * further action.
408 			 */
409 			if (hostp->nh_idle_timeout > now || hostp->nh_refs > 0)
410 				continue;
411 
412 			/*
413 			 * If the host has locks we have to renew the
414 			 * host's timeout and put it at the end of LRU
415 			 * list.
416 			 */
417 			if (has_locks) {
418 				TAILQ_REMOVE(&g->nlm_idle_hosts,
419 				    hostp, nh_link);
420 				hostp->nh_idle_timeout = now + idle_period;
421 				TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
422 				    hostp, nh_link);
423 				continue;
424 			}
425 
426 			/*
427 			 * We're here if all the following conditions hold:
428 			 * 1) Host hasn't any locks or share reservations
429 			 * 2) Host is unused
430 			 * 3) Host wasn't touched by anyone at least for
431 			 *    g->cn_idle_tmo seconds.
432 			 *
433 			 * So, now we can destroy it.
434 			 */
435 			nlm_host_unregister(g, hostp);
436 			mutex_exit(&g->lock);
437 
438 			nlm_host_unmonitor(g, hostp);
439 			nlm_host_destroy(hostp);
440 			mutex_enter(&g->lock);
441 			if (g->run_status == NLM_ST_STOPPING)
442 				break;
443 
444 		}
445 
446 		DTRACE_PROBE(gc__end);
447 	}
448 
449 	DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
450 
451 	/* Let others know that GC has died */
452 	g->nlm_gc_thread = NULL;
453 	mutex_exit(&g->lock);
454 
455 	cv_broadcast(&g->nlm_gc_finish_cv);
456 	zthread_exit();
457 }
458 
459 /*
460  * Thread reclaim locks/shares acquired by the client side
461  * on the given server represented by hostp.
462  */
463 static void
nlm_reclaimer(struct nlm_host * hostp)464 nlm_reclaimer(struct nlm_host *hostp)
465 {
466 	struct nlm_globals *g;
467 
468 	mutex_enter(&hostp->nh_lock);
469 	hostp->nh_reclaimer = curthread;
470 	mutex_exit(&hostp->nh_lock);
471 
472 	g = zone_getspecific(nlm_zone_key, curzone);
473 	nlm_reclaim_client(g, hostp);
474 
475 	mutex_enter(&hostp->nh_lock);
476 	hostp->nh_flags &= ~NLM_NH_RECLAIM;
477 	hostp->nh_reclaimer = NULL;
478 	cv_broadcast(&hostp->nh_recl_cv);
479 	mutex_exit(&hostp->nh_lock);
480 
481 	/*
482 	 * Host was explicitly referenced before
483 	 * nlm_reclaim() was called, release it
484 	 * here.
485 	 */
486 	nlm_host_release(g, hostp);
487 	zthread_exit();
488 }
489 
490 /*
491  * Copy a struct netobj.  (see xdr.h)
492  */
493 void
nlm_copy_netobj(struct netobj * dst,struct netobj * src)494 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
495 {
496 	dst->n_len = src->n_len;
497 	dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
498 	bcopy(src->n_bytes, dst->n_bytes, src->n_len);
499 }
500 
501 /*
502  * An NLM specificw replacement for clnt_call().
503  * nlm_clnt_call() is used by all RPC functions generated
504  * from nlm_prot.x specification. The function is aware
505  * about some pitfalls of NLM RPC procedures and has a logic
506  * that handles them properly.
507  */
508 enum clnt_stat
nlm_clnt_call(CLIENT * clnt,rpcproc_t procnum,xdrproc_t xdr_args,caddr_t argsp,xdrproc_t xdr_result,caddr_t resultp,struct timeval wait)509 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
510     caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
511 {
512 	k_sigset_t oldmask;
513 	enum clnt_stat stat;
514 	bool_t sig_blocked = FALSE;
515 
516 	/*
517 	 * If NLM RPC procnum is one of the NLM _RES procedures
518 	 * that are used to reply to asynchronous NLM RPC
519 	 * (MSG calls), explicitly set RPC timeout to zero.
520 	 * Client doesn't send a reply to RES procedures, so
521 	 * we don't need to wait anything.
522 	 *
523 	 * NOTE: we ignore NLM4_*_RES procnums because they are
524 	 * equal to NLM_*_RES numbers.
525 	 */
526 	if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
527 		wait = nlm_rpctv_zero;
528 
529 	/*
530 	 * We need to block signals in case of NLM_CANCEL RPC
531 	 * in order to prevent interruption of network RPC
532 	 * calls.
533 	 */
534 	if (procnum == NLM_CANCEL) {
535 		k_sigset_t newmask;
536 
537 		sigfillset(&newmask);
538 		sigreplace(&newmask, &oldmask);
539 		sig_blocked = TRUE;
540 	}
541 
542 	stat = clnt_call(clnt, procnum, xdr_args,
543 	    argsp, xdr_result, resultp, wait);
544 
545 	/*
546 	 * Restore signal mask back if signals were blocked
547 	 */
548 	if (sig_blocked)
549 		sigreplace(&oldmask, (k_sigset_t *)NULL);
550 
551 	return (stat);
552 }
553 
554 /*
555  * Suspend NLM client/server in the given zone.
556  *
557  * During suspend operation we mark those hosts
558  * that have any locks with NLM_NH_SUSPEND flags,
559  * so that they can be checked later, when resume
560  * operation occurs.
561  */
562 static void
nlm_suspend_zone(struct nlm_globals * g)563 nlm_suspend_zone(struct nlm_globals *g)
564 {
565 	struct nlm_host *hostp;
566 	struct nlm_host_list all_hosts;
567 
568 	/*
569 	 * Note that while we're doing suspend, GC thread is active
570 	 * and it can destroy some hosts while we're walking through
571 	 * the hosts tree. To prevent that and make suspend logic
572 	 * a bit more simple we put all hosts to local "all_hosts"
573 	 * list and increment reference counter of each host.
574 	 * This guaranties that no hosts will be released while
575 	 * we're doing suspend.
576 	 * NOTE: reference of each host must be dropped during
577 	 * resume operation.
578 	 */
579 	TAILQ_INIT(&all_hosts);
580 	mutex_enter(&g->lock);
581 	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
582 	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
583 		/*
584 		 * If host is idle, remove it from idle list and
585 		 * clear idle flag. That is done to prevent GC
586 		 * from touching this host.
587 		 */
588 		if (hostp->nh_flags & NLM_NH_INIDLE) {
589 			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
590 			hostp->nh_flags &= ~NLM_NH_INIDLE;
591 		}
592 
593 		hostp->nh_refs++;
594 		TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
595 	}
596 
597 	/*
598 	 * Now we can walk through all hosts on the system
599 	 * with zone globals lock released. The fact the
600 	 * we have taken a reference to each host guaranties
601 	 * that no hosts can be destroyed during that process.
602 	 */
603 	mutex_exit(&g->lock);
604 	while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
605 		mutex_enter(&hostp->nh_lock);
606 		if (nlm_host_has_locks(hostp))
607 			hostp->nh_flags |= NLM_NH_SUSPEND;
608 
609 		mutex_exit(&hostp->nh_lock);
610 		TAILQ_REMOVE(&all_hosts, hostp, nh_link);
611 	}
612 }
613 
614 /*
615  * Resume NLM hosts for the given zone.
616  *
617  * nlm_resume_zone() is called after hosts were suspended
618  * (see nlm_suspend_zone) and its main purpose to check
619  * whether remote locks owned by hosts are still in consistent
620  * state. If they aren't, resume function tries to reclaim
621  * reclaim locks (for client side hosts) and clean locks (for
622  * server side hosts).
623  */
624 static void
nlm_resume_zone(struct nlm_globals * g)625 nlm_resume_zone(struct nlm_globals *g)
626 {
627 	struct nlm_host *hostp, *h_next;
628 
629 	mutex_enter(&g->lock);
630 	hostp = avl_first(&g->nlm_hosts_tree);
631 
632 	/*
633 	 * In nlm_suspend_zone() the reference counter of each
634 	 * host was incremented, so we can safely iterate through
635 	 * all hosts without worrying that any host we touch will
636 	 * be removed at the moment.
637 	 */
638 	while (hostp != NULL) {
639 		struct nlm_nsm nsm;
640 		enum clnt_stat stat;
641 		int32_t sm_state;
642 		int error;
643 		bool_t resume_failed = FALSE;
644 
645 		h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
646 		mutex_exit(&g->lock);
647 
648 		DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
649 
650 		/*
651 		 * Suspend operation marked that the host doesn't
652 		 * have any locks. Skip it.
653 		 */
654 		if (!(hostp->nh_flags & NLM_NH_SUSPEND))
655 			goto cycle_end;
656 
657 		error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
658 		if (error != 0) {
659 			NLM_ERR("Resume: Failed to contact to NSM of host %s "
660 			    "[error=%d]\n", hostp->nh_name, error);
661 			resume_failed = TRUE;
662 			goto cycle_end;
663 		}
664 
665 		stat = nlm_nsm_stat(&nsm, &sm_state);
666 		if (stat != RPC_SUCCESS) {
667 			NLM_ERR("Resume: Failed to call SM_STAT operation for "
668 			    "host %s [stat=%d]\n", hostp->nh_name, stat);
669 			resume_failed = TRUE;
670 			nlm_nsm_fini(&nsm);
671 			goto cycle_end;
672 		}
673 
674 		if (sm_state != hostp->nh_state) {
675 			/*
676 			 * Current SM state of the host isn't equal
677 			 * to the one host had when it was suspended.
678 			 * Probably it was rebooted. Try to reclaim
679 			 * locks if the host has any on its client side.
680 			 * Also try to clean up its server side locks
681 			 * (if the host has any).
682 			 */
683 			nlm_host_notify_client(hostp, sm_state);
684 			nlm_host_notify_server(hostp, sm_state);
685 		}
686 
687 		nlm_nsm_fini(&nsm);
688 
689 cycle_end:
690 		if (resume_failed) {
691 			/*
692 			 * Resume failed for the given host.
693 			 * Just clean up all resources it owns.
694 			 */
695 			nlm_host_notify_server(hostp, 0);
696 			nlm_client_cancel_all(g, hostp);
697 		}
698 
699 		hostp->nh_flags &= ~NLM_NH_SUSPEND;
700 		nlm_host_release(g, hostp);
701 		hostp = h_next;
702 		mutex_enter(&g->lock);
703 	}
704 
705 	mutex_exit(&g->lock);
706 }
707 
708 /*
709  * NLM functions responsible for operations on NSM handle.
710  */
711 
712 /*
713  * Initialize knetconfig that is used for communication
714  * with local statd via loopback interface.
715  */
716 static int
nlm_init_local_knc(struct knetconfig * knc)717 nlm_init_local_knc(struct knetconfig *knc)
718 {
719 	int error;
720 	vnode_t *vp;
721 
722 	bzero(knc, sizeof (*knc));
723 	error = lookupname("/dev/tcp", UIO_SYSSPACE,
724 	    FOLLOW, NULLVPP, &vp);
725 	if (error != 0)
726 		return (error);
727 
728 	knc->knc_semantics = NC_TPI_COTS;
729 	knc->knc_protofmly = NC_INET;
730 	knc->knc_proto = NC_TCP;
731 	knc->knc_rdev = vp->v_rdev;
732 	VN_RELE(vp);
733 
734 
735 	return (0);
736 }
737 
738 /*
739  * Initialize NSM handle that will be used to talk
740  * to local statd via loopback interface.
741  */
742 static int
nlm_nsm_init_local(struct nlm_nsm * nsm)743 nlm_nsm_init_local(struct nlm_nsm *nsm)
744 {
745 	int error;
746 	struct knetconfig knc;
747 	struct sockaddr_in sin;
748 	struct netbuf nb;
749 
750 	error = nlm_init_local_knc(&knc);
751 	if (error != 0)
752 		return (error);
753 
754 	bzero(&sin, sizeof (sin));
755 	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
756 	sin.sin_family = AF_INET;
757 
758 	nb.buf = (char *)&sin;
759 	nb.len = nb.maxlen = sizeof (sin);
760 
761 	return (nlm_nsm_init(nsm, &knc, &nb));
762 }
763 
764 /*
765  * Initialize NSM handle used for talking to statd
766  */
767 static int
nlm_nsm_init(struct nlm_nsm * nsm,struct knetconfig * knc,struct netbuf * nb)768 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
769 {
770 	enum clnt_stat stat;
771 	int error, retries;
772 
773 	bzero(nsm, sizeof (*nsm));
774 	nsm->ns_knc = *knc;
775 	nlm_copy_netbuf(&nsm->ns_addr, nb);
776 
777 	/*
778 	 * Try several times to get the port of statd service,
779 	 * If rpcbind_getaddr returns  RPC_PROGNOTREGISTERED,
780 	 * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
781 	 * seconds berofore.
782 	 */
783 	for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
784 		stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
785 		    SM_VERS, &nsm->ns_addr);
786 		if (stat != RPC_SUCCESS) {
787 			if (stat == RPC_PROGNOTREGISTERED) {
788 				delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
789 				continue;
790 			}
791 		}
792 
793 		break;
794 	}
795 
796 	if (stat != RPC_SUCCESS) {
797 		DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
798 		    int, retries);
799 		error = ENOENT;
800 		goto error;
801 	}
802 
803 	/*
804 	 * Create an RPC handle that'll be used for communication with local
805 	 * statd using the status monitor protocol.
806 	 */
807 	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
808 	    0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
809 	if (error != 0)
810 		goto error;
811 
812 	/*
813 	 * Create an RPC handle that'll be used for communication with the
814 	 * local statd using the address registration protocol.
815 	 */
816 	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
817 	    NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
818 	if (error != 0)
819 		goto error;
820 
821 	sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
822 	return (0);
823 
824 error:
825 	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
826 	if (nsm->ns_handle)
827 		CLNT_DESTROY(nsm->ns_handle);
828 
829 	return (error);
830 }
831 
832 static void
nlm_nsm_fini(struct nlm_nsm * nsm)833 nlm_nsm_fini(struct nlm_nsm *nsm)
834 {
835 	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
836 	CLNT_DESTROY(nsm->ns_addr_handle);
837 	nsm->ns_addr_handle = NULL;
838 	CLNT_DESTROY(nsm->ns_handle);
839 	nsm->ns_handle = NULL;
840 	sema_destroy(&nsm->ns_sem);
841 }
842 
843 static enum clnt_stat
nlm_nsm_simu_crash(struct nlm_nsm * nsm)844 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
845 {
846 	enum clnt_stat stat;
847 
848 	sema_p(&nsm->ns_sem);
849 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
850 	stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
851 	sema_v(&nsm->ns_sem);
852 
853 	return (stat);
854 }
855 
856 static enum clnt_stat
nlm_nsm_stat(struct nlm_nsm * nsm,int32_t * out_stat)857 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
858 {
859 	struct sm_name args;
860 	struct sm_stat_res res;
861 	enum clnt_stat stat;
862 
863 	args.mon_name = uts_nodename();
864 	bzero(&res, sizeof (res));
865 
866 	sema_p(&nsm->ns_sem);
867 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
868 	stat = sm_stat_1(&args, &res, nsm->ns_handle);
869 	sema_v(&nsm->ns_sem);
870 
871 	if (stat == RPC_SUCCESS)
872 		*out_stat = res.state;
873 
874 	return (stat);
875 }
876 
877 static enum clnt_stat
nlm_nsm_mon(struct nlm_nsm * nsm,char * hostname,uint16_t priv)878 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
879 {
880 	struct mon args;
881 	struct sm_stat_res res;
882 	enum clnt_stat stat;
883 
884 	bzero(&args, sizeof (args));
885 	bzero(&res, sizeof (res));
886 
887 	args.mon_id.mon_name = hostname;
888 	args.mon_id.my_id.my_name = uts_nodename();
889 	args.mon_id.my_id.my_prog = NLM_PROG;
890 	args.mon_id.my_id.my_vers = NLM_SM;
891 	args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
892 	bcopy(&priv, args.priv, sizeof (priv));
893 
894 	sema_p(&nsm->ns_sem);
895 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
896 	stat = sm_mon_1(&args, &res, nsm->ns_handle);
897 	sema_v(&nsm->ns_sem);
898 
899 	return (stat);
900 }
901 
902 static enum clnt_stat
nlm_nsm_unmon(struct nlm_nsm * nsm,char * hostname)903 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
904 {
905 	struct mon_id args;
906 	struct sm_stat res;
907 	enum clnt_stat stat;
908 
909 	bzero(&args, sizeof (args));
910 	bzero(&res, sizeof (res));
911 
912 	args.mon_name = hostname;
913 	args.my_id.my_name = uts_nodename();
914 	args.my_id.my_prog = NLM_PROG;
915 	args.my_id.my_vers = NLM_SM;
916 	args.my_id.my_proc = NLM_SM_NOTIFY1;
917 
918 	sema_p(&nsm->ns_sem);
919 	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
920 	stat = sm_unmon_1(&args, &res, nsm->ns_handle);
921 	sema_v(&nsm->ns_sem);
922 
923 	return (stat);
924 }
925 
926 static enum clnt_stat
nlm_nsmaddr_reg(struct nlm_nsm * nsm,char * name,int family,netobj * address)927 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
928 {
929 	struct reg1args args = { 0 };
930 	struct reg1res res = { 0 };
931 	enum clnt_stat stat;
932 
933 	args.family = family;
934 	args.name = name;
935 	args.address = *address;
936 
937 	sema_p(&nsm->ns_sem);
938 	nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
939 	stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
940 	sema_v(&nsm->ns_sem);
941 
942 	return (stat);
943 }
944 
945 /*
946  * Get NLM vhold object corresponding to vnode "vp".
947  * If no such object was found, create a new one.
948  *
949  * The purpose of this function is to associate vhold
950  * object with given vnode, so that:
951  * 1) vnode is hold (VN_HOLD) while vhold object is alive.
952  * 2) host has a track of all vnodes it touched by lock
953  *    or share operations. These vnodes are accessible
954  *    via collection of vhold objects.
955  */
956 struct nlm_vhold *
nlm_vhold_get(struct nlm_host * hostp,vnode_t * vp)957 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
958 {
959 	struct nlm_vhold *nvp, *new_nvp = NULL;
960 
961 	mutex_enter(&hostp->nh_lock);
962 	nvp = nlm_vhold_find_locked(hostp, vp);
963 	if (nvp != NULL)
964 		goto out;
965 
966 	/* nlm_vhold wasn't found, then create a new one */
967 	mutex_exit(&hostp->nh_lock);
968 	new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
969 
970 	/*
971 	 * Check if another thread has already
972 	 * created the same nlm_vhold.
973 	 */
974 	mutex_enter(&hostp->nh_lock);
975 	nvp = nlm_vhold_find_locked(hostp, vp);
976 	if (nvp == NULL) {
977 		nvp = new_nvp;
978 		new_nvp = NULL;
979 
980 		TAILQ_INIT(&nvp->nv_slreqs);
981 		nvp->nv_vp = vp;
982 		nvp->nv_refcnt = 1;
983 		VN_HOLD(nvp->nv_vp);
984 
985 		VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
986 		    (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
987 		TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
988 	}
989 
990 out:
991 	mutex_exit(&hostp->nh_lock);
992 	if (new_nvp != NULL)
993 		kmem_cache_free(nlm_vhold_cache, new_nvp);
994 
995 	return (nvp);
996 }
997 
998 /*
999  * Drop a reference to vhold object nvp.
1000  */
1001 void
nlm_vhold_release(struct nlm_host * hostp,struct nlm_vhold * nvp)1002 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1003 {
1004 	if (nvp == NULL)
1005 		return;
1006 
1007 	mutex_enter(&hostp->nh_lock);
1008 	ASSERT(nvp->nv_refcnt > 0);
1009 	nvp->nv_refcnt--;
1010 	mutex_exit(&hostp->nh_lock);
1011 }
1012 
1013 /*
1014  * Clean all locks and share reservations on the
1015  * given vhold object that were acquired by the
1016  * given sysid
1017  */
1018 static void
nlm_vhold_clean(struct nlm_vhold * nvp,int sysid)1019 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1020 {
1021 	cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1022 	cleanshares_by_sysid(nvp->nv_vp, sysid);
1023 }
1024 
1025 static void
nlm_vhold_destroy(struct nlm_host * hostp,struct nlm_vhold * nvp)1026 nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1027 {
1028 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1029 
1030 	VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
1031 	    (mod_hash_key_t)nvp->nv_vp,
1032 	    (mod_hash_val_t)&nvp) == 0);
1033 
1034 	TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
1035 	VN_RELE(nvp->nv_vp);
1036 	nvp->nv_vp = NULL;
1037 
1038 	kmem_cache_free(nlm_vhold_cache, nvp);
1039 }
1040 
1041 /*
1042  * Return TRUE if the given vhold is busy.
1043  * Vhold object is considered to be "busy" when
1044  * all the following conditions hold:
1045  * 1) No one uses it at the moment;
1046  * 2) It hasn't any locks;
1047  * 3) It hasn't any share reservations;
1048  */
1049 static bool_t
nlm_vhold_busy(struct nlm_host * hostp,struct nlm_vhold * nvp)1050 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1051 {
1052 	vnode_t *vp;
1053 	int sysid;
1054 
1055 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1056 
1057 	if (nvp->nv_refcnt > 0)
1058 		return (TRUE);
1059 
1060 	vp = nvp->nv_vp;
1061 	sysid = hostp->nh_sysid;
1062 	if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1063 	    shr_has_remote_shares(vp, sysid))
1064 		return (TRUE);
1065 
1066 	return (FALSE);
1067 }
1068 
1069 /* ARGSUSED */
1070 static int
nlm_vhold_ctor(void * datap,void * cdrarg,int kmflags)1071 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1072 {
1073 	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1074 
1075 	bzero(nvp, sizeof (*nvp));
1076 	return (0);
1077 }
1078 
1079 /* ARGSUSED */
1080 static void
nlm_vhold_dtor(void * datap,void * cdrarg)1081 nlm_vhold_dtor(void *datap, void *cdrarg)
1082 {
1083 	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1084 
1085 	ASSERT(nvp->nv_refcnt == 0);
1086 	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1087 	ASSERT(nvp->nv_vp == NULL);
1088 }
1089 
1090 struct nlm_vhold *
nlm_vhold_find_locked(struct nlm_host * hostp,const vnode_t * vp)1091 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1092 {
1093 	struct nlm_vhold *nvp = NULL;
1094 
1095 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1096 	(void) mod_hash_find(hostp->nh_vholds_by_vp,
1097 	    (mod_hash_key_t)vp,
1098 	    (mod_hash_val_t)&nvp);
1099 
1100 	if (nvp != NULL)
1101 		nvp->nv_refcnt++;
1102 
1103 	return (nvp);
1104 }
1105 
1106 /*
1107  * NLM host functions
1108  */
1109 static void
nlm_copy_netbuf(struct netbuf * dst,struct netbuf * src)1110 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1111 {
1112 	ASSERT(src->len <= src->maxlen);
1113 
1114 	dst->maxlen = src->maxlen;
1115 	dst->len = src->len;
1116 	dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1117 	bcopy(src->buf, dst->buf, src->len);
1118 }
1119 
1120 /* ARGSUSED */
1121 static int
nlm_host_ctor(void * datap,void * cdrarg,int kmflags)1122 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1123 {
1124 	struct nlm_host *hostp = (struct nlm_host *)datap;
1125 
1126 	bzero(hostp, sizeof (*hostp));
1127 	return (0);
1128 }
1129 
1130 /* ARGSUSED */
1131 static void
nlm_host_dtor(void * datap,void * cdrarg)1132 nlm_host_dtor(void *datap, void *cdrarg)
1133 {
1134 	struct nlm_host *hostp = (struct nlm_host *)datap;
1135 	ASSERT(hostp->nh_refs == 0);
1136 }
1137 
1138 static void
nlm_host_unregister(struct nlm_globals * g,struct nlm_host * hostp)1139 nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
1140 {
1141 	ASSERT(hostp->nh_refs == 0);
1142 	ASSERT(hostp->nh_flags & NLM_NH_INIDLE);
1143 
1144 	avl_remove(&g->nlm_hosts_tree, hostp);
1145 	VERIFY(mod_hash_remove(g->nlm_hosts_hash,
1146 	    (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
1147 	    (mod_hash_val_t)&hostp) == 0);
1148 	TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1149 	hostp->nh_flags &= ~NLM_NH_INIDLE;
1150 }
1151 
1152 /*
1153  * Free resources used by a host. This is called after the reference
1154  * count has reached zero so it doesn't need to worry about locks.
1155  */
1156 static void
nlm_host_destroy(struct nlm_host * hostp)1157 nlm_host_destroy(struct nlm_host *hostp)
1158 {
1159 	ASSERT(hostp->nh_name != NULL);
1160 	ASSERT(hostp->nh_netid != NULL);
1161 	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1162 
1163 	strfree(hostp->nh_name);
1164 	strfree(hostp->nh_netid);
1165 	kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1166 
1167 	if (hostp->nh_sysid != LM_NOSYSID)
1168 		nlm_sysid_free(hostp->nh_sysid);
1169 
1170 	nlm_rpc_cache_destroy(hostp);
1171 
1172 	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1173 	mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1174 
1175 	mutex_destroy(&hostp->nh_lock);
1176 	cv_destroy(&hostp->nh_rpcb_cv);
1177 	cv_destroy(&hostp->nh_recl_cv);
1178 
1179 	kmem_cache_free(nlm_hosts_cache, hostp);
1180 }
1181 
1182 /*
1183  * Cleanup SERVER-side state after a client restarts,
1184  * or becomes unresponsive, or whatever.
1185  *
1186  * We unlock any active locks owned by the host.
1187  * When rpc.lockd is shutting down,
1188  * this function is called with newstate set to zero
1189  * which allows us to cancel any pending async locks
1190  * and clear the locking state.
1191  *
1192  * When "state" is 0, we don't update host's state,
1193  * but cleanup all remote locks on the host.
1194  * It's useful to call this function for resources
1195  * cleanup.
1196  */
1197 void
nlm_host_notify_server(struct nlm_host * hostp,int32_t state)1198 nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1199 {
1200 	struct nlm_vhold *nvp;
1201 	struct nlm_slreq *slr;
1202 	struct nlm_slreq_list slreqs2free;
1203 
1204 	TAILQ_INIT(&slreqs2free);
1205 	mutex_enter(&hostp->nh_lock);
1206 	if (state != 0)
1207 		hostp->nh_state = state;
1208 
1209 	TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1210 
1211 		/* cleanup sleeping requests at first */
1212 		while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1213 			TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1214 
1215 			/*
1216 			 * Instead of freeing cancelled sleeping request
1217 			 * here, we add it to the linked list created
1218 			 * on the stack in order to do all frees outside
1219 			 * the critical section.
1220 			 */
1221 			TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1222 		}
1223 
1224 		nvp->nv_refcnt++;
1225 		mutex_exit(&hostp->nh_lock);
1226 
1227 		nlm_vhold_clean(nvp, hostp->nh_sysid);
1228 
1229 		mutex_enter(&hostp->nh_lock);
1230 		nvp->nv_refcnt--;
1231 	}
1232 
1233 	mutex_exit(&hostp->nh_lock);
1234 	while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1235 		TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1236 		kmem_free(slr, sizeof (*slr));
1237 	}
1238 }
1239 
1240 /*
1241  * Cleanup CLIENT-side state after a server restarts,
1242  * or becomes unresponsive, or whatever.
1243  *
1244  * This is called by the local NFS statd when we receive a
1245  * host state change notification.  (also nlm_svc_stopping)
1246  *
1247  * Deal with a server restart.  If we are stopping the
1248  * NLM service, we'll have newstate == 0, and will just
1249  * cancel all our client-side lock requests.  Otherwise,
1250  * start the "recovery" process to reclaim any locks
1251  * we hold on this server.
1252  */
1253 void
nlm_host_notify_client(struct nlm_host * hostp,int32_t state)1254 nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1255 {
1256 	mutex_enter(&hostp->nh_lock);
1257 	hostp->nh_state = state;
1258 	if (hostp->nh_flags & NLM_NH_RECLAIM) {
1259 		/*
1260 		 * Either host's state is up to date or
1261 		 * host is already in recovery.
1262 		 */
1263 		mutex_exit(&hostp->nh_lock);
1264 		return;
1265 	}
1266 
1267 	hostp->nh_flags |= NLM_NH_RECLAIM;
1268 
1269 	/*
1270 	 * Host will be released by the recovery thread,
1271 	 * thus we need to increment refcount.
1272 	 */
1273 	hostp->nh_refs++;
1274 	mutex_exit(&hostp->nh_lock);
1275 
1276 	(void) zthread_create(NULL, 0, nlm_reclaimer,
1277 	    hostp, 0, minclsyspri);
1278 }
1279 
1280 /*
1281  * The function is called when NLM client detects that
1282  * server has entered in grace period and client needs
1283  * to wait until reclamation process (if any) does
1284  * its job.
1285  */
1286 int
nlm_host_wait_grace(struct nlm_host * hostp)1287 nlm_host_wait_grace(struct nlm_host *hostp)
1288 {
1289 	struct nlm_globals *g;
1290 	int error = 0;
1291 
1292 	g = zone_getspecific(nlm_zone_key, curzone);
1293 	mutex_enter(&hostp->nh_lock);
1294 
1295 	do {
1296 		int rc;
1297 
1298 		rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1299 		    &hostp->nh_lock, ddi_get_lbolt() +
1300 		    SEC_TO_TICK(g->retrans_tmo));
1301 
1302 		if (rc == 0) {
1303 			error = EINTR;
1304 			break;
1305 		}
1306 	} while (hostp->nh_flags & NLM_NH_RECLAIM);
1307 
1308 	mutex_exit(&hostp->nh_lock);
1309 	return (error);
1310 }
1311 
1312 /*
1313  * Create a new NLM host.
1314  *
1315  * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1316  * which needs both a knetconfig and an address when creating
1317  * endpoints. Thus host object stores both knetconfig and
1318  * netid.
1319  */
1320 static struct nlm_host *
nlm_host_create(char * name,const char * netid,struct knetconfig * knc,struct netbuf * naddr)1321 nlm_host_create(char *name, const char *netid,
1322     struct knetconfig *knc, struct netbuf *naddr)
1323 {
1324 	struct nlm_host *host;
1325 
1326 	host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1327 
1328 	mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1329 	cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1330 	cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1331 
1332 	host->nh_sysid = LM_NOSYSID;
1333 	host->nh_refs = 1;
1334 	host->nh_name = strdup(name);
1335 	host->nh_netid = strdup(netid);
1336 	host->nh_knc = *knc;
1337 	nlm_copy_netbuf(&host->nh_addr, naddr);
1338 
1339 	host->nh_state = 0;
1340 	host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1341 	host->nh_flags = 0;
1342 
1343 	host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1344 	    32, mod_hash_null_valdtor, sizeof (vnode_t));
1345 
1346 	TAILQ_INIT(&host->nh_vholds_list);
1347 	TAILQ_INIT(&host->nh_rpchc);
1348 
1349 	return (host);
1350 }
1351 
1352 /*
1353  * Cancel all client side sleeping locks owned by given host.
1354  */
1355 void
nlm_host_cancel_slocks(struct nlm_globals * g,struct nlm_host * hostp)1356 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1357 {
1358 	struct nlm_slock *nslp;
1359 
1360 	mutex_enter(&g->lock);
1361 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1362 		if (nslp->nsl_host == hostp) {
1363 			nslp->nsl_state = NLM_SL_CANCELLED;
1364 			cv_broadcast(&nslp->nsl_cond);
1365 		}
1366 	}
1367 
1368 	mutex_exit(&g->lock);
1369 }
1370 
1371 /*
1372  * Garbage collect stale vhold objects.
1373  *
1374  * In other words check whether vnodes that are
1375  * held by vhold objects still have any locks
1376  * or shares or still in use. If they aren't,
1377  * just destroy them.
1378  */
1379 static void
nlm_host_gc_vholds(struct nlm_host * hostp)1380 nlm_host_gc_vholds(struct nlm_host *hostp)
1381 {
1382 	struct nlm_vhold *nvp;
1383 
1384 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1385 
1386 	nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1387 	while (nvp != NULL) {
1388 		struct nlm_vhold *nvp_tmp;
1389 
1390 		if (nlm_vhold_busy(hostp, nvp)) {
1391 			nvp = TAILQ_NEXT(nvp, nv_link);
1392 			continue;
1393 		}
1394 
1395 		nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1396 		nlm_vhold_destroy(hostp, nvp);
1397 		nvp = nvp_tmp;
1398 	}
1399 }
1400 
1401 /*
1402  * Check whether the given host has any
1403  * server side locks or share reservations.
1404  */
1405 static bool_t
nlm_host_has_srv_locks(struct nlm_host * hostp)1406 nlm_host_has_srv_locks(struct nlm_host *hostp)
1407 {
1408 	/*
1409 	 * It's cheap and simple: if server has
1410 	 * any locks/shares there must be vhold
1411 	 * object storing the affected vnode.
1412 	 *
1413 	 * NOTE: We don't need to check sleeping
1414 	 * locks on the server side, because if
1415 	 * server side sleeping lock is alive,
1416 	 * there must be a vhold object corresponding
1417 	 * to target vnode.
1418 	 */
1419 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1420 	if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1421 		return (TRUE);
1422 
1423 	return (FALSE);
1424 }
1425 
1426 /*
1427  * Check whether the given host has any client side
1428  * locks or share reservations.
1429  */
1430 static bool_t
nlm_host_has_cli_locks(struct nlm_host * hostp)1431 nlm_host_has_cli_locks(struct nlm_host *hostp)
1432 {
1433 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
1434 
1435 	/*
1436 	 * XXX: It's not the way I'd like to do the check,
1437 	 * because flk_sysid_has_locks() can be very
1438 	 * expensive by design. Unfortunatelly it iterates
1439 	 * through all locks on the system, doesn't matter
1440 	 * were they made on remote system via NLM or
1441 	 * on local system via reclock. To understand the
1442 	 * problem, consider that there're dozens of thousands
1443 	 * of locks that are made on some ZFS dataset. And there's
1444 	 * another dataset shared by NFS where NLM client had locks
1445 	 * some time ago, but doesn't have them now.
1446 	 * In this case flk_sysid_has_locks() will iterate
1447 	 * thrught dozens of thousands locks until it returns us
1448 	 * FALSE.
1449 	 * Oh, I hope that in shiny future somebody will make
1450 	 * local lock manager (os/flock.c) better, so that
1451 	 * it'd be more friedly to remote locks and
1452 	 * flk_sysid_has_locks() wouldn't be so expensive.
1453 	 */
1454 	if (flk_sysid_has_locks(hostp->nh_sysid |
1455 	    LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1456 		return (TRUE);
1457 
1458 	/*
1459 	 * Check whether host has any share reservations
1460 	 * registered on the client side.
1461 	 */
1462 	if (hostp->nh_shrlist != NULL)
1463 		return (TRUE);
1464 
1465 	return (FALSE);
1466 }
1467 
1468 /*
1469  * Determine whether the given host owns any
1470  * locks or share reservations.
1471  */
1472 static bool_t
nlm_host_has_locks(struct nlm_host * hostp)1473 nlm_host_has_locks(struct nlm_host *hostp)
1474 {
1475 	if (nlm_host_has_srv_locks(hostp))
1476 		return (TRUE);
1477 
1478 	return (nlm_host_has_cli_locks(hostp));
1479 }
1480 
1481 /*
1482  * This function compares only addresses of two netbufs
1483  * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1484  * Port part of netbuf is ignored.
1485  *
1486  * Return values:
1487  *  -1: nb1's address is "smaller" than nb2's
1488  *   0: addresses are equal
1489  *   1: nb1's address is "greater" than nb2's
1490  */
1491 static int
nlm_netbuf_addrs_cmp(struct netbuf * nb1,struct netbuf * nb2)1492 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1493 {
1494 	union nlm_addr {
1495 		struct sockaddr sa;
1496 		struct sockaddr_in sin;
1497 		struct sockaddr_in6 sin6;
1498 	} *na1, *na2;
1499 	int res;
1500 
1501 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1502 	na1 = (union nlm_addr *)nb1->buf;
1503 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1504 	na2 = (union nlm_addr *)nb2->buf;
1505 
1506 	if (na1->sa.sa_family < na2->sa.sa_family)
1507 		return (-1);
1508 	if (na1->sa.sa_family > na2->sa.sa_family)
1509 		return (1);
1510 
1511 	switch (na1->sa.sa_family) {
1512 	case AF_INET:
1513 		res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1514 		    sizeof (na1->sin.sin_addr));
1515 		break;
1516 	case AF_INET6:
1517 		res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1518 		    sizeof (na1->sin6.sin6_addr));
1519 		break;
1520 	default:
1521 		VERIFY(0);
1522 		return (0);
1523 	}
1524 
1525 	return (SIGN(res));
1526 }
1527 
1528 /*
1529  * Compare two nlm hosts.
1530  * Return values:
1531  * -1: host1 is "smaller" than host2
1532  *  0: host1 is equal to host2
1533  *  1: host1 is "greater" than host2
1534  */
1535 int
nlm_host_cmp(const void * p1,const void * p2)1536 nlm_host_cmp(const void *p1, const void *p2)
1537 {
1538 	struct nlm_host *h1 = (struct nlm_host *)p1;
1539 	struct nlm_host *h2 = (struct nlm_host *)p2;
1540 	int res;
1541 
1542 	res = strcmp(h1->nh_netid, h2->nh_netid);
1543 	if (res != 0)
1544 		return (SIGN(res));
1545 
1546 	res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1547 	return (res);
1548 }
1549 
1550 /*
1551  * Find the host specified by...  (see below)
1552  * If found, increment the ref count.
1553  */
1554 static struct nlm_host *
nlm_host_find_locked(struct nlm_globals * g,const char * netid,struct netbuf * naddr,avl_index_t * wherep)1555 nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1556     struct netbuf *naddr, avl_index_t *wherep)
1557 {
1558 	struct nlm_host *hostp, key;
1559 	avl_index_t pos;
1560 
1561 	ASSERT(MUTEX_HELD(&g->lock));
1562 
1563 	key.nh_netid = (char *)netid;
1564 	key.nh_addr.buf = naddr->buf;
1565 	key.nh_addr.len = naddr->len;
1566 	key.nh_addr.maxlen = naddr->maxlen;
1567 
1568 	hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1569 
1570 	if (hostp != NULL) {
1571 		/*
1572 		 * Host is inuse now. Remove it from idle
1573 		 * hosts list if needed.
1574 		 */
1575 		if (hostp->nh_flags & NLM_NH_INIDLE) {
1576 			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1577 			hostp->nh_flags &= ~NLM_NH_INIDLE;
1578 		}
1579 
1580 		hostp->nh_refs++;
1581 	}
1582 	if (wherep != NULL)
1583 		*wherep = pos;
1584 
1585 	return (hostp);
1586 }
1587 
1588 /*
1589  * Find NLM host for the given name and address.
1590  */
1591 struct nlm_host *
nlm_host_find(struct nlm_globals * g,const char * netid,struct netbuf * addr)1592 nlm_host_find(struct nlm_globals *g, const char *netid,
1593     struct netbuf *addr)
1594 {
1595 	struct nlm_host *hostp = NULL;
1596 
1597 	mutex_enter(&g->lock);
1598 	if (g->run_status != NLM_ST_UP)
1599 		goto out;
1600 
1601 	hostp = nlm_host_find_locked(g, netid, addr, NULL);
1602 
1603 out:
1604 	mutex_exit(&g->lock);
1605 	return (hostp);
1606 }
1607 
1608 
1609 /*
1610  * Find or create an NLM host for the given name and address.
1611  *
1612  * The remote host is determined by all of: name, netid, address.
1613  * Note that the netid is whatever nlm_svc_add_ep() gave to
1614  * svc_tli_kcreate() for the service binding.  If any of these
1615  * are different, allocate a new host (new sysid).
1616  */
1617 struct nlm_host *
nlm_host_findcreate(struct nlm_globals * g,char * name,const char * netid,struct netbuf * addr)1618 nlm_host_findcreate(struct nlm_globals *g, char *name,
1619     const char *netid, struct netbuf *addr)
1620 {
1621 	int err;
1622 	struct nlm_host *host, *newhost = NULL;
1623 	struct knetconfig knc;
1624 	avl_index_t where;
1625 
1626 	mutex_enter(&g->lock);
1627 	if (g->run_status != NLM_ST_UP) {
1628 		mutex_exit(&g->lock);
1629 		return (NULL);
1630 	}
1631 
1632 	host = nlm_host_find_locked(g, netid, addr, NULL);
1633 	mutex_exit(&g->lock);
1634 	if (host != NULL)
1635 		return (host);
1636 
1637 	err = nlm_knc_from_netid(netid, &knc);
1638 	if (err != 0)
1639 		return (NULL);
1640 	/*
1641 	 * Do allocations (etc.) outside of mutex,
1642 	 * and then check again before inserting.
1643 	 */
1644 	newhost = nlm_host_create(name, netid, &knc, addr);
1645 	newhost->nh_sysid = nlm_sysid_alloc();
1646 	if (newhost->nh_sysid == LM_NOSYSID)
1647 		goto out;
1648 
1649 	mutex_enter(&g->lock);
1650 	host = nlm_host_find_locked(g, netid, addr, &where);
1651 	if (host == NULL) {
1652 		host = newhost;
1653 		newhost = NULL;
1654 
1655 		/*
1656 		 * Insert host to the hosts AVL tree that is
1657 		 * used to lookup by <netid, address> pair.
1658 		 */
1659 		avl_insert(&g->nlm_hosts_tree, host, where);
1660 
1661 		/*
1662 		 * Insert host to the hosts hash table that is
1663 		 * used to lookup host by sysid.
1664 		 */
1665 		VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1666 		    (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1667 		    (mod_hash_val_t)host) == 0);
1668 	}
1669 
1670 	mutex_exit(&g->lock);
1671 
1672 out:
1673 	if (newhost != NULL) {
1674 		/*
1675 		 * We do not need the preallocated nlm_host
1676 		 * so decrement the reference counter
1677 		 * and destroy it.
1678 		 */
1679 		newhost->nh_refs--;
1680 		nlm_host_destroy(newhost);
1681 	}
1682 
1683 	return (host);
1684 }
1685 
1686 /*
1687  * Find the NLM host that matches the value of 'sysid'.
1688  * If found, return it with a new ref,
1689  * else return NULL.
1690  */
1691 struct nlm_host *
nlm_host_find_by_sysid(struct nlm_globals * g,sysid_t sysid)1692 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1693 {
1694 	struct nlm_host *hostp = NULL;
1695 
1696 	mutex_enter(&g->lock);
1697 	if (g->run_status != NLM_ST_UP)
1698 		goto out;
1699 
1700 	(void) mod_hash_find(g->nlm_hosts_hash,
1701 	    (mod_hash_key_t)(uintptr_t)sysid,
1702 	    (mod_hash_val_t)&hostp);
1703 
1704 	if (hostp == NULL)
1705 		goto out;
1706 
1707 	/*
1708 	 * Host is inuse now. Remove it
1709 	 * from idle hosts list if needed.
1710 	 */
1711 	if (hostp->nh_flags & NLM_NH_INIDLE) {
1712 		TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1713 		hostp->nh_flags &= ~NLM_NH_INIDLE;
1714 	}
1715 
1716 	hostp->nh_refs++;
1717 
1718 out:
1719 	mutex_exit(&g->lock);
1720 	return (hostp);
1721 }
1722 
1723 /*
1724  * Release the given host.
1725  * I.e. drop a reference that was taken earlier by one of
1726  * the following functions: nlm_host_findcreate(), nlm_host_find(),
1727  * nlm_host_find_by_sysid().
1728  *
1729  * When the very last reference is dropped, host is moved to
1730  * so-called "idle state". All hosts that are in idle state
1731  * have an idle timeout. If timeout is expired, GC thread
1732  * checks whether hosts have any locks and if they heven't
1733  * any, it removes them.
1734  * NOTE: only unused hosts can be in idle state.
1735  */
1736 void
nlm_host_release(struct nlm_globals * g,struct nlm_host * hostp)1737 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1738 {
1739 	if (hostp == NULL)
1740 		return;
1741 
1742 	mutex_enter(&g->lock);
1743 	ASSERT(hostp->nh_refs > 0);
1744 
1745 	hostp->nh_refs--;
1746 	if (hostp->nh_refs != 0) {
1747 		mutex_exit(&g->lock);
1748 		return;
1749 	}
1750 
1751 	/*
1752 	 * The very last reference to the host was dropped,
1753 	 * thus host is unused now. Set its idle timeout
1754 	 * and move it to the idle hosts LRU list.
1755 	 */
1756 	hostp->nh_idle_timeout = ddi_get_lbolt() +
1757 	    SEC_TO_TICK(g->cn_idle_tmo);
1758 
1759 	ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1760 	TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1761 	hostp->nh_flags |= NLM_NH_INIDLE;
1762 	mutex_exit(&g->lock);
1763 }
1764 
1765 /*
1766  * Unregister this NLM host (NFS client) with the local statd
1767  * due to idleness (no locks held for a while).
1768  */
1769 void
nlm_host_unmonitor(struct nlm_globals * g,struct nlm_host * host)1770 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1771 {
1772 	enum clnt_stat stat;
1773 
1774 	VERIFY(host->nh_refs == 0);
1775 	if (!(host->nh_flags & NLM_NH_MONITORED))
1776 		return;
1777 
1778 	host->nh_flags &= ~NLM_NH_MONITORED;
1779 	stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1780 	if (stat != RPC_SUCCESS) {
1781 		NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1782 		return;
1783 	}
1784 }
1785 
1786 /*
1787  * Ask the local NFS statd to begin monitoring this host.
1788  * It will call us back when that host restarts, using the
1789  * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1790  * which is handled in nlm_do_notify1().
1791  */
1792 void
nlm_host_monitor(struct nlm_globals * g,struct nlm_host * host,int state)1793 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1794 {
1795 	int family;
1796 	netobj obj;
1797 	enum clnt_stat stat;
1798 
1799 	if (state != 0 && host->nh_state == 0) {
1800 		/*
1801 		 * This is the first time we have seen an NSM state
1802 		 * Value for this host. We record it here to help
1803 		 * detect host reboots.
1804 		 */
1805 		host->nh_state = state;
1806 	}
1807 
1808 	mutex_enter(&host->nh_lock);
1809 	if (host->nh_flags & NLM_NH_MONITORED) {
1810 		mutex_exit(&host->nh_lock);
1811 		return;
1812 	}
1813 
1814 	host->nh_flags |= NLM_NH_MONITORED;
1815 	mutex_exit(&host->nh_lock);
1816 
1817 	/*
1818 	 * Before we begin monitoring the host register the network address
1819 	 * associated with this hostname.
1820 	 */
1821 	nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1822 	stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1823 	if (stat != RPC_SUCCESS) {
1824 		NLM_WARN("Failed to register address, stat=%d\n", stat);
1825 		mutex_enter(&g->lock);
1826 		host->nh_flags &= ~NLM_NH_MONITORED;
1827 		mutex_exit(&g->lock);
1828 
1829 		return;
1830 	}
1831 
1832 	/*
1833 	 * Tell statd how to call us with status updates for
1834 	 * this host. Updates arrive via nlm_do_notify1().
1835 	 *
1836 	 * We put our assigned system ID value in the priv field to
1837 	 * make it simpler to find the host if we are notified of a
1838 	 * host restart.
1839 	 */
1840 	stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1841 	if (stat != RPC_SUCCESS) {
1842 		NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1843 		mutex_enter(&g->lock);
1844 		host->nh_flags &= ~NLM_NH_MONITORED;
1845 		mutex_exit(&g->lock);
1846 
1847 		return;
1848 	}
1849 }
1850 
1851 int
nlm_host_get_state(struct nlm_host * hostp)1852 nlm_host_get_state(struct nlm_host *hostp)
1853 {
1854 
1855 	return (hostp->nh_state);
1856 }
1857 
1858 /*
1859  * NLM client/server sleeping locks
1860  */
1861 
1862 /*
1863  * Register client side sleeping lock.
1864  *
1865  * Our client code calls this to keep information
1866  * about sleeping lock somewhere. When it receives
1867  * grant callback from server or when it just
1868  * needs to remove all sleeping locks from vnode,
1869  * it uses this information for remove/apply lock
1870  * properly.
1871  */
1872 struct nlm_slock *
nlm_slock_register(struct nlm_globals * g,struct nlm_host * host,struct nlm4_lock * lock,struct vnode * vp)1873 nlm_slock_register(
1874 	struct nlm_globals *g,
1875 	struct nlm_host *host,
1876 	struct nlm4_lock *lock,
1877 	struct vnode *vp)
1878 {
1879 	struct nlm_slock *nslp;
1880 
1881 	nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1882 	cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1883 	nslp->nsl_lock = *lock;
1884 	nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1885 	nslp->nsl_state = NLM_SL_BLOCKED;
1886 	nslp->nsl_host = host;
1887 	nslp->nsl_vp = vp;
1888 
1889 	mutex_enter(&g->lock);
1890 	TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1891 	mutex_exit(&g->lock);
1892 
1893 	return (nslp);
1894 }
1895 
1896 /*
1897  * Remove this lock from the wait list and destroy it.
1898  */
1899 void
nlm_slock_unregister(struct nlm_globals * g,struct nlm_slock * nslp)1900 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1901 {
1902 	mutex_enter(&g->lock);
1903 	TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1904 	mutex_exit(&g->lock);
1905 
1906 	kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1907 	cv_destroy(&nslp->nsl_cond);
1908 	kmem_free(nslp, sizeof (*nslp));
1909 }
1910 
1911 /*
1912  * Wait for a granted callback or cancellation event
1913  * for a sleeping lock.
1914  *
1915  * If a signal interrupted the wait or if the lock
1916  * was cancelled, return EINTR - the caller must arrange to send
1917  * a cancellation to the server.
1918  *
1919  * If timeout occurred, return ETIMEDOUT - the caller must
1920  * resend the lock request to the server.
1921  *
1922  * On success return 0.
1923  */
1924 int
nlm_slock_wait(struct nlm_globals * g,struct nlm_slock * nslp,uint_t timeo_secs)1925 nlm_slock_wait(struct nlm_globals *g,
1926     struct nlm_slock *nslp, uint_t timeo_secs)
1927 {
1928 	clock_t timeo_ticks;
1929 	int cv_res, error;
1930 
1931 	/*
1932 	 * If the granted message arrived before we got here,
1933 	 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
1934 	 */
1935 	cv_res = 1;
1936 	timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
1937 
1938 	mutex_enter(&g->lock);
1939 	while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
1940 		cv_res = cv_timedwait_sig(&nslp->nsl_cond,
1941 		    &g->lock, timeo_ticks);
1942 	}
1943 
1944 	/*
1945 	 * No matter why we wake up, if the lock was
1946 	 * cancelled, let the function caller to know
1947 	 * about it by returning EINTR.
1948 	 */
1949 	if (nslp->nsl_state == NLM_SL_CANCELLED) {
1950 		error = EINTR;
1951 		goto out;
1952 	}
1953 
1954 	if (cv_res <= 0) {
1955 		/* We were woken up either by timeout or by interrupt */
1956 		error = (cv_res < 0) ? ETIMEDOUT : EINTR;
1957 
1958 		/*
1959 		 * The granted message may arrive after the
1960 		 * interrupt/timeout but before we manage to lock the
1961 		 * mutex. Detect this by examining nslp.
1962 		 */
1963 		if (nslp->nsl_state == NLM_SL_GRANTED)
1964 			error = 0;
1965 	} else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
1966 		error = 0;
1967 		VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
1968 	}
1969 
1970 out:
1971 	mutex_exit(&g->lock);
1972 	return (error);
1973 }
1974 
1975 /*
1976  * Mark client side sleeping lock as granted
1977  * and wake up a process blocked on the lock.
1978  * Called from server side NLM_GRANT handler.
1979  *
1980  * If sleeping lock is found return 0, otherwise
1981  * return ENOENT.
1982  */
1983 int
nlm_slock_grant(struct nlm_globals * g,struct nlm_host * hostp,struct nlm4_lock * alock)1984 nlm_slock_grant(struct nlm_globals *g,
1985     struct nlm_host *hostp, struct nlm4_lock *alock)
1986 {
1987 	struct nlm_slock *nslp;
1988 	int error = ENOENT;
1989 
1990 	mutex_enter(&g->lock);
1991 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1992 		if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
1993 		    (nslp->nsl_host != hostp))
1994 			continue;
1995 
1996 		if (alock->svid		== nslp->nsl_lock.svid &&
1997 		    alock->l_offset	== nslp->nsl_lock.l_offset &&
1998 		    alock->l_len	== nslp->nsl_lock.l_len &&
1999 		    alock->fh.n_len	== nslp->nsl_lock.fh.n_len &&
2000 		    bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2001 		    nslp->nsl_lock.fh.n_len) == 0) {
2002 			nslp->nsl_state = NLM_SL_GRANTED;
2003 			cv_broadcast(&nslp->nsl_cond);
2004 			error = 0;
2005 			break;
2006 		}
2007 	}
2008 
2009 	mutex_exit(&g->lock);
2010 	return (error);
2011 }
2012 
2013 /*
2014  * Register sleeping lock request corresponding to
2015  * flp on the given vhold object.
2016  * On success function returns 0, otherwise (if
2017  * lock request with the same flp is already
2018  * registered) function returns EEXIST.
2019  */
2020 int
nlm_slreq_register(struct nlm_host * hostp,struct nlm_vhold * nvp,struct flock64 * flp)2021 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2022 	struct flock64 *flp)
2023 {
2024 	struct nlm_slreq *slr, *new_slr = NULL;
2025 	int ret = EEXIST;
2026 
2027 	mutex_enter(&hostp->nh_lock);
2028 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2029 	if (slr != NULL)
2030 		goto out;
2031 
2032 	mutex_exit(&hostp->nh_lock);
2033 	new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2034 	bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2035 
2036 	mutex_enter(&hostp->nh_lock);
2037 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2038 	if (slr == NULL) {
2039 		slr = new_slr;
2040 		new_slr = NULL;
2041 		ret = 0;
2042 
2043 		TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2044 	}
2045 
2046 out:
2047 	mutex_exit(&hostp->nh_lock);
2048 	if (new_slr != NULL)
2049 		kmem_free(new_slr, sizeof (*new_slr));
2050 
2051 	return (ret);
2052 }
2053 
2054 /*
2055  * Unregister sleeping lock request corresponding
2056  * to flp from the given vhold object.
2057  * On success function returns 0, otherwise (if
2058  * lock request corresponding to flp isn't found
2059  * on the given vhold) function returns ENOENT.
2060  */
2061 int
nlm_slreq_unregister(struct nlm_host * hostp,struct nlm_vhold * nvp,struct flock64 * flp)2062 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2063 	struct flock64 *flp)
2064 {
2065 	struct nlm_slreq *slr;
2066 
2067 	mutex_enter(&hostp->nh_lock);
2068 	slr = nlm_slreq_find_locked(hostp, nvp, flp);
2069 	if (slr == NULL) {
2070 		mutex_exit(&hostp->nh_lock);
2071 		return (ENOENT);
2072 	}
2073 
2074 	TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2075 	mutex_exit(&hostp->nh_lock);
2076 
2077 	kmem_free(slr, sizeof (*slr));
2078 	return (0);
2079 }
2080 
2081 /*
2082  * Find sleeping lock request on the given vhold object by flp.
2083  */
2084 struct nlm_slreq *
nlm_slreq_find_locked(struct nlm_host * hostp,struct nlm_vhold * nvp,struct flock64 * flp)2085 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2086     struct flock64 *flp)
2087 {
2088 	struct nlm_slreq *slr = NULL;
2089 
2090 	ASSERT(MUTEX_HELD(&hostp->nh_lock));
2091 	TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2092 		if (slr->nsr_fl.l_start		== flp->l_start	&&
2093 		    slr->nsr_fl.l_len		== flp->l_len	&&
2094 		    slr->nsr_fl.l_pid		== flp->l_pid	&&
2095 		    slr->nsr_fl.l_type		== flp->l_type)
2096 			break;
2097 	}
2098 
2099 	return (slr);
2100 }
2101 
/*
 * NLM tracks active share reservations made on the client side.
 * It needs to keep track of share reservations for two purposes:
 * 1) to determine if nlm_host is busy (if it has active locks and/or
 *    share reservations, it is)
 * 2) to recover active share reservations when NLM server reports
 *    that it has rebooted.
 *
 * Unfortunately the Illumos local share reservations manager (see
 * os/share.c) doesn't have the ability to look up all reservations
 * on the system by sysid (like the local lock manager) or to get all
 * reservations by sysid.
 * It tracks reservations per vnode and is able to get/look them up
 * on a particular vnode. That's not what NLM needs. Thus it has this
 * ugly share reservations tracking scheme.
 */
2117 
2118 void
nlm_shres_track(struct nlm_host * hostp,vnode_t * vp,struct shrlock * shrp)2119 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2120 {
2121 	struct nlm_shres *nsp, *nsp_new;
2122 
2123 	/*
2124 	 * NFS code must fill the s_owner, so that
2125 	 * s_own_len is never 0.
2126 	 */
2127 	ASSERT(shrp->s_own_len > 0);
2128 	nsp_new = nlm_shres_create_item(shrp, vp);
2129 
2130 	mutex_enter(&hostp->nh_lock);
2131 	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2132 		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2133 			break;
2134 
2135 	if (nsp != NULL) {
2136 		/*
2137 		 * Found a duplicate. Do nothing.
2138 		 */
2139 
2140 		goto out;
2141 	}
2142 
2143 	nsp = nsp_new;
2144 	nsp_new = NULL;
2145 	nsp->ns_next = hostp->nh_shrlist;
2146 	hostp->nh_shrlist = nsp;
2147 
2148 out:
2149 	mutex_exit(&hostp->nh_lock);
2150 	if (nsp_new != NULL)
2151 		nlm_shres_destroy_item(nsp_new);
2152 }
2153 
2154 void
nlm_shres_untrack(struct nlm_host * hostp,vnode_t * vp,struct shrlock * shrp)2155 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2156 {
2157 	struct nlm_shres *nsp, *nsp_prev = NULL;
2158 
2159 	mutex_enter(&hostp->nh_lock);
2160 	nsp = hostp->nh_shrlist;
2161 	while (nsp != NULL) {
2162 		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2163 			struct nlm_shres *nsp_del;
2164 
2165 			nsp_del = nsp;
2166 			nsp = nsp->ns_next;
2167 			if (nsp_prev != NULL)
2168 				nsp_prev->ns_next = nsp;
2169 			else
2170 				hostp->nh_shrlist = nsp;
2171 
2172 			nlm_shres_destroy_item(nsp_del);
2173 			continue;
2174 		}
2175 
2176 		nsp_prev = nsp;
2177 		nsp = nsp->ns_next;
2178 	}
2179 
2180 	mutex_exit(&hostp->nh_lock);
2181 }
2182 
2183 /*
2184  * Get a _copy_ of the list of all active share reservations
2185  * made by the given host.
2186  * NOTE: the list function returns _must_ be released using
2187  *       nlm_free_shrlist().
2188  */
2189 struct nlm_shres *
nlm_get_active_shres(struct nlm_host * hostp)2190 nlm_get_active_shres(struct nlm_host *hostp)
2191 {
2192 	struct nlm_shres *nsp, *nslist = NULL;
2193 
2194 	mutex_enter(&hostp->nh_lock);
2195 	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2196 		struct nlm_shres *nsp_new;
2197 
2198 		nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2199 		nsp_new->ns_next = nslist;
2200 		nslist = nsp_new;
2201 	}
2202 
2203 	mutex_exit(&hostp->nh_lock);
2204 	return (nslist);
2205 }
2206 
2207 /*
2208  * Free memory allocated for the active share reservations
2209  * list created by nlm_get_active_shres() function.
2210  */
2211 void
nlm_free_shrlist(struct nlm_shres * nslist)2212 nlm_free_shrlist(struct nlm_shres *nslist)
2213 {
2214 	struct nlm_shres *nsp;
2215 
2216 	while (nslist != NULL) {
2217 		nsp =  nslist;
2218 		nslist = nslist->ns_next;
2219 
2220 		nlm_shres_destroy_item(nsp);
2221 	}
2222 }
2223 
2224 static bool_t
nlm_shres_equal(struct shrlock * shrp1,struct shrlock * shrp2)2225 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2226 {
2227 	if (shrp1->s_sysid	== shrp2->s_sysid	&&
2228 	    shrp1->s_pid	== shrp2->s_pid		&&
2229 	    shrp1->s_own_len	== shrp2->s_own_len	&&
2230 	    bcmp(shrp1->s_owner, shrp2->s_owner,
2231 	    shrp1->s_own_len) == 0)
2232 		return (TRUE);
2233 
2234 	return (FALSE);
2235 }
2236 
2237 static struct nlm_shres *
nlm_shres_create_item(struct shrlock * shrp,vnode_t * vp)2238 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2239 {
2240 	struct nlm_shres *nsp;
2241 
2242 	nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2243 	nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2244 	bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2245 	nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2246 	bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2247 	nsp->ns_vp = vp;
2248 
2249 	return (nsp);
2250 }
2251 
2252 static void
nlm_shres_destroy_item(struct nlm_shres * nsp)2253 nlm_shres_destroy_item(struct nlm_shres *nsp)
2254 {
2255 	kmem_free(nsp->ns_shr->s_owner,
2256 	    nsp->ns_shr->s_own_len);
2257 	kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2258 	kmem_free(nsp, sizeof (*nsp));
2259 }
2260 
2261 /*
2262  * Called by klmmod.c when lockd adds a network endpoint
2263  * on which we should begin RPC services.
2264  */
2265 int
nlm_svc_add_ep(struct file * fp,const char * netid,struct knetconfig * knc)2266 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2267 {
2268 	SVCMASTERXPRT *xprt = NULL;
2269 	int error;
2270 
2271 	error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2272 	    &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2273 	if (error != 0)
2274 		return (error);
2275 
2276 	(void) nlm_knc_to_netid(knc);
2277 	return (0);
2278 }
2279 
2280 /*
2281  * Start NLM service.
2282  */
2283 int
nlm_svc_starting(struct nlm_globals * g,struct file * fp,const char * netid,struct knetconfig * knc)2284 nlm_svc_starting(struct nlm_globals *g, struct file *fp,
2285     const char *netid, struct knetconfig *knc)
2286 {
2287 	int error;
2288 	enum clnt_stat stat;
2289 
2290 	VERIFY(g->run_status == NLM_ST_STARTING);
2291 	VERIFY(g->nlm_gc_thread == NULL);
2292 
2293 	error = nlm_nsm_init_local(&g->nlm_nsm);
2294 	if (error != 0) {
2295 		NLM_ERR("Failed to initialize NSM handler "
2296 		    "(error=%d)\n", error);
2297 		g->run_status = NLM_ST_DOWN;
2298 		return (error);
2299 	}
2300 
2301 	error = EIO;
2302 
2303 	/*
2304 	 * Create an NLM garbage collector thread that will
2305 	 * clean up stale vholds and hosts objects.
2306 	 */
2307 	g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
2308 	    g, 0, minclsyspri);
2309 
2310 	/*
2311 	 * Send SIMU_CRASH to local statd to report that
2312 	 * NLM started, so that statd can report other hosts
2313 	 * about NLM state change.
2314 	 */
2315 
2316 	stat = nlm_nsm_simu_crash(&g->nlm_nsm);
2317 	if (stat != RPC_SUCCESS) {
2318 		NLM_ERR("Failed to connect to local statd "
2319 		    "(rpcerr=%d)\n", stat);
2320 		goto shutdown_lm;
2321 	}
2322 
2323 	stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
2324 	if (stat != RPC_SUCCESS) {
2325 		NLM_ERR("Failed to get the status of local statd "
2326 		    "(rpcerr=%d)\n", stat);
2327 		goto shutdown_lm;
2328 	}
2329 
2330 	g->grace_threshold = ddi_get_lbolt() +
2331 	    SEC_TO_TICK(g->grace_period);
2332 
2333 	/* Register endpoint used for communications with local NLM */
2334 	error = nlm_svc_add_ep(fp, netid, knc);
2335 	if (error != 0)
2336 		goto shutdown_lm;
2337 
2338 	(void) svc_pool_control(NLM_SVCPOOL_ID,
2339 	    SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
2340 	g->run_status = NLM_ST_UP;
2341 	return (0);
2342 
2343 shutdown_lm:
2344 	mutex_enter(&g->lock);
2345 	g->run_status = NLM_ST_STOPPING;
2346 	mutex_exit(&g->lock);
2347 
2348 	nlm_svc_stopping(g);
2349 	return (error);
2350 }
2351 
/*
 * Shutdown callback of the NLM server pool (installed by
 * nlm_svc_starting() via SVCPSET_SHUTDOWN_PROC). It is called
 * when the server pool is destroyed, i.e. when all transports
 * are closed and no server threads exist anymore.
 *
 * All it does is call lm_shutdown() to shut NLM down properly;
 * the result of lm_shutdown() is deliberately ignored.
 */
static void
nlm_pool_shutdown(void)
{
	(void) lm_shutdown();
}
2364 
2365 /*
2366  * Stop NLM service, cleanup all resources
2367  * NLM owns at the moment.
2368  *
2369  * NOTE: NFS code can call NLM while it's
2370  * stopping or even if it's shut down. Any attempt
2371  * to lock file either on client or on the server
2372  * will fail if NLM isn't in NLM_ST_UP state.
2373  */
2374 void
nlm_svc_stopping(struct nlm_globals * g)2375 nlm_svc_stopping(struct nlm_globals *g)
2376 {
2377 	mutex_enter(&g->lock);
2378 	ASSERT(g->run_status == NLM_ST_STOPPING);
2379 
2380 	/*
2381 	 * Ask NLM GC thread to exit and wait until it dies.
2382 	 */
2383 	cv_signal(&g->nlm_gc_sched_cv);
2384 	while (g->nlm_gc_thread != NULL)
2385 		cv_wait(&g->nlm_gc_finish_cv, &g->lock);
2386 
2387 	mutex_exit(&g->lock);
2388 
2389 	/*
2390 	 * Cleanup locks owned by NLM hosts.
2391 	 * NOTE: New hosts won't be created while
2392 	 * NLM is stopping.
2393 	 */
2394 	while (!avl_is_empty(&g->nlm_hosts_tree)) {
2395 		struct nlm_host *hostp;
2396 		int busy_hosts = 0;
2397 
2398 		/*
2399 		 * Iterate through all NLM hosts in the system
2400 		 * and drop the locks they own by force.
2401 		 */
2402 		hostp = avl_first(&g->nlm_hosts_tree);
2403 		while (hostp != NULL) {
2404 			/* Cleanup all client and server side locks */
2405 			nlm_client_cancel_all(g, hostp);
2406 			nlm_host_notify_server(hostp, 0);
2407 
2408 			mutex_enter(&hostp->nh_lock);
2409 			nlm_host_gc_vholds(hostp);
2410 			if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
2411 				/*
2412 				 * Oh, it seems the host is still busy, let
2413 				 * it some time to release and go to the
2414 				 * next one.
2415 				 */
2416 
2417 				mutex_exit(&hostp->nh_lock);
2418 				hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2419 				busy_hosts++;
2420 				continue;
2421 			}
2422 
2423 			mutex_exit(&hostp->nh_lock);
2424 			hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2425 		}
2426 
2427 		/*
2428 		 * All hosts go to nlm_idle_hosts list after
2429 		 * all locks they own are cleaned up and last refereces
2430 		 * were dropped. Just destroy all hosts in nlm_idle_hosts
2431 		 * list, they can not be removed from there while we're
2432 		 * in stopping state.
2433 		 */
2434 		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
2435 			nlm_host_unregister(g, hostp);
2436 			nlm_host_destroy(hostp);
2437 		}
2438 
2439 		if (busy_hosts > 0) {
2440 			/*
2441 			 * There're some hosts that weren't cleaned
2442 			 * up. Probably they're in resource cleanup
2443 			 * process. Give them some time to do drop
2444 			 * references.
2445 			 */
2446 			delay(MSEC_TO_TICK(500));
2447 		}
2448 	}
2449 
2450 	ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
2451 
2452 	nlm_nsm_fini(&g->nlm_nsm);
2453 	g->lockd_pid = 0;
2454 	g->run_status = NLM_ST_DOWN;
2455 }
2456 
2457 /*
2458  * Returns TRUE if the given vnode has
2459  * any active or sleeping locks.
2460  */
2461 int
nlm_vp_active(const vnode_t * vp)2462 nlm_vp_active(const vnode_t *vp)
2463 {
2464 	struct nlm_globals *g;
2465 	struct nlm_host *hostp;
2466 	struct nlm_vhold *nvp;
2467 	int active = 0;
2468 
2469 	g = zone_getspecific(nlm_zone_key, curzone);
2470 
2471 	/*
2472 	 * Server side NLM has locks on the given vnode
2473 	 * if there exist a vhold object that holds
2474 	 * the given vnode "vp" in one of NLM hosts.
2475 	 */
2476 	mutex_enter(&g->lock);
2477 	hostp = avl_first(&g->nlm_hosts_tree);
2478 	while (hostp != NULL) {
2479 		mutex_enter(&hostp->nh_lock);
2480 		nvp = nlm_vhold_find_locked(hostp, vp);
2481 		mutex_exit(&hostp->nh_lock);
2482 		if (nvp != NULL) {
2483 			active = 1;
2484 			break;
2485 		}
2486 
2487 		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2488 	}
2489 
2490 	mutex_exit(&g->lock);
2491 	return (active);
2492 }
2493 
2494 /*
2495  * Called right before NFS export is going to
2496  * dissapear. The function finds all vnodes
2497  * belonging to the given export and cleans
2498  * all remote locks and share reservations
2499  * on them.
2500  */
2501 void
nlm_unexport(struct exportinfo * exi)2502 nlm_unexport(struct exportinfo *exi)
2503 {
2504 	struct nlm_globals *g;
2505 	struct nlm_host *hostp;
2506 
2507 	g = zone_getspecific(nlm_zone_key, curzone);
2508 
2509 	mutex_enter(&g->lock);
2510 	hostp = avl_first(&g->nlm_hosts_tree);
2511 	while (hostp != NULL) {
2512 		struct nlm_vhold *nvp;
2513 
2514 		mutex_enter(&hostp->nh_lock);
2515 		TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
2516 			vnode_t *vp;
2517 
2518 			nvp->nv_refcnt++;
2519 			mutex_exit(&hostp->nh_lock);
2520 
2521 			vp = nvp->nv_vp;
2522 
2523 			if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
2524 				goto next_iter;
2525 
2526 			/*
2527 			 * Ok, it we found out that vnode vp is under
2528 			 * control by the exportinfo exi, now we need
2529 			 * to drop all locks from this vnode, let's
2530 			 * do it.
2531 			 */
2532 			nlm_vhold_clean(nvp, hostp->nh_sysid);
2533 
2534 		next_iter:
2535 			mutex_enter(&hostp->nh_lock);
2536 			nvp->nv_refcnt--;
2537 		}
2538 
2539 		mutex_exit(&hostp->nh_lock);
2540 		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2541 	}
2542 
2543 	mutex_exit(&g->lock);
2544 }
2545 
2546 /*
2547  * Allocate new unique sysid.
2548  * In case of failure (no available sysids)
2549  * return LM_NOSYSID.
2550  */
2551 sysid_t
nlm_sysid_alloc(void)2552 nlm_sysid_alloc(void)
2553 {
2554 	sysid_t ret_sysid = LM_NOSYSID;
2555 
2556 	rw_enter(&lm_lck, RW_WRITER);
2557 	if (nlm_sysid_nidx > LM_SYSID_MAX)
2558 		nlm_sysid_nidx = LM_SYSID;
2559 
2560 	if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2561 		BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2562 		ret_sysid = nlm_sysid_nidx++;
2563 	} else {
2564 		index_t id;
2565 
2566 		id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2567 		if (id > 0) {
2568 			nlm_sysid_nidx = id + 1;
2569 			ret_sysid = id;
2570 			BT_SET(nlm_sysid_bmap, id);
2571 		}
2572 	}
2573 
2574 	rw_exit(&lm_lck);
2575 	return (ret_sysid);
2576 }
2577 
2578 void
nlm_sysid_free(sysid_t sysid)2579 nlm_sysid_free(sysid_t sysid)
2580 {
2581 	ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);
2582 
2583 	rw_enter(&lm_lck, RW_WRITER);
2584 	ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
2585 	BT_CLEAR(nlm_sysid_bmap, sysid);
2586 	rw_exit(&lm_lck);
2587 }
2588 
2589 /*
2590  * Return true if the request came from a local caller.
2591  * By necessity, this "knows" the netid names invented
2592  * in lm_svc() and nlm_netid_from_knetconfig().
2593  */
2594 bool_t
nlm_caller_is_local(SVCXPRT * transp)2595 nlm_caller_is_local(SVCXPRT *transp)
2596 {
2597 	char *netid;
2598 	struct netbuf *rtaddr;
2599 
2600 	netid = svc_getnetid(transp);
2601 	rtaddr = svc_getrpccaller(transp);
2602 
2603 	if (netid == NULL)
2604 		return (FALSE);
2605 
2606 	if (strcmp(netid, "ticlts") == 0 ||
2607 	    strcmp(netid, "ticotsord") == 0)
2608 		return (TRUE);
2609 
2610 	if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2611 		struct sockaddr_in *sin = (void *)rtaddr->buf;
2612 		if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2613 			return (TRUE);
2614 	}
2615 	if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2616 		struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2617 		if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2618 			return (TRUE);
2619 	}
2620 
2621 	return (FALSE); /* unknown transport */
2622 }
2623 
2624 /*
2625  * Get netid string correspondig to the given knetconfig.
2626  * If not done already, save knc->knc_rdev in our table.
2627  */
2628 const char *
nlm_knc_to_netid(struct knetconfig * knc)2629 nlm_knc_to_netid(struct knetconfig *knc)
2630 {
2631 	int i;
2632 	dev_t rdev;
2633 	struct nlm_knc *nc;
2634 	const char *netid = NULL;
2635 
2636 	rw_enter(&lm_lck, RW_READER);
2637 	for (i = 0; i < NLM_KNCS; i++) {
2638 		nc = &nlm_netconfigs[i];
2639 
2640 		if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2641 		    strcmp(nc->n_knc.knc_protofmly,
2642 		    knc->knc_protofmly) == 0) {
2643 			netid = nc->n_netid;
2644 			rdev = nc->n_knc.knc_rdev;
2645 			break;
2646 		}
2647 	}
2648 	rw_exit(&lm_lck);
2649 
2650 	if (netid != NULL && rdev == NODEV) {
2651 		rw_enter(&lm_lck, RW_WRITER);
2652 		if (nc->n_knc.knc_rdev == NODEV)
2653 			nc->n_knc.knc_rdev = knc->knc_rdev;
2654 		rw_exit(&lm_lck);
2655 	}
2656 
2657 	return (netid);
2658 }
2659 
2660 /*
2661  * Get a knetconfig corresponding to the given netid.
2662  * If there's no knetconfig for this netid, ENOENT
2663  * is returned.
2664  */
2665 int
nlm_knc_from_netid(const char * netid,struct knetconfig * knc)2666 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2667 {
2668 	int i, ret;
2669 
2670 	ret = ENOENT;
2671 	for (i = 0; i < NLM_KNCS; i++) {
2672 		struct nlm_knc *nknc;
2673 
2674 		nknc = &nlm_netconfigs[i];
2675 		if (strcmp(netid, nknc->n_netid) == 0 &&
2676 		    nknc->n_knc.knc_rdev != NODEV) {
2677 			*knc = nknc->n_knc;
2678 			ret = 0;
2679 			break;
2680 		}
2681 	}
2682 
2683 	return (ret);
2684 }
2685 
2686 void
nlm_cprsuspend(void)2687 nlm_cprsuspend(void)
2688 {
2689 	struct nlm_globals *g;
2690 
2691 	rw_enter(&lm_lck, RW_READER);
2692 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2693 		nlm_suspend_zone(g);
2694 
2695 	rw_exit(&lm_lck);
2696 }
2697 
2698 void
nlm_cprresume(void)2699 nlm_cprresume(void)
2700 {
2701 	struct nlm_globals *g;
2702 
2703 	rw_enter(&lm_lck, RW_READER);
2704 	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2705 		nlm_resume_zone(g);
2706 
2707 	rw_exit(&lm_lck);
2708 }
2709 
2710 static void
nlm_nsm_clnt_init(CLIENT * clnt,struct nlm_nsm * nsm)2711 nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
2712 {
2713 	(void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
2714 	    NLM_RPC_RETRIES, kcred);
2715 }
2716 
2717 static void
nlm_netbuf_to_netobj(struct netbuf * addr,int * family,netobj * obj)2718 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2719 {
2720 	/* LINTED pointer alignment */
2721 	struct sockaddr *sa = (struct sockaddr *)addr->buf;
2722 
2723 	*family = sa->sa_family;
2724 
2725 	switch (sa->sa_family) {
2726 	case AF_INET: {
2727 		/* LINTED pointer alignment */
2728 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2729 
2730 		obj->n_len = sizeof (sin->sin_addr);
2731 		obj->n_bytes = (char *)&sin->sin_addr;
2732 		break;
2733 	}
2734 
2735 	case AF_INET6: {
2736 		/* LINTED pointer alignment */
2737 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2738 
2739 		obj->n_len = sizeof (sin6->sin6_addr);
2740 		obj->n_bytes = (char *)&sin6->sin6_addr;
2741 		break;
2742 	}
2743 
2744 	default:
2745 		VERIFY(0);
2746 		break;
2747 	}
2748 }
2749