xref: /illumos-gate/usr/src/uts/common/klm/nlm_client.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3  * Authors: Doug Rabson <dfr@rabson.org>
4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
30  * Copyright (c) 2012 by Delphix. All rights reserved.
31  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
32  */
33 
34 /*
35  * Client-side support for (NFS) VOP_FRLOCK, VOP_SHRLOCK.
36  * (called via klmops.c: lm_frlock, lm4_frlock)
37  *
38  * Source code derived from FreeBSD nlm_advlock.c
39  */
40 
41 #include <sys/param.h>
42 #include <sys/fcntl.h>
43 #include <sys/lock.h>
44 #include <sys/flock.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/share.h>
49 #include <sys/syslog.h>
50 #include <sys/systm.h>
51 #include <sys/unistd.h>
52 #include <sys/vnode.h>
53 #include <sys/queue.h>
54 #include <sys/sdt.h>
55 #include <netinet/in.h>
56 
57 #include <fs/fs_subr.h>
58 #include <rpcsvc/nlm_prot.h>
59 
60 #include <nfs/nfs.h>
61 #include <nfs/nfs_clnt.h>
62 #include <nfs/export.h>
63 #include <nfs/rnode.h>
64 #include <nfs/lm.h>
65 
66 #include "nlm_impl.h"
67 
68 /* Extra flags for nlm_call_lock() - xflags */
69 #define	NLM_X_RECLAIM	1
70 #define	NLM_X_BLOCKING	2
71 
72 /*
73  * Max. number of retries nlm_call_cancel() does
74  * when NLM server is in grace period or doesn't
75  * respond correctly.
76  */
77 #define	NLM_CANCEL_NRETRS 5
78 
/*
 * Determines whether the given lock "flp" is safe.
 * A lock is considered to be safe when it
 * covers the whole file (i.e. both its start
 * and its len are zero).
 */
85 #define	NLM_FLOCK_IS_SAFE(flp) \
86 	((flp)->l_start == 0 && (flp)->l_len == 0)
87 
88 static volatile uint32_t nlm_xid = 1;
89 
90 static int nlm_init_fh_by_vp(vnode_t *, struct netobj *, rpcvers_t *);
91 static int nlm_map_status(nlm4_stats);
92 static int nlm_map_clnt_stat(enum clnt_stat);
93 static void nlm_send_siglost(pid_t);
94 
95 static int nlm_frlock_getlk(struct nlm_host *, vnode_t *,
96     struct flock64 *, int, u_offset_t, struct netobj *, int);
97 
98 static int nlm_frlock_setlk(struct nlm_host *, vnode_t *,
99     struct flock64 *, int, u_offset_t, struct netobj *,
100     struct flk_callback *, int, bool_t);
101 
102 static int nlm_reclaim_lock(struct nlm_host *, vnode_t *,
103     struct flock64 *, int32_t);
104 
105 static void nlm_init_lock(struct nlm4_lock *,
106     const struct flock64 *, struct netobj *,
107     struct nlm_owner_handle *);
108 
109 static int nlm_call_lock(vnode_t *, struct flock64 *,
110     struct nlm_host *, struct netobj *,
111     struct flk_callback *, int, int);
112 static int nlm_call_unlock(struct flock64 *, struct nlm_host *,
113     struct netobj *, int);
114 static int nlm_call_test(struct flock64 *, struct nlm_host *,
115     struct netobj *, int);
116 static int nlm_call_cancel(struct nlm4_lockargs *,
117     struct nlm_host *, int);
118 
119 static int nlm_local_getlk(vnode_t *, struct flock64 *, int);
120 static int nlm_local_setlk(vnode_t *, struct flock64 *, int);
121 static void nlm_local_cancelk(vnode_t *, struct flock64 *);
122 
123 static void nlm_init_share(struct nlm4_share *,
124     const struct shrlock *, struct netobj *);
125 
126 static int nlm_call_share(struct shrlock *, struct nlm_host *,
127     struct netobj *, int, int);
128 static int nlm_call_unshare(struct shrlock *, struct nlm_host *,
129     struct netobj *, int);
130 static int nlm_reclaim_share(struct nlm_host *, vnode_t *,
131     struct shrlock *, uint32_t);
132 static int nlm_local_shrlock(vnode_t *, struct shrlock *, int, int);
133 static void nlm_local_shrcancel(vnode_t *, struct shrlock *);
134 
135 /*
136  * Reclaim locks/shares acquired by the client side
137  * on the given server represented by hostp.
138  * The function is called from a dedicated thread
139  * when server reports us that it's entered grace
140  * period.
141  */
142 void
143 nlm_reclaim_client(struct nlm_globals *g, struct nlm_host *hostp)
144 {
145 	int32_t state;
146 	int error, sysid;
147 	struct locklist *llp_head, *llp;
148 	struct nlm_shres *nsp_head, *nsp;
149 	bool_t restart;
150 
151 	sysid = hostp->nh_sysid | LM_SYSID_CLIENT;
152 	do {
153 		error = 0;
154 		restart = FALSE;
155 		state = nlm_host_get_state(hostp);
156 
157 		DTRACE_PROBE3(reclaim__iter, struct nlm_globals *, g,
158 		    struct nlm_host *, hostp, int, state);
159 
160 		/*
161 		 * We cancel all sleeping locks that were
162 		 * done by the host, because we don't allow
163 		 * reclamation of sleeping locks. The reason
164 		 * we do this is that allowing of sleeping locks
165 		 * reclamation can potentially break locks recovery
166 		 * order.
167 		 *
168 		 * Imagine that we have two client machines A and B
169 		 * and an NLM server machine. A adds a non sleeping
170 		 * lock to the file F and aquires this file. Machine
171 		 * B in its turn adds sleeping lock to the file
172 		 * F and blocks because F is already aquired by
173 		 * the machine A. Then server crashes and after the
174 		 * reboot it notifies its clients about the crash.
175 		 * If we would allow sleeping locks reclamation,
176 		 * there would be possible that machine B recovers
177 		 * its lock faster than machine A (by some reason).
178 		 * So that B aquires the file F after server crash and
179 		 * machine A (that by some reason recovers slower) fails
180 		 * to recover its non sleeping lock. Thus the original
181 		 * locks order becames broken.
182 		 */
183 		nlm_host_cancel_slocks(g, hostp);
184 
185 		/*
186 		 * Try to reclaim all active locks we have
187 		 */
188 		llp_head = llp = flk_get_active_locks(sysid, NOPID);
189 		while (llp != NULL) {
190 			error = nlm_reclaim_lock(hostp, llp->ll_vp,
191 			    &llp->ll_flock, state);
192 
193 			if (error == 0) {
194 				llp = llp->ll_next;
195 				continue;
196 			} else if (error == ERESTART) {
197 				restart = TRUE;
198 				break;
199 			} else {
200 				/*
201 				 * Critical error occurred, the lock
202 				 * can not be recovered, just take it away.
203 				 */
204 				nlm_local_cancelk(llp->ll_vp, &llp->ll_flock);
205 			}
206 
207 			llp = llp->ll_next;
208 		}
209 
210 		flk_free_locklist(llp_head);
211 		if (restart) {
212 			/*
213 			 * Lock reclamation fucntion reported us that
214 			 * the server state was changed (again), so
215 			 * try to repeat the whole reclamation process.
216 			 */
217 			continue;
218 		}
219 
220 		nsp_head = nsp = nlm_get_active_shres(hostp);
221 		while (nsp != NULL) {
222 			error = nlm_reclaim_share(hostp, nsp->ns_vp,
223 			    nsp->ns_shr, state);
224 
225 			if (error == 0) {
226 				nsp = nsp->ns_next;
227 				continue;
228 			} else if (error == ERESTART) {
229 				break;
230 			} else {
231 				/* Failed to reclaim share */
232 				nlm_shres_untrack(hostp, nsp->ns_vp,
233 				    nsp->ns_shr);
234 				nlm_local_shrcancel(nsp->ns_vp,
235 				    nsp->ns_shr);
236 			}
237 
238 			nsp = nsp->ns_next;
239 		}
240 
241 		nlm_free_shrlist(nsp_head);
242 	} while (state != nlm_host_get_state(hostp));
243 }
244 
245 /*
246  * nlm_frlock --
247  *      NFS advisory byte-range locks.
248  *	Called in klmops.c
249  *
250  * Note that the local locking code (os/flock.c) is used to
251  * keep track of remote locks granted by some server, so we
252  * can reclaim those locks after a server restarts.  We can
253  * also sometimes use this as a cache of lock information.
254  *
255  * Was: nlm_advlock()
256  */
257 /* ARGSUSED */
258 int
259 nlm_frlock(struct vnode *vp, int cmd, struct flock64 *flkp,
260     int flags, u_offset_t offset, struct cred *crp,
261     struct netobj *fhp, struct flk_callback *flcb, int vers)
262 {
263 	mntinfo_t *mi;
264 	servinfo_t *sv;
265 	const char *netid;
266 	struct nlm_host *hostp;
267 	int error;
268 	struct nlm_globals *g;
269 
270 	mi = VTOMI(vp);
271 	sv = mi->mi_curr_serv;
272 
273 	netid = nlm_knc_to_netid(sv->sv_knconf);
274 	if (netid == NULL) {
275 		NLM_ERR("nlm_frlock: unknown NFS netid");
276 		return (ENOSYS);
277 	}
278 
279 	g = zone_getspecific(nlm_zone_key, curzone);
280 	hostp = nlm_host_findcreate(g, sv->sv_hostname, netid, &sv->sv_addr);
281 	if (hostp == NULL)
282 		return (ENOSYS);
283 
284 	/*
285 	 * Purge cached attributes in order to make sure that
286 	 * future calls of convoff()/VOP_GETATTR() will get the
287 	 * latest data.
288 	 */
289 	if (flkp->l_whence == SEEK_END)
290 		PURGE_ATTRCACHE(vp);
291 
292 	/* Now flk0 is the zero-based lock request. */
293 	switch (cmd) {
294 	case F_GETLK:
295 		error = nlm_frlock_getlk(hostp, vp, flkp, flags,
296 		    offset, fhp, vers);
297 		break;
298 
299 	case F_SETLK:
300 	case F_SETLKW:
301 		error = nlm_frlock_setlk(hostp, vp, flkp, flags,
302 		    offset, fhp, flcb, vers, (cmd == F_SETLKW));
303 		if (error == 0)
304 			nlm_host_monitor(g, hostp, 0);
305 		break;
306 
307 	default:
308 		error = EINVAL;
309 		break;
310 	}
311 
312 	nlm_host_release(g, hostp);
313 	return (error);
314 }
315 
316 static int
317 nlm_frlock_getlk(struct nlm_host *hostp, vnode_t *vp,
318     struct flock64 *flkp, int flags, u_offset_t offset,
319     struct netobj *fhp, int vers)
320 {
321 	struct flock64 flk0;
322 	int error;
323 
324 	/*
325 	 * Check local (cached) locks first.
326 	 * If we find one, no need for RPC.
327 	 */
328 	flk0 = *flkp;
329 	flk0.l_pid = curproc->p_pid;
330 	error = nlm_local_getlk(vp, &flk0, flags);
331 	if (error != 0)
332 		return (error);
333 	if (flk0.l_type != F_UNLCK) {
334 		*flkp = flk0;
335 		return (0);
336 	}
337 
338 	/* Not found locally.  Try remote. */
339 	flk0 = *flkp;
340 	flk0.l_pid = curproc->p_pid;
341 	error = convoff(vp, &flk0, 0, (offset_t)offset);
342 	if (error != 0)
343 		return (error);
344 
345 	error = nlm_call_test(&flk0, hostp, fhp, vers);
346 	if (error != 0)
347 		return (error);
348 
349 	if (flk0.l_type == F_UNLCK) {
350 		/*
351 		 * Update the caller's *flkp with information
352 		 * on the conflicting lock (or lack thereof).
353 		 */
354 		flkp->l_type = F_UNLCK;
355 	} else {
356 		/*
357 		 * Found a conflicting lock.  Set the
358 		 * caller's *flkp with the info, first
359 		 * converting to the caller's whence.
360 		 */
361 		(void) convoff(vp, &flk0, flkp->l_whence, (offset_t)offset);
362 		*flkp = flk0;
363 	}
364 
365 	return (0);
366 }
367 
368 static int
369 nlm_frlock_setlk(struct nlm_host *hostp, vnode_t *vp,
370     struct flock64 *flkp, int flags, u_offset_t offset,
371     struct netobj *fhp, struct flk_callback *flcb,
372     int vers, bool_t do_block)
373 {
374 	int error, xflags;
375 
376 	error = convoff(vp, flkp, 0, (offset_t)offset);
377 	if (error != 0)
378 		return (error);
379 
380 	/*
381 	 * NFS v2 clients should not request locks where any part
382 	 * of the lock range is beyond 0xffffffff.  The NFS code
383 	 * checks that (see nfs_frlock, flk_check_lock_data), but
384 	 * as that's outside this module, let's check here too.
385 	 * This check ensures that we will be able to convert this
386 	 * lock request into 32-bit form without change, and that
387 	 * (more importantly) when the granted call back arrives,
388 	 * it's unchanged when converted back into 64-bit form.
389 	 * If this lock range were to change in any way during
390 	 * either of those conversions, the "granted" call back
391 	 * from the NLM server would not find our sleeping lock.
392 	 */
393 	if (vers < NLM4_VERS) {
394 		if (flkp->l_start > MAX_UOFF32 ||
395 		    flkp->l_start + flkp->l_len > MAX_UOFF32 + 1)
396 			return (EINVAL);
397 	}
398 
399 	/*
400 	 * Fill in l_sysid for the local locking calls.
401 	 * Also, let's not trust the caller's l_pid.
402 	 */
403 	flkp->l_sysid = hostp->nh_sysid | LM_SYSID_CLIENT;
404 	flkp->l_pid = curproc->p_pid;
405 
406 	if (flkp->l_type == F_UNLCK) {
407 		/*
408 		 * Purge local (cached) lock information first,
409 		 * then clear the remote lock.
410 		 */
411 		(void) nlm_local_setlk(vp, flkp, flags);
412 		error = nlm_call_unlock(flkp, hostp, fhp, vers);
413 
414 		return (error);
415 	}
416 
417 	if (!do_block) {
418 		/*
419 		 * This is a non-blocking "set" request,
420 		 * so we can check locally first, and
421 		 * sometimes avoid an RPC call.
422 		 */
423 		struct flock64 flk0;
424 
425 		flk0 = *flkp;
426 		error = nlm_local_getlk(vp, &flk0, flags);
427 		if (error != 0 && flk0.l_type != F_UNLCK) {
428 			/* Found a conflicting lock. */
429 			return (EAGAIN);
430 		}
431 
432 		xflags = 0;
433 	} else {
434 		xflags = NLM_X_BLOCKING;
435 	}
436 
437 	nfs_add_locking_id(vp, curproc->p_pid, RLMPL_PID,
438 	    (char *)&curproc->p_pid, sizeof (pid_t));
439 
440 	error = nlm_call_lock(vp, flkp, hostp, fhp, flcb, vers, xflags);
441 	if (error != 0)
442 		return (error);
443 
444 	/*
445 	 * Save the lock locally.  This should not fail,
446 	 * because the server is authoritative about locks
447 	 * and it just told us we have the lock!
448 	 */
449 	error = nlm_local_setlk(vp, flkp, flags);
450 	if (error != 0) {
451 		/*
452 		 * That's unexpected situation. Just ignore the error.
453 		 */
454 		NLM_WARN("nlm_frlock_setlk: Failed to set local lock. "
455 		    "[err=%d]\n", error);
456 		error = 0;
457 	}
458 
459 	return (error);
460 }
461 
462 /*
463  * Cancel all client side remote locks/shares on the
464  * given host. Report to the processes that own
465  * cancelled locks that they are removed by force
466  * by sending SIGLOST.
467  */
468 void
469 nlm_client_cancel_all(struct nlm_globals *g, struct nlm_host *hostp)
470 {
471 	struct locklist *llp_head, *llp;
472 	struct nlm_shres *nsp_head, *nsp;
473 	struct netobj lm_fh;
474 	rpcvers_t vers;
475 	int error, sysid;
476 
477 	sysid = hostp->nh_sysid | LM_SYSID_CLIENT;
478 	nlm_host_cancel_slocks(g, hostp);
479 
480 	/*
481 	 * Destroy all active locks
482 	 */
483 	llp_head = llp = flk_get_active_locks(sysid, NOPID);
484 	while (llp != NULL) {
485 		llp->ll_flock.l_type = F_UNLCK;
486 
487 		error = nlm_init_fh_by_vp(llp->ll_vp, &lm_fh, &vers);
488 		if (error == 0)
489 			(void) nlm_call_unlock(&llp->ll_flock, hostp,
490 			    &lm_fh, vers);
491 
492 		nlm_local_cancelk(llp->ll_vp, &llp->ll_flock);
493 		llp = llp->ll_next;
494 	}
495 
496 	flk_free_locklist(llp_head);
497 
498 	/*
499 	 * Destroy all active share reservations
500 	 */
501 	nsp_head = nsp = nlm_get_active_shres(hostp);
502 	while (nsp != NULL) {
503 		error = nlm_init_fh_by_vp(nsp->ns_vp, &lm_fh, &vers);
504 		if (error == 0)
505 			(void) nlm_call_unshare(nsp->ns_shr, hostp,
506 			    &lm_fh, vers);
507 
508 		nlm_local_shrcancel(nsp->ns_vp, nsp->ns_shr);
509 		nlm_shres_untrack(hostp, nsp->ns_vp, nsp->ns_shr);
510 		nsp = nsp->ns_next;
511 	}
512 
513 	nlm_free_shrlist(nsp_head);
514 }
515 
516 /*
517  * The function determines whether the lock "fl" can
518  * be safely applied to the file vnode "vp" corresponds to.
519  * The lock can be "safely" applied if all the conditions
520  * above are held:
521  *  - It's not a mandatory lock
522  *  - The vnode wasn't mapped by anyone
523  *  - The vnode was mapped, but it hasn't any locks on it.
524  *  - The vnode was mapped and all locks it has occupies
525  *    the whole file.
526  */
527 int
528 nlm_safelock(vnode_t *vp, const struct flock64 *fl, cred_t *cr)
529 {
530 	rnode_t *rp = VTOR(vp);
531 	struct vattr va;
532 	int err;
533 
534 	if ((rp->r_mapcnt > 0) && (fl->l_start != 0 || fl->l_len != 0))
535 		return (0);
536 
537 	va.va_mask = AT_MODE;
538 	err = VOP_GETATTR(vp, &va, 0, cr, NULL);
539 	if (err != 0)
540 		return (0);
541 
542 	/* NLM4 doesn't allow mandatory file locking */
543 	if (MANDLOCK(vp, va.va_mode))
544 		return (0);
545 
546 	return (1);
547 }
548 
549 /*
550  * The function determines whether it's safe to map
551  * a file correspoding to vnode vp.
552  * The mapping is considered to be "safe" if file
553  * either has no any locks on it or all locks it
554  * has occupy the whole file.
555  */
556 int
557 nlm_safemap(const vnode_t *vp)
558 {
559 	struct locklist *llp, *llp_next;
560 	struct nlm_slock *nslp;
561 	struct nlm_globals *g;
562 	int safe = 1;
563 
564 	/* Check active locks at first */
565 	llp = flk_active_locks_for_vp(vp);
566 	while (llp != NULL) {
567 		if ((llp->ll_vp == vp) &&
568 		    !NLM_FLOCK_IS_SAFE(&llp->ll_flock))
569 			safe = 0;
570 
571 		llp_next = llp->ll_next;
572 		VN_RELE(llp->ll_vp);
573 		kmem_free(llp, sizeof (*llp));
574 		llp = llp_next;
575 	}
576 	if (!safe)
577 		return (safe);
578 
579 	/* Then check sleeping locks if any */
580 	g = zone_getspecific(nlm_zone_key, curzone);
581 	mutex_enter(&g->lock);
582 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
583 		if (nslp->nsl_state == NLM_SL_BLOCKED &&
584 		    nslp->nsl_vp == vp &&
585 		    (nslp->nsl_lock.l_offset != 0 ||
586 		    nslp->nsl_lock.l_len != 0)) {
587 			safe = 0;
588 			break;
589 		}
590 	}
591 
592 	mutex_exit(&g->lock);
593 	return (safe);
594 }
595 
596 int
597 nlm_has_sleep(const vnode_t *vp)
598 {
599 	struct nlm_globals *g;
600 	struct nlm_slock *nslp;
601 	int has_slocks = FALSE;
602 
603 	g = zone_getspecific(nlm_zone_key, curzone);
604 	mutex_enter(&g->lock);
605 	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
606 		if (nslp->nsl_state == NLM_SL_BLOCKED &&
607 		    nslp->nsl_vp == vp) {
608 			has_slocks = TRUE;
609 			break;
610 		}
611 	}
612 
613 	mutex_exit(&g->lock);
614 	return (has_slocks);
615 }
616 
617 void
618 nlm_register_lock_locally(struct vnode *vp, struct nlm_host *hostp,
619     struct flock64 *flk, int flags, u_offset_t offset)
620 {
621 	struct nlm_globals *g = NULL;
622 	int sysid = 0;
623 
624 	if (hostp == NULL) {
625 		mntinfo_t *mi;
626 		servinfo_t *sv;
627 		const char *netid;
628 
629 		mi = VTOMI(vp);
630 		sv = mi->mi_curr_serv;
631 		netid = nlm_knc_to_netid(sv->sv_knconf);
632 
633 		if (netid != NULL) {
634 			g = zone_getspecific(nlm_zone_key, curzone);
635 			hostp = nlm_host_findcreate(g, sv->sv_hostname,
636 			    netid, &sv->sv_addr);
637 		}
638 	}
639 
640 	if (hostp != NULL) {
641 		sysid = hostp->nh_sysid | LM_SYSID_CLIENT;
642 
643 		if (g != NULL)
644 			nlm_host_release(g, hostp);
645 	}
646 
647 	flk->l_sysid = sysid;
648 	(void) convoff(vp, flk, 0, (offset_t)offset);
649 	(void) nlm_local_setlk(vp, flk, flags);
650 }
651 
652 
653 /*
654  * The BSD code had functions here to "reclaim" (destroy)
655  * remote locks when a vnode is being forcibly destroyed.
656  * We just keep vnodes around until statd tells us the
657  * client has gone away.
658  */
659 
660 static int
661 nlm_reclaim_lock(struct nlm_host *hostp, vnode_t *vp,
662     struct flock64 *flp, int32_t orig_state)
663 {
664 	struct netobj lm_fh;
665 	int error, state;
666 	rpcvers_t vers;
667 
668 	/*
669 	 * If the remote NSM state changes during recovery, the host
670 	 * must have rebooted a second time. In that case, we must
671 	 * restart the recovery.
672 	 */
673 	state = nlm_host_get_state(hostp);
674 	if (state != orig_state)
675 		return (ERESTART);
676 
677 	error = nlm_init_fh_by_vp(vp, &lm_fh, &vers);
678 	if (error != 0)
679 		return (error);
680 
681 	return (nlm_call_lock(vp, flp, hostp, &lm_fh,
682 	    NULL, vers, NLM_X_RECLAIM));
683 }
684 
685 /*
686  * Get local lock information for some NFS server.
687  *
688  * This gets (checks for) a local conflicting lock.
689  * Note: Modifies passed flock, if a conflict is found,
690  * but the caller expects that.
691  */
692 static int
693 nlm_local_getlk(vnode_t *vp, struct flock64 *fl, int flags)
694 {
695 	VERIFY(fl->l_whence == SEEK_SET);
696 	return (reclock(vp, fl, 0, flags, 0, NULL));
697 }
698 
699 /*
700  * Set local lock information for some NFS server.
701  *
702  * Called after a lock request (set or clear) succeeded. We record the
703  * details in the local lock manager. Note that since the remote
704  * server has granted the lock, we can be sure that it doesn't
705  * conflict with any other locks we have in the local lock manager.
706  *
707  * Since it is possible that host may also make NLM client requests to
708  * our NLM server, we use a different sysid value to record our own
709  * client locks.
710  *
711  * Note that since it is possible for us to receive replies from the
712  * server in a different order than the locks were granted (e.g. if
713  * many local threads are contending for the same lock), we must use a
714  * blocking operation when registering with the local lock manager.
715  * We expect that any actual wait will be rare and short hence we
716  * ignore signals for this.
717  */
718 static int
719 nlm_local_setlk(vnode_t *vp, struct flock64 *fl, int flags)
720 {
721 	VERIFY(fl->l_whence == SEEK_SET);
722 	return (reclock(vp, fl, SETFLCK, flags, 0, NULL));
723 }
724 
725 /*
726  * Cancel local lock and send send SIGLOST signal
727  * to the lock owner.
728  *
729  * NOTE: modifies flp
730  */
731 static void
732 nlm_local_cancelk(vnode_t *vp, struct flock64 *flp)
733 {
734 	flp->l_type = F_UNLCK;
735 	(void) nlm_local_setlk(vp, flp, FREAD | FWRITE);
736 	nlm_send_siglost(flp->l_pid);
737 }
738 
739 /*
740  * Do NLM_LOCK call.
741  * Was: nlm_setlock()
742  *
743  * NOTE: nlm_call_lock() function should care about locking/unlocking
744  * of rnode->r_lkserlock which should be released before nlm_call_lock()
745  * sleeps on waiting lock and acquired when it wakes up.
746  */
747 static int
748 nlm_call_lock(vnode_t *vp, struct flock64 *flp,
749     struct nlm_host *hostp, struct netobj *fhp,
750     struct flk_callback *flcb, int vers, int xflags)
751 {
752 	struct nlm4_lockargs args;
753 	struct nlm_owner_handle oh;
754 	struct nlm_globals *g;
755 	rnode_t *rnp = VTOR(vp);
756 	struct nlm_slock *nslp = NULL;
757 	uint32_t xid;
758 	int error = 0;
759 
760 	bzero(&args, sizeof (args));
761 	g = zone_getspecific(nlm_zone_key, curzone);
762 	nlm_init_lock(&args.alock, flp, fhp, &oh);
763 
764 	args.exclusive = (flp->l_type == F_WRLCK);
765 	args.reclaim = xflags & NLM_X_RECLAIM;
766 	args.state = g->nsm_state;
767 	args.cookie.n_len = sizeof (xid);
768 	args.cookie.n_bytes = (char *)&xid;
769 
770 	oh.oh_sysid = hostp->nh_sysid;
771 	xid = atomic_inc_32_nv(&nlm_xid);
772 
773 	if (xflags & NLM_X_BLOCKING) {
774 		args.block = TRUE;
775 		nslp = nlm_slock_register(g, hostp, &args.alock, vp);
776 	}
777 
778 	for (;;) {
779 		nlm_rpc_t *rpcp;
780 		enum clnt_stat stat;
781 		struct nlm4_res res;
782 		enum nlm4_stats nlm_err;
783 
784 		error = nlm_host_get_rpc(hostp, vers, &rpcp);
785 		if (error != 0) {
786 			error = ENOLCK;
787 			goto out;
788 		}
789 
790 		bzero(&res, sizeof (res));
791 		stat = nlm_lock_rpc(&args, &res, rpcp->nr_handle, vers);
792 		nlm_host_rele_rpc(hostp, rpcp);
793 
794 		error = nlm_map_clnt_stat(stat);
795 		if (error != 0) {
796 			if (error == EAGAIN)
797 				continue;
798 
799 			goto out;
800 		}
801 
802 		DTRACE_PROBE1(lock__res, enum nlm4_stats, res.stat.stat);
803 		nlm_err = res.stat.stat;
804 		xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
805 		if (nlm_err == nlm4_denied_grace_period) {
806 			if (args.reclaim) {
807 				error = ENOLCK;
808 				goto out;
809 			}
810 
811 			error = nlm_host_wait_grace(hostp);
812 			if (error != 0)
813 				goto out;
814 
815 			continue;
816 		}
817 
818 		switch (nlm_err) {
819 		case nlm4_granted:
820 		case nlm4_blocked:
821 			error = 0;
822 			break;
823 
824 		case nlm4_denied:
825 			if (nslp != NULL) {
826 				NLM_WARN("nlm_call_lock: got nlm4_denied for "
827 				    "blocking lock\n");
828 			}
829 
830 			error = EAGAIN;
831 			break;
832 
833 		default:
834 			error = nlm_map_status(nlm_err);
835 		}
836 
837 		/*
838 		 * If we deal with either non-blocking lock or
839 		 * with a blocking locks that wasn't blocked on
840 		 * the server side (by some reason), our work
841 		 * is finished.
842 		 */
843 		if (nslp == NULL			||
844 		    nlm_err != nlm4_blocked		||
845 		    error != 0)
846 			goto out;
847 
848 		/*
849 		 * Before releasing the r_lkserlock of rnode, we should
850 		 * check whether the new lock is "safe". If it's not
851 		 * safe, disable caching for the given vnode. That is done
852 		 * for sleeping locks only that are waiting for a GRANT reply
853 		 * from the NLM server.
854 		 *
855 		 * NOTE: the vnode cache can be enabled back later if an
856 		 * unsafe lock will be merged with existent locks so that
857 		 * it will become safe. This condition is checked in the
858 		 * NFSv3 code (see nfs_lockcompletion).
859 		 */
860 		if (!NLM_FLOCK_IS_SAFE(flp)) {
861 			mutex_enter(&vp->v_lock);
862 			vp->v_flag &= ~VNOCACHE;
863 			mutex_exit(&vp->v_lock);
864 		}
865 
866 		/*
867 		 * The server should call us back with a
868 		 * granted message when the lock succeeds.
869 		 * In order to deal with broken servers,
870 		 * lost granted messages, or server reboots,
871 		 * we will also re-try every few seconds.
872 		 *
873 		 * Note: We're supposed to call these
874 		 * flk_invoke_callbacks when blocking.
875 		 * Take care on rnode->r_lkserlock, we should
876 		 * release it before going to sleep.
877 		 */
878 		(void) flk_invoke_callbacks(flcb, FLK_BEFORE_SLEEP);
879 		nfs_rw_exit(&rnp->r_lkserlock);
880 
881 		error = nlm_slock_wait(g, nslp, g->retrans_tmo);
882 
883 		/*
884 		 * NFS expects that we return with rnode->r_lkserlock
885 		 * locked on write, lock it back.
886 		 *
887 		 * NOTE: nfs_rw_enter_sig() can be either interruptible
888 		 * or not. It depends on options of NFS mount. Here
889 		 * we're _always_ uninterruptible (independently of mount
890 		 * options), because nfs_frlock/nfs3_frlock expects that
891 		 * we return with rnode->r_lkserlock acquired. So we don't
892 		 * want our lock attempt to be interrupted by a signal.
893 		 */
894 		(void) nfs_rw_enter_sig(&rnp->r_lkserlock, RW_WRITER, 0);
895 		(void) flk_invoke_callbacks(flcb, FLK_AFTER_SLEEP);
896 
897 		if (error == 0) {
898 			break;
899 		} else if (error == EINTR) {
900 			/*
901 			 * We need to call the server to cancel our
902 			 * lock request.
903 			 */
904 			DTRACE_PROBE1(cancel__lock, int, error);
905 			(void) nlm_call_cancel(&args, hostp, vers);
906 			break;
907 		} else {
908 			/*
909 			 * Timeout happened, resend the lock request to
910 			 * the server. Well, we're a bit paranoid here,
911 			 * but keep in mind previous request could lost
912 			 * (especially with conectionless transport).
913 			 */
914 
915 			ASSERT(error == ETIMEDOUT);
916 			continue;
917 		}
918 	}
919 
920 	/*
921 	 * We could disable the vnode cache for the given _sleeping_
922 	 * (codition: nslp != NULL) lock if it was unsafe. Normally,
923 	 * nfs_lockcompletion() function can enable the vnode cache
924 	 * back if the lock becomes safe after activativation. But it
925 	 * will not happen if any error occurs on the locking path.
926 	 *
927 	 * Here we enable the vnode cache back if the error occurred
928 	 * and if there aren't any unsafe locks on the given vnode.
929 	 * Note that if error happened, sleeping lock was derigistered.
930 	 */
931 	if (error != 0 && nslp != NULL && nlm_safemap(vp)) {
932 		mutex_enter(&vp->v_lock);
933 		vp->v_flag |= VNOCACHE;
934 		mutex_exit(&vp->v_lock);
935 	}
936 
937 out:
938 	if (nslp != NULL)
939 		nlm_slock_unregister(g, nslp);
940 
941 	return (error);
942 }
943 
944 /*
945  * Do NLM_CANCEL call.
946  * Helper for nlm_call_lock() error recovery.
947  */
948 static int
949 nlm_call_cancel(struct nlm4_lockargs *largs,
950     struct nlm_host *hostp, int vers)
951 {
952 	nlm4_cancargs cargs;
953 	uint32_t xid;
954 	int error, retries;
955 
956 	bzero(&cargs, sizeof (cargs));
957 
958 	xid = atomic_inc_32_nv(&nlm_xid);
959 	cargs.cookie.n_len = sizeof (xid);
960 	cargs.cookie.n_bytes = (char *)&xid;
961 	cargs.block	= largs->block;
962 	cargs.exclusive	= largs->exclusive;
963 	cargs.alock	= largs->alock;
964 
965 	/*
966 	 * Unlike all other nlm_call_* functions, nlm_call_cancel
967 	 * doesn't spin forever until it gets reasonable response
968 	 * from NLM server. It makes limited number of retries and
969 	 * if server doesn't send a reasonable reply, it returns an
970 	 * error. It behaves like that because it's called from nlm_call_lock
971 	 * with blocked signals and thus it can not be interrupted from
972 	 * user space.
973 	 */
974 	for (retries = 0; retries < NLM_CANCEL_NRETRS; retries++) {
975 		nlm_rpc_t *rpcp;
976 		enum clnt_stat stat;
977 		struct nlm4_res res;
978 
979 		error = nlm_host_get_rpc(hostp, vers, &rpcp);
980 		if (error != 0)
981 			return (ENOLCK);
982 
983 		bzero(&res, sizeof (res));
984 		stat = nlm_cancel_rpc(&cargs, &res, rpcp->nr_handle, vers);
985 		nlm_host_rele_rpc(hostp, rpcp);
986 
987 		DTRACE_PROBE1(cancel__rloop_end, enum clnt_stat, stat);
988 		error = nlm_map_clnt_stat(stat);
989 		if (error != 0) {
990 			if (error == EAGAIN)
991 				continue;
992 
993 			return (error);
994 		}
995 
996 		DTRACE_PROBE1(cancel__res, enum nlm4_stats, res.stat.stat);
997 		switch (res.stat.stat) {
998 			/*
999 			 * There was nothing to cancel. We are going to go ahead
1000 			 * and assume we got the lock.
1001 			 */
1002 		case nlm_denied:
1003 			/*
1004 			 * The server has recently rebooted.  Treat this as a
1005 			 * successful cancellation.
1006 			 */
1007 		case nlm4_denied_grace_period:
1008 			/*
1009 			 * We managed to cancel.
1010 			 */
1011 		case nlm4_granted:
1012 			error = 0;
1013 			break;
1014 
1015 		default:
1016 			/*
1017 			 * Broken server implementation.  Can't really do
1018 			 * anything here.
1019 			 */
1020 			error = EIO;
1021 			break;
1022 		}
1023 
1024 		xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
1025 		break;
1026 	}
1027 
1028 	return (error);
1029 }
1030 
1031 /*
1032  * Do NLM_UNLOCK call.
1033  * Was: nlm_clearlock
1034  */
1035 static int
1036 nlm_call_unlock(struct flock64 *flp, struct nlm_host *hostp,
1037     struct netobj *fhp, int vers)
1038 {
1039 	struct nlm4_unlockargs args;
1040 	struct nlm_owner_handle oh;
1041 	enum nlm4_stats nlm_err;
1042 	uint32_t xid;
1043 	int error;
1044 
1045 	bzero(&args, sizeof (args));
1046 	nlm_init_lock(&args.alock, flp, fhp, &oh);
1047 
1048 	oh.oh_sysid = hostp->nh_sysid;
1049 	xid = atomic_inc_32_nv(&nlm_xid);
1050 	args.cookie.n_len = sizeof (xid);
1051 	args.cookie.n_bytes = (char *)&xid;
1052 
1053 	for (;;) {
1054 		nlm_rpc_t *rpcp;
1055 		struct nlm4_res res;
1056 		enum clnt_stat stat;
1057 
1058 		error = nlm_host_get_rpc(hostp, vers, &rpcp);
1059 		if (error != 0)
1060 			return (ENOLCK);
1061 
1062 		bzero(&res, sizeof (res));
1063 		stat = nlm_unlock_rpc(&args, &res, rpcp->nr_handle, vers);
1064 		nlm_host_rele_rpc(hostp, rpcp);
1065 
1066 		error = nlm_map_clnt_stat(stat);
1067 		if (error != 0) {
1068 			if (error == EAGAIN)
1069 				continue;
1070 
1071 			return (error);
1072 		}
1073 
1074 		DTRACE_PROBE1(unlock__res, enum nlm4_stats, res.stat.stat);
1075 		nlm_err = res.stat.stat;
1076 		xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
1077 		if (nlm_err == nlm4_denied_grace_period) {
1078 			error = nlm_host_wait_grace(hostp);
1079 			if (error != 0)
1080 				return (error);
1081 
1082 			continue;
1083 		}
1084 
1085 		break;
1086 	}
1087 
1088 	/* special cases */
1089 	switch (nlm_err) {
1090 	case nlm4_denied:
1091 		error = EINVAL;
1092 		break;
1093 	default:
1094 		error = nlm_map_status(nlm_err);
1095 		break;
1096 	}
1097 
1098 	return (error);
1099 }
1100 
1101 /*
1102  * Do NLM_TEST call.
1103  * Was: nlm_getlock()
1104  */
1105 static int
1106 nlm_call_test(struct flock64 *flp, struct nlm_host *hostp,
1107     struct netobj *fhp, int vers)
1108 {
1109 	struct nlm4_testargs args;
1110 	struct nlm4_holder h;
1111 	struct nlm_owner_handle oh;
1112 	enum nlm4_stats nlm_err;
1113 	uint32_t xid;
1114 	int error;
1115 
1116 	bzero(&args, sizeof (args));
1117 	nlm_init_lock(&args.alock, flp, fhp, &oh);
1118 
1119 	args.exclusive = (flp->l_type == F_WRLCK);
1120 	oh.oh_sysid = hostp->nh_sysid;
1121 	xid = atomic_inc_32_nv(&nlm_xid);
1122 	args.cookie.n_len = sizeof (xid);
1123 	args.cookie.n_bytes = (char *)&xid;
1124 
1125 	for (;;) {
1126 		nlm_rpc_t *rpcp;
1127 		struct nlm4_testres res;
1128 		enum clnt_stat stat;
1129 
1130 		error = nlm_host_get_rpc(hostp, vers, &rpcp);
1131 		if (error != 0)
1132 			return (ENOLCK);
1133 
1134 		bzero(&res, sizeof (res));
1135 		stat = nlm_test_rpc(&args, &res, rpcp->nr_handle, vers);
1136 		nlm_host_rele_rpc(hostp, rpcp);
1137 
1138 		error = nlm_map_clnt_stat(stat);
1139 		if (error != 0) {
1140 			if (error == EAGAIN)
1141 				continue;
1142 
1143 			return (error);
1144 		}
1145 
1146 		DTRACE_PROBE1(test__res, enum nlm4_stats, res.stat.stat);
1147 		nlm_err = res.stat.stat;
1148 		bcopy(&res.stat.nlm4_testrply_u.holder, &h, sizeof (h));
1149 		xdr_free((xdrproc_t)xdr_nlm4_testres, (void *)&res);
1150 		if (nlm_err == nlm4_denied_grace_period) {
1151 			error = nlm_host_wait_grace(hostp);
1152 			if (error != 0)
1153 				return (error);
1154 
1155 			continue;
1156 		}
1157 
1158 		break;
1159 	}
1160 
1161 	switch (nlm_err) {
1162 	case nlm4_granted:
1163 		flp->l_type = F_UNLCK;
1164 		error = 0;
1165 		break;
1166 
1167 	case nlm4_denied:
1168 		flp->l_start = h.l_offset;
1169 		flp->l_len = h.l_len;
1170 		flp->l_pid = h.svid;
1171 		flp->l_type = (h.exclusive) ? F_WRLCK : F_RDLCK;
1172 		flp->l_whence = SEEK_SET;
1173 		flp->l_sysid = 0;
1174 		error = 0;
1175 		break;
1176 
1177 	default:
1178 		error = nlm_map_status(nlm_err);
1179 		break;
1180 	}
1181 
1182 	return (error);
1183 }
1184 
1185 
1186 static void
1187 nlm_init_lock(struct nlm4_lock *lock,
1188     const struct flock64 *fl, struct netobj *fh,
1189     struct nlm_owner_handle *oh)
1190 {
1191 
1192 	/* Caller converts to zero-base. */
1193 	VERIFY(fl->l_whence == SEEK_SET);
1194 	bzero(lock, sizeof (*lock));
1195 	bzero(oh, sizeof (*oh));
1196 
1197 	lock->caller_name = uts_nodename();
1198 	lock->fh.n_len = fh->n_len;
1199 	lock->fh.n_bytes = fh->n_bytes;
1200 	lock->oh.n_len = sizeof (*oh);
1201 	lock->oh.n_bytes = (void *)oh;
1202 	lock->svid = fl->l_pid;
1203 	lock->l_offset = fl->l_start;
1204 	lock->l_len = fl->l_len;
1205 }
1206 
1207 /* ************************************************************** */
1208 
1209 int
1210 nlm_shrlock(struct vnode *vp, int cmd, struct shrlock *shr,
1211     int flags, struct netobj *fh, int vers)
1212 {
1213 	struct shrlock shlk;
1214 	mntinfo_t *mi;
1215 	servinfo_t *sv;
1216 	const char *netid;
1217 	struct nlm_host *host = NULL;
1218 	int error;
1219 	struct nlm_globals *g;
1220 
1221 	mi = VTOMI(vp);
1222 	sv = mi->mi_curr_serv;
1223 
1224 	netid = nlm_knc_to_netid(sv->sv_knconf);
1225 	if (netid == NULL) {
1226 		NLM_ERR("nlm_shrlock: unknown NFS netid\n");
1227 		return (ENOSYS);
1228 	}
1229 
1230 	g = zone_getspecific(nlm_zone_key, curzone);
1231 	host = nlm_host_findcreate(g, sv->sv_hostname, netid, &sv->sv_addr);
1232 	if (host == NULL)
1233 		return (ENOSYS);
1234 
1235 	/*
1236 	 * Fill in s_sysid for the local locking calls.
1237 	 * Also, let's not trust the caller's l_pid.
1238 	 */
1239 	shlk = *shr;
1240 	shlk.s_sysid = host->nh_sysid | LM_SYSID_CLIENT;
1241 	shlk.s_pid = curproc->p_pid;
1242 
1243 	if (cmd == F_UNSHARE) {
1244 		/*
1245 		 * Purge local (cached) share information first,
1246 		 * then clear the remote share.
1247 		 */
1248 		(void) nlm_local_shrlock(vp, &shlk, cmd, flags);
1249 		nlm_shres_untrack(host, vp, &shlk);
1250 		error = nlm_call_unshare(&shlk, host, fh, vers);
1251 		goto out;
1252 	}
1253 
1254 	nfs_add_locking_id(vp, curproc->p_pid, RLMPL_OWNER,
1255 	    shr->s_owner, shr->s_own_len);
1256 
1257 	error = nlm_call_share(&shlk, host, fh, vers, FALSE);
1258 	if (error != 0)
1259 		goto out;
1260 
1261 	/*
1262 	 * Save the share locally.  This should not fail,
1263 	 * because the server is authoritative about shares
1264 	 * and it just told us we have the share reservation!
1265 	 */
1266 	error = nlm_local_shrlock(vp, shr, cmd, flags);
1267 	if (error != 0) {
1268 		/*
1269 		 * Oh oh, we really don't expect an error here.
1270 		 */
1271 		NLM_WARN("nlm_shrlock: set locally, err %d\n", error);
1272 		error = 0;
1273 	}
1274 
1275 	nlm_shres_track(host, vp, &shlk);
1276 	nlm_host_monitor(g, host, 0);
1277 
1278 out:
1279 	nlm_host_release(g, host);
1280 
1281 	return (error);
1282 }
1283 
1284 static int
1285 nlm_reclaim_share(struct nlm_host *hostp, vnode_t *vp,
1286     struct shrlock *shr, uint32_t orig_state)
1287 {
1288 	struct netobj lm_fh;
1289 	int error, state;
1290 	rpcvers_t vers;
1291 
1292 	state = nlm_host_get_state(hostp);
1293 	if (state != orig_state) {
1294 		/*
1295 		 * It seems that NLM server rebooted while
1296 		 * we were busy with recovery.
1297 		 */
1298 		return (ERESTART);
1299 	}
1300 
1301 	error = nlm_init_fh_by_vp(vp, &lm_fh, &vers);
1302 	if (error != 0)
1303 		return (error);
1304 
1305 	return (nlm_call_share(shr, hostp, &lm_fh, vers, 1));
1306 }
1307 
1308 /*
1309  * Set local share information for some NFS server.
1310  *
1311  * Called after a share request (set or clear) succeeded. We record
1312  * the details in the local lock manager. Note that since the remote
1313  * server has granted the share, we can be sure that it doesn't
1314  * conflict with any other shares we have in the local lock manager.
1315  *
1316  * Since it is possible that host may also make NLM client requests to
1317  * our NLM server, we use a different sysid value to record our own
1318  * client shares.
1319  */
1320 int
1321 nlm_local_shrlock(vnode_t *vp, struct shrlock *shr, int cmd, int flags)
1322 {
1323 	return (fs_shrlock(vp, cmd, shr, flags, CRED(), NULL));
1324 }
1325 
1326 static void
1327 nlm_local_shrcancel(vnode_t *vp, struct shrlock *shr)
1328 {
1329 	(void) nlm_local_shrlock(vp, shr, F_UNSHARE, FREAD | FWRITE);
1330 	nlm_send_siglost(shr->s_pid);
1331 }
1332 
1333 /*
1334  * Do NLM_SHARE call.
1335  * Was: nlm_setshare()
1336  */
1337 static int
1338 nlm_call_share(struct shrlock *shr, struct nlm_host *host,
1339     struct netobj *fh, int vers, int reclaim)
1340 {
1341 	struct nlm4_shareargs args;
1342 	enum nlm4_stats nlm_err;
1343 	uint32_t xid;
1344 	int error;
1345 
1346 	bzero(&args, sizeof (args));
1347 	nlm_init_share(&args.share, shr, fh);
1348 
1349 	args.reclaim = reclaim;
1350 	xid = atomic_inc_32_nv(&nlm_xid);
1351 	args.cookie.n_len = sizeof (xid);
1352 	args.cookie.n_bytes = (char *)&xid;
1353 
1354 
1355 	for (;;) {
1356 		nlm_rpc_t *rpcp;
1357 		struct nlm4_shareres res;
1358 		enum clnt_stat stat;
1359 
1360 		error = nlm_host_get_rpc(host, vers, &rpcp);
1361 		if (error != 0)
1362 			return (ENOLCK);
1363 
1364 		bzero(&res, sizeof (res));
1365 		stat = nlm_share_rpc(&args, &res, rpcp->nr_handle, vers);
1366 		nlm_host_rele_rpc(host, rpcp);
1367 
1368 		error = nlm_map_clnt_stat(stat);
1369 		if (error != 0) {
1370 			if (error == EAGAIN)
1371 				continue;
1372 
1373 			return (error);
1374 		}
1375 
1376 		DTRACE_PROBE1(share__res, enum nlm4_stats, res.stat);
1377 		nlm_err = res.stat;
1378 		xdr_free((xdrproc_t)xdr_nlm4_shareres, (void *)&res);
1379 		if (nlm_err == nlm4_denied_grace_period) {
1380 			if (args.reclaim)
1381 				return (ENOLCK);
1382 
1383 			error = nlm_host_wait_grace(host);
1384 			if (error != 0)
1385 				return (error);
1386 
1387 			continue;
1388 		}
1389 
1390 		break;
1391 	}
1392 
1393 	switch (nlm_err) {
1394 	case nlm4_granted:
1395 		error = 0;
1396 		break;
1397 	case nlm4_blocked:
1398 	case nlm4_denied:
1399 		error = EAGAIN;
1400 		break;
1401 	case nlm4_denied_nolocks:
1402 	case nlm4_deadlck:
1403 		error = ENOLCK;
1404 		break;
1405 	default:
1406 		error = EINVAL;
1407 		break;
1408 	}
1409 
1410 	return (error);
1411 }
1412 
1413 /*
1414  * Do NLM_UNSHARE call.
1415  */
1416 static int
1417 nlm_call_unshare(struct shrlock *shr, struct nlm_host *host,
1418     struct netobj *fh, int vers)
1419 {
1420 	struct nlm4_shareargs args;
1421 	enum nlm4_stats nlm_err;
1422 	uint32_t xid;
1423 	int error;
1424 
1425 	bzero(&args, sizeof (args));
1426 	nlm_init_share(&args.share, shr, fh);
1427 
1428 	xid = atomic_inc_32_nv(&nlm_xid);
1429 	args.cookie.n_len = sizeof (xid);
1430 	args.cookie.n_bytes = (char *)&xid;
1431 
1432 	for (;;) {
1433 		nlm_rpc_t *rpcp;
1434 		struct nlm4_shareres res;
1435 		enum clnt_stat stat;
1436 
1437 		error = nlm_host_get_rpc(host, vers, &rpcp);
1438 		if (error != 0)
1439 			return (ENOLCK);
1440 
1441 		bzero(&res, sizeof (res));
1442 		stat = nlm_unshare_rpc(&args, &res, rpcp->nr_handle, vers);
1443 		nlm_host_rele_rpc(host, rpcp);
1444 
1445 		error = nlm_map_clnt_stat(stat);
1446 		if (error != 0) {
1447 			if (error == EAGAIN)
1448 				continue;
1449 
1450 			return (error);
1451 		}
1452 
1453 		DTRACE_PROBE1(unshare__res, enum nlm4_stats, res.stat);
1454 		nlm_err = res.stat;
1455 		xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
1456 		if (nlm_err == nlm4_denied_grace_period) {
1457 			error = nlm_host_wait_grace(host);
1458 			if (error != 0)
1459 				return (error);
1460 
1461 			continue;
1462 		}
1463 
1464 		break;
1465 	}
1466 
1467 	switch (nlm_err) {
1468 	case nlm4_granted:
1469 		error = 0;
1470 		break;
1471 	case nlm4_denied:
1472 		error = EAGAIN;
1473 		break;
1474 	case nlm4_denied_nolocks:
1475 		error = ENOLCK;
1476 		break;
1477 	default:
1478 		error = EINVAL;
1479 		break;
1480 	}
1481 
1482 	return (error);
1483 }
1484 
1485 static void
1486 nlm_init_share(struct nlm4_share *args,
1487     const struct shrlock *shr, struct netobj *fh)
1488 {
1489 
1490 	bzero(args, sizeof (*args));
1491 
1492 	args->caller_name = uts_nodename();
1493 	args->fh.n_len = fh->n_len;
1494 	args->fh.n_bytes = fh->n_bytes;
1495 	args->oh.n_len = shr->s_own_len;
1496 	args->oh.n_bytes = (void *)shr->s_owner;
1497 
1498 	switch (shr->s_deny) {
1499 	default:
1500 	case F_NODNY:
1501 		args->mode = fsm_DN;
1502 		break;
1503 	case F_RDDNY:
1504 		args->mode = fsm_DR;
1505 		break;
1506 	case F_WRDNY:
1507 		args->mode = fsm_DW;
1508 		break;
1509 	case F_RWDNY:
1510 		args->mode = fsm_DRW;
1511 		break;
1512 	}
1513 
1514 	switch (shr->s_access) {
1515 	default:
1516 	case 0:	/* seen with F_UNSHARE */
1517 		args->access = fsa_NONE;
1518 		break;
1519 	case F_RDACC:
1520 		args->access = fsa_R;
1521 		break;
1522 	case F_WRACC:
1523 		args->access = fsa_W;
1524 		break;
1525 	case F_RWACC:
1526 		args->access = fsa_RW;
1527 		break;
1528 	}
1529 }
1530 
1531 /*
1532  * Initialize filehandle according to the version
1533  * of NFS vnode was created on. The version of
1534  * NLM that can be used with given NFS version
1535  * is saved to lm_vers.
1536  */
1537 static int
1538 nlm_init_fh_by_vp(vnode_t *vp, struct netobj *fh, rpcvers_t *lm_vers)
1539 {
1540 	mntinfo_t *mi = VTOMI(vp);
1541 
1542 	/*
1543 	 * Too bad the NFS code doesn't just carry the FH
1544 	 * in a netobj or a netbuf.
1545 	 */
1546 	switch (mi->mi_vers) {
1547 	case NFS_V3:
1548 		/* See nfs3_frlock() */
1549 		*lm_vers = NLM4_VERS;
1550 		fh->n_len = VTOFH3(vp)->fh3_length;
1551 		fh->n_bytes = (char *)&(VTOFH3(vp)->fh3_u.data);
1552 		break;
1553 
1554 	case NFS_VERSION:
1555 		/* See nfs_frlock() */
1556 		*lm_vers = NLM_VERS;
1557 		fh->n_len = sizeof (fhandle_t);
1558 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1559 		fh->n_bytes = (char *)VTOFH(vp);
1560 		break;
1561 	default:
1562 		return (ENOSYS);
1563 	}
1564 
1565 	return (0);
1566 }
1567 
1568 /*
1569  * Send SIGLOST to the process identified by pid.
1570  * NOTE: called when NLM decides to remove lock
1571  * or share reservation ownder by the process
1572  * by force.
1573  */
1574 static void
1575 nlm_send_siglost(pid_t pid)
1576 {
1577 	proc_t *p;
1578 
1579 	mutex_enter(&pidlock);
1580 	p = prfind(pid);
1581 	if (p != NULL)
1582 		psignal(p, SIGLOST);
1583 
1584 	mutex_exit(&pidlock);
1585 }
1586 
1587 static int
1588 nlm_map_clnt_stat(enum clnt_stat stat)
1589 {
1590 	switch (stat) {
1591 	case RPC_SUCCESS:
1592 		return (0);
1593 
1594 	case RPC_TIMEDOUT:
1595 	case RPC_PROGUNAVAIL:
1596 		return (EAGAIN);
1597 
1598 	case RPC_INTR:
1599 		return (EINTR);
1600 
1601 	default:
1602 		return (EINVAL);
1603 	}
1604 }
1605 
1606 static int
1607 nlm_map_status(enum nlm4_stats stat)
1608 {
1609 	switch (stat) {
1610 	case nlm4_granted:
1611 		return (0);
1612 
1613 	case nlm4_denied:
1614 		return (EAGAIN);
1615 
1616 	case nlm4_denied_nolocks:
1617 		return (ENOLCK);
1618 
1619 	case nlm4_blocked:
1620 		return (EAGAIN);
1621 
1622 	case nlm4_denied_grace_period:
1623 		return (EAGAIN);
1624 
1625 	case nlm4_deadlck:
1626 		return (EDEADLK);
1627 
1628 	case nlm4_rofs:
1629 		return (EROFS);
1630 
1631 	case nlm4_stale_fh:
1632 		return (ESTALE);
1633 
1634 	case nlm4_fbig:
1635 		return (EFBIG);
1636 
1637 	case nlm4_failed:
1638 		return (EACCES);
1639 
1640 	default:
1641 		return (EINVAL);
1642 	}
1643 }
1644