xref: /titanic_51/usr/src/uts/common/fs/nfs/nfs4_subr.c (revision 7660e73f5b1e781050d87237f1123324e01f467b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  *  	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/vtrace.h>
36 #include <sys/session.h>
37 #include <sys/thread.h>
38 #include <sys/dnlc.h>
39 #include <sys/cred.h>
40 #include <sys/priv.h>
41 #include <sys/list.h>
42 #include <sys/sdt.h>
43 #include <sys/policy.h>
44 
45 #include <rpc/types.h>
46 #include <rpc/xdr.h>
47 
48 #include <nfs/nfs.h>
49 
50 #include <nfs/nfs_clnt.h>
51 
52 #include <nfs/nfs4.h>
53 #include <nfs/rnode4.h>
54 #include <nfs/nfs4_clnt.h>
55 
/*
 * client side statistics
 *
 * Named-kstat initializer for the per-client counters.  Every entry is
 * a 64-bit unsigned counter; the entries inside #ifdef DEBUG exist only
 * in DEBUG builds.
 */
static const struct clstat4 clstat4_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "referrals",	KSTAT_DATA_UINT64 },
	{ "referlinks",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },	/* bumped per clget4() call */
	{ "cltoomany",	KSTAT_DATA_UINT64 },	/* bumped when clget4() allocates */
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },	/* client handles allocated */
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};
73 
#ifdef DEBUG
/*
 * DEBUG-only global counters; each entry is a 64-bit unsigned named
 * kstat value.
 */
struct clstat4_debug clstat4_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },	/* clreclaim4_zone() calls */
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif
88 
/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 *
 * nfs4_clnt_list_lock is held by clreclaim4() while it walks the list.
 */
static list_t nfs4_clnt_list;
static kmutex_t nfs4_clnt_list_lock;
zone_key_t nfs4clnt_zone_key;

/* kmem cache from which clget4() allocates struct chtab entries */
static struct kmem_cache *chtab4_cache;

#ifdef DEBUG
static int nfs4_rfscall_debug;
static int nfs4_try_failover_any;
/* when non-zero, utf8_to_fn() logs filenames it rejects */
int nfs4_utf8_debug = 0;
#endif
104 
/*
 * NFSv4 readdir cache implementation
 *
 * Private wrapper around a rddir4_cache element that adds a locked
 * reference count and the AVL linkage.
 */
typedef struct rddir4_cache_impl {
	rddir4_cache	rc;		/* readdir cache element */
	kmutex_t	lock;		/* lock protects count */
	uint_t		count;		/* reference count */
	avl_node_t	tree;		/* AVL tree link */
} rddir4_cache_impl;
114 
115 static int rddir4_cache_compar(const void *, const void *);
116 static void rddir4_cache_free(rddir4_cache_impl *);
117 static rddir4_cache *rddir4_cache_alloc(int);
118 static void rddir4_cache_hold(rddir4_cache *);
119 static int try_failover(enum clnt_stat);
120 
121 static int nfs4_readdir_cache_hits = 0;
122 static int nfs4_readdir_cache_waits = 0;
123 static int nfs4_readdir_cache_misses = 0;
124 
125 /*
126  * Shared nfs4 functions
127  */
128 
129 /*
130  * Copy an nfs_fh4.  The destination storage (to->nfs_fh4_val) must already
131  * be allocated.
132  */
133 
134 void
135 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
136 {
137 	to->nfs_fh4_len = from->nfs_fh4_len;
138 	bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
139 }
140 
141 /*
142  * nfs4cmpfh - compare 2 filehandles.
143  * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
144  * "less" than the second, +1 if the first is "greater" than the second.
145  */
146 
147 int
148 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
149 {
150 	const char *c1, *c2;
151 
152 	if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
153 		return (-1);
154 	if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
155 		return (1);
156 	for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
157 	    c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
158 	    c1++, c2++) {
159 		if (*c1 < *c2)
160 			return (-1);
161 		if (*c1 > *c2)
162 			return (1);
163 	}
164 
165 	return (0);
166 }
167 
168 /*
169  * Compare two v4 filehandles.  Return zero if they're the same, non-zero
170  * if they're not.  Like nfs4cmpfh(), but different filehandle
171  * representation, and doesn't provide information about greater than or
172  * less than.
173  */
174 
175 int
176 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
177 {
178 	if (fh1->fh_len == fh2->fh_len)
179 		return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
180 
181 	return (1);
182 }
183 
184 int
185 stateid4_cmp(stateid4 *s1, stateid4 *s2)
186 {
187 	if (bcmp(s1, s2, sizeof (stateid4)) == 0)
188 		return (1);
189 	else
190 		return (0);
191 }
192 
193 nfsstat4
194 puterrno4(int error)
195 {
196 	switch (error) {
197 	case 0:
198 		return (NFS4_OK);
199 	case EPERM:
200 		return (NFS4ERR_PERM);
201 	case ENOENT:
202 		return (NFS4ERR_NOENT);
203 	case EINTR:
204 		return (NFS4ERR_IO);
205 	case EIO:
206 		return (NFS4ERR_IO);
207 	case ENXIO:
208 		return (NFS4ERR_NXIO);
209 	case ENOMEM:
210 		return (NFS4ERR_RESOURCE);
211 	case EACCES:
212 		return (NFS4ERR_ACCESS);
213 	case EBUSY:
214 		return (NFS4ERR_IO);
215 	case EEXIST:
216 		return (NFS4ERR_EXIST);
217 	case EXDEV:
218 		return (NFS4ERR_XDEV);
219 	case ENODEV:
220 		return (NFS4ERR_IO);
221 	case ENOTDIR:
222 		return (NFS4ERR_NOTDIR);
223 	case EISDIR:
224 		return (NFS4ERR_ISDIR);
225 	case EINVAL:
226 		return (NFS4ERR_INVAL);
227 	case EMFILE:
228 		return (NFS4ERR_RESOURCE);
229 	case EFBIG:
230 		return (NFS4ERR_FBIG);
231 	case ENOSPC:
232 		return (NFS4ERR_NOSPC);
233 	case EROFS:
234 		return (NFS4ERR_ROFS);
235 	case EMLINK:
236 		return (NFS4ERR_MLINK);
237 	case EDEADLK:
238 		return (NFS4ERR_DEADLOCK);
239 	case ENOLCK:
240 		return (NFS4ERR_DENIED);
241 	case EREMOTE:
242 		return (NFS4ERR_SERVERFAULT);
243 	case ENOTSUP:
244 		return (NFS4ERR_NOTSUPP);
245 	case EDQUOT:
246 		return (NFS4ERR_DQUOT);
247 	case ENAMETOOLONG:
248 		return (NFS4ERR_NAMETOOLONG);
249 	case EOVERFLOW:
250 		return (NFS4ERR_INVAL);
251 	case ENOSYS:
252 		return (NFS4ERR_NOTSUPP);
253 	case ENOTEMPTY:
254 		return (NFS4ERR_NOTEMPTY);
255 	case EOPNOTSUPP:
256 		return (NFS4ERR_NOTSUPP);
257 	case ESTALE:
258 		return (NFS4ERR_STALE);
259 	case EAGAIN:
260 		if (curthread->t_flag & T_WOULDBLOCK) {
261 			curthread->t_flag &= ~T_WOULDBLOCK;
262 			return (NFS4ERR_DELAY);
263 		}
264 		return (NFS4ERR_LOCKED);
265 	default:
266 		return ((enum nfsstat4)error);
267 	}
268 }
269 
270 int
271 geterrno4(enum nfsstat4 status)
272 {
273 	switch (status) {
274 	case NFS4_OK:
275 		return (0);
276 	case NFS4ERR_PERM:
277 		return (EPERM);
278 	case NFS4ERR_NOENT:
279 		return (ENOENT);
280 	case NFS4ERR_IO:
281 		return (EIO);
282 	case NFS4ERR_NXIO:
283 		return (ENXIO);
284 	case NFS4ERR_ACCESS:
285 		return (EACCES);
286 	case NFS4ERR_EXIST:
287 		return (EEXIST);
288 	case NFS4ERR_XDEV:
289 		return (EXDEV);
290 	case NFS4ERR_NOTDIR:
291 		return (ENOTDIR);
292 	case NFS4ERR_ISDIR:
293 		return (EISDIR);
294 	case NFS4ERR_INVAL:
295 		return (EINVAL);
296 	case NFS4ERR_FBIG:
297 		return (EFBIG);
298 	case NFS4ERR_NOSPC:
299 		return (ENOSPC);
300 	case NFS4ERR_ROFS:
301 		return (EROFS);
302 	case NFS4ERR_MLINK:
303 		return (EMLINK);
304 	case NFS4ERR_NAMETOOLONG:
305 		return (ENAMETOOLONG);
306 	case NFS4ERR_NOTEMPTY:
307 		return (ENOTEMPTY);
308 	case NFS4ERR_DQUOT:
309 		return (EDQUOT);
310 	case NFS4ERR_STALE:
311 		return (ESTALE);
312 	case NFS4ERR_BADHANDLE:
313 		return (ESTALE);
314 	case NFS4ERR_BAD_COOKIE:
315 		return (EINVAL);
316 	case NFS4ERR_NOTSUPP:
317 		return (EOPNOTSUPP);
318 	case NFS4ERR_TOOSMALL:
319 		return (EINVAL);
320 	case NFS4ERR_SERVERFAULT:
321 		return (EIO);
322 	case NFS4ERR_BADTYPE:
323 		return (EINVAL);
324 	case NFS4ERR_DELAY:
325 		return (ENXIO);
326 	case NFS4ERR_SAME:
327 		return (EPROTO);
328 	case NFS4ERR_DENIED:
329 		return (ENOLCK);
330 	case NFS4ERR_EXPIRED:
331 		return (EPROTO);
332 	case NFS4ERR_LOCKED:
333 		return (EACCES);
334 	case NFS4ERR_GRACE:
335 		return (EAGAIN);
336 	case NFS4ERR_FHEXPIRED:	/* if got here, failed to get a new fh */
337 		return (ESTALE);
338 	case NFS4ERR_SHARE_DENIED:
339 		return (EACCES);
340 	case NFS4ERR_WRONGSEC:
341 		return (EPERM);
342 	case NFS4ERR_CLID_INUSE:
343 		return (EAGAIN);
344 	case NFS4ERR_RESOURCE:
345 		return (EAGAIN);
346 	case NFS4ERR_MOVED:
347 		return (EPROTO);
348 	case NFS4ERR_NOFILEHANDLE:
349 		return (EIO);
350 	case NFS4ERR_MINOR_VERS_MISMATCH:
351 		return (ENOTSUP);
352 	case NFS4ERR_STALE_CLIENTID:
353 		return (EIO);
354 	case NFS4ERR_STALE_STATEID:
355 		return (EIO);
356 	case NFS4ERR_OLD_STATEID:
357 		return (EIO);
358 	case NFS4ERR_BAD_STATEID:
359 		return (EIO);
360 	case NFS4ERR_BAD_SEQID:
361 		return (EIO);
362 	case NFS4ERR_NOT_SAME:
363 		return (EPROTO);
364 	case NFS4ERR_LOCK_RANGE:
365 		return (EPROTO);
366 	case NFS4ERR_SYMLINK:
367 		return (EPROTO);
368 	case NFS4ERR_RESTOREFH:
369 		return (EPROTO);
370 	case NFS4ERR_LEASE_MOVED:
371 		return (EPROTO);
372 	case NFS4ERR_ATTRNOTSUPP:
373 		return (ENOTSUP);
374 	case NFS4ERR_NO_GRACE:
375 		return (EPROTO);
376 	case NFS4ERR_RECLAIM_BAD:
377 		return (EPROTO);
378 	case NFS4ERR_RECLAIM_CONFLICT:
379 		return (EPROTO);
380 	case NFS4ERR_BADXDR:
381 		return (EINVAL);
382 	case NFS4ERR_LOCKS_HELD:
383 		return (EIO);
384 	case NFS4ERR_OPENMODE:
385 		return (EACCES);
386 	case NFS4ERR_BADOWNER:
387 		/*
388 		 * Client and server are in different DNS domains
389 		 * and the NFSMAPID_DOMAIN in /etc/default/nfs
390 		 * doesn't match.  No good answer here.  Return
391 		 * EACCESS, which translates to "permission denied".
392 		 */
393 		return (EACCES);
394 	case NFS4ERR_BADCHAR:
395 		return (EINVAL);
396 	case NFS4ERR_BADNAME:
397 		return (EINVAL);
398 	case NFS4ERR_BAD_RANGE:
399 		return (EIO);
400 	case NFS4ERR_LOCK_NOTSUPP:
401 		return (ENOTSUP);
402 	case NFS4ERR_OP_ILLEGAL:
403 		return (EINVAL);
404 	case NFS4ERR_DEADLOCK:
405 		return (EDEADLK);
406 	case NFS4ERR_FILE_OPEN:
407 		return (EACCES);
408 	case NFS4ERR_ADMIN_REVOKED:
409 		return (EPROTO);
410 	case NFS4ERR_CB_PATH_DOWN:
411 		return (EPROTO);
412 	default:
413 #ifdef DEBUG
414 		zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
415 		    status);
416 #endif
417 		return ((int)status);
418 	}
419 }
420 
421 void
422 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
423 {
424 	nfs4_server_t *server;
425 
426 	/*
427 	 * Return if already printed/queued a msg
428 	 * for this mount point.
429 	 */
430 	if (mi->mi_flags & MI4_BADOWNER_DEBUG)
431 		return;
432 	/*
433 	 * Happens once per client <-> server pair.
434 	 */
435 	if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
436 	    mi->mi_flags & MI4_INT))
437 		return;
438 
439 	server = find_nfs4_server(mi);
440 	if (server == NULL) {
441 		nfs_rw_exit(&mi->mi_recovlock);
442 		return;
443 	}
444 
445 	if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
446 		zcmn_err(mi->mi_zone->zone_id, CE_WARN,
447 		    "!NFSMAPID_DOMAIN does not match"
448 		    " the server: %s domain.\n"
449 		    "Please check configuration",
450 		    mi->mi_curr_serv->sv_hostname);
451 		server->s_flags |= N4S_BADOWNER_DEBUG;
452 	}
453 	mutex_exit(&server->s_lock);
454 	nfs4_server_rele(server);
455 	nfs_rw_exit(&mi->mi_recovlock);
456 
457 	/*
458 	 * Happens once per mntinfo4_t.
459 	 * This error is deemed as one of the recovery facts "RF_BADOWNER",
460 	 * queue this in the mesg queue for this mount_info. This message
461 	 * is not printed, meaning its absent from id_to_dump_solo_fact()
462 	 * but its there for inspection if the queue is ever dumped/inspected.
463 	 */
464 	mutex_enter(&mi->mi_lock);
465 	if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
466 		nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
467 		    FALSE, NULL, 0, NULL);
468 		mi->mi_flags |= MI4_BADOWNER_DEBUG;
469 	}
470 	mutex_exit(&mi->mi_lock);
471 }
472 
473 int
474 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
475 {
476 	int64_t sec;
477 	int32_t nsec;
478 
479 	/*
480 	 * Here check that the nfsv4 time is valid for the system.
481 	 * nfsv4 time value is a signed 64-bit, and the system time
482 	 * may be either int64_t or int32_t (depends on the kernel),
483 	 * so if the kernel is 32-bit, the nfsv4 time value may not fit.
484 	 */
485 #ifndef _LP64
486 	if (! NFS4_TIME_OK(ntime->seconds)) {
487 		return (EOVERFLOW);
488 	}
489 #endif
490 
491 	/* Invalid to specify 1 billion (or more) nsecs */
492 	if (ntime->nseconds >= 1000000000)
493 		return (EINVAL);
494 
495 	if (ntime->seconds < 0) {
496 		sec = ntime->seconds + 1;
497 		nsec = -1000000000 + ntime->nseconds;
498 	} else {
499 		sec = ntime->seconds;
500 		nsec = ntime->nseconds;
501 	}
502 
503 	vatime->tv_sec = sec;
504 	vatime->tv_nsec = nsec;
505 
506 	return (0);
507 }
508 
509 int
510 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
511 {
512 	int64_t sec;
513 	uint32_t nsec;
514 
515 	/*
516 	 * nfsv4 time value is a signed 64-bit, and the system time
517 	 * may be either int64_t or int32_t (depends on the kernel),
518 	 * so all system time values will fit.
519 	 */
520 	if (vatime->tv_nsec >= 0) {
521 		sec = vatime->tv_sec;
522 		nsec = vatime->tv_nsec;
523 	} else {
524 		sec = vatime->tv_sec - 1;
525 		nsec = 1000000000 + vatime->tv_nsec;
526 	}
527 	ntime->seconds = sec;
528 	ntime->nseconds = nsec;
529 
530 	return (0);
531 }
532 
533 /*
534  * Converts a utf8 string to a valid null terminated filename string.
535  *
536  * XXX - Not actually translating the UTF-8 string as per RFC 2279.
537  *	 For now, just validate that the UTF-8 string off the wire
538  *	 does not have characters that will freak out UFS, and leave
539  *	 it at that.
540  */
541 char *
542 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
543 {
544 	ASSERT(lenp != NULL);
545 
546 	if (u8s == NULL || u8s->utf8string_len <= 0 ||
547 	    u8s->utf8string_val == NULL)
548 		return (NULL);
549 
550 	/*
551 	 * Check for obvious illegal filename chars
552 	 */
553 	if (utf8_strchr(u8s, '/') != NULL) {
554 #ifdef DEBUG
555 		if (nfs4_utf8_debug) {
556 			char *path;
557 			int len = u8s->utf8string_len;
558 
559 			path = kmem_alloc(len + 1, KM_SLEEP);
560 			bcopy(u8s->utf8string_val, path, len);
561 			path[len] = '\0';
562 
563 			zcmn_err(getzoneid(), CE_WARN,
564 			    "Invalid UTF-8 filename: %s", path);
565 
566 			kmem_free(path, len + 1);
567 		}
568 #endif
569 		return (NULL);
570 	}
571 
572 	return (utf8_to_str(u8s, lenp, s));
573 }
574 
575 /*
576  * Converts a utf8 string to a C string.
577  * kmem_allocs a new string if not supplied
578  */
579 char *
580 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
581 {
582 	char	*sp;
583 	char	*u8p;
584 	int	len;
585 	int	 i;
586 
587 	ASSERT(lenp != NULL);
588 
589 	if (str == NULL)
590 		return (NULL);
591 
592 	u8p = str->utf8string_val;
593 	len = str->utf8string_len;
594 	if (len <= 0 || u8p == NULL) {
595 		if (s)
596 			*s = '\0';
597 		return (NULL);
598 	}
599 
600 	sp = s;
601 	if (sp == NULL)
602 		sp = kmem_alloc(len + 1, KM_SLEEP);
603 
604 	/*
605 	 * At least check for embedded nulls
606 	 */
607 	for (i = 0; i < len; i++) {
608 		sp[i] = u8p[i];
609 		if (u8p[i] == '\0') {
610 #ifdef	DEBUG
611 			zcmn_err(getzoneid(), CE_WARN,
612 			    "Embedded NULL in UTF-8 string");
613 #endif
614 			if (s == NULL)
615 				kmem_free(sp, len + 1);
616 			return (NULL);
617 		}
618 	}
619 	sp[len] = '\0';
620 	*lenp = len + 1;
621 
622 	return (sp);
623 }
624 
625 /*
626  * str_to_utf8 - converts a null-terminated C string to a utf8 string
627  */
628 utf8string *
629 str_to_utf8(char *nm, utf8string *str)
630 {
631 	int len;
632 
633 	if (str == NULL)
634 		return (NULL);
635 
636 	if (nm == NULL || *nm == '\0') {
637 		str->utf8string_len = 0;
638 		str->utf8string_val = NULL;
639 	}
640 
641 	len = strlen(nm);
642 
643 	str->utf8string_val = kmem_alloc(len, KM_SLEEP);
644 	str->utf8string_len = len;
645 	bcopy(nm, str->utf8string_val, len);
646 
647 	return (str);
648 }
649 
650 utf8string *
651 utf8_copy(utf8string *src, utf8string *dest)
652 {
653 	if (src == NULL)
654 		return (NULL);
655 	if (dest == NULL)
656 		return (NULL);
657 
658 	if (src->utf8string_len > 0) {
659 		dest->utf8string_val = kmem_alloc(src->utf8string_len,
660 		    KM_SLEEP);
661 		bcopy(src->utf8string_val, dest->utf8string_val,
662 		    src->utf8string_len);
663 		dest->utf8string_len = src->utf8string_len;
664 	} else {
665 		dest->utf8string_val = NULL;
666 		dest->utf8string_len = 0;
667 	}
668 
669 	return (dest);
670 }
671 
672 int
673 utf8_compare(const utf8string *a, const utf8string *b)
674 {
675 	int mlen, cmp;
676 	int alen, blen;
677 	char *aval, *bval;
678 
679 	if ((a == NULL) && (b == NULL))
680 		return (0);
681 	else if (a == NULL)
682 		return (-1);
683 	else if (b == NULL)
684 		return (1);
685 
686 	alen = a->utf8string_len;
687 	blen = b->utf8string_len;
688 	aval = a->utf8string_val;
689 	bval = b->utf8string_val;
690 
691 	if (((alen == 0) || (aval == NULL)) &&
692 	    ((blen == 0) || (bval == NULL)))
693 		return (0);
694 	else if ((alen == 0) || (aval == NULL))
695 		return (-1);
696 	else if ((blen == 0) || (bval == NULL))
697 		return (1);
698 
699 	mlen = MIN(alen, blen);
700 	cmp = strncmp(aval, bval, mlen);
701 
702 	if ((cmp == 0) && (alen == blen))
703 		return (0);
704 	else if ((cmp == 0) && (alen < blen))
705 		return (-1);
706 	else if (cmp == 0)
707 		return (1);
708 	else if (cmp < 0)
709 		return (-1);
710 	return (1);
711 }
712 
713 /*
714  * utf8_dir_verify - checks that the utf8 string is valid
715  */
716 int
717 utf8_dir_verify(utf8string *str)
718 {
719 	char *nm;
720 	int len;
721 
722 	if (str == NULL)
723 		return (0);
724 
725 	nm = str->utf8string_val;
726 	len = str->utf8string_len;
727 	if (nm == NULL || len == 0) {
728 		return (0);
729 	}
730 
731 	if (len == 1 && nm[0] == '.')
732 		return (0);
733 	if (len == 2 && nm[0] == '.' && nm[1] == '.')
734 		return (0);
735 
736 	if (utf8_strchr(str, '/') != NULL)
737 		return (0);
738 
739 	if (utf8_strchr(str, '\0') != NULL)
740 		return (0);
741 
742 	return (1);
743 }
744 
745 /*
746  * from rpcsec module (common/rpcsec)
747  */
748 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
749 extern void sec_clnt_freeh(AUTH *);
750 extern void sec_clnt_freeinfo(struct sec_data *);
751 
752 /*
753  * authget() gets an auth handle based on the security
754  * information from the servinfo in mountinfo.
755  * The auth handle is stored in ch_client->cl_auth.
756  *
757  * First security flavor of choice is to use sv_secdata
758  * which is initiated by the client. If that fails, get
759  * secinfo from the server and then select one from the
760  * server secinfo list .
761  *
762  * For RPCSEC_GSS flavor, upon success, a secure context is
763  * established between client and server.
764  */
765 int
766 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
767 {
768 	int error, i;
769 
770 	/*
771 	 * SV4_TRYSECINFO indicates to try the secinfo list from
772 	 * sv_secinfo until a successful one is reached. Point
773 	 * sv_currsec to the selected security mechanism for
774 	 * later sessions.
775 	 */
776 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
777 	if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
778 		for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
779 		    i++) {
780 			if (!(error = sec_clnt_geth(ch_client,
781 			    &svp->sv_secinfo->sdata[i],
782 			    cr, &ch_client->cl_auth))) {
783 
784 				svp->sv_currsec = &svp->sv_secinfo->sdata[i];
785 				svp->sv_secinfo->index = i;
786 				/* done */
787 				svp->sv_flags &= ~SV4_TRYSECINFO;
788 				break;
789 			}
790 
791 			/*
792 			 * Allow the caller retry with the security flavor
793 			 * pointed by svp->sv_secinfo->index when
794 			 * ETIMEDOUT/ECONNRESET occurs.
795 			 */
796 			if (error == ETIMEDOUT || error == ECONNRESET) {
797 				svp->sv_secinfo->index = i;
798 				break;
799 			}
800 		}
801 	} else {
802 		/* sv_currsec points to one of the entries in sv_secinfo */
803 		if (svp->sv_currsec) {
804 			error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
805 			    &ch_client->cl_auth);
806 		} else {
807 			/* If it's null, use sv_secdata. */
808 			error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
809 			    &ch_client->cl_auth);
810 		}
811 	}
812 	nfs_rw_exit(&svp->sv_lock);
813 
814 	return (error);
815 }
816 
817 /*
818  * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
819  */
820 int
821 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
822     struct chtab **chp, struct nfs4_clnt *nfscl)
823 {
824 	struct chhead *ch, *newch;
825 	struct chhead **plistp;
826 	struct chtab *cp;
827 	int error;
828 	k_sigset_t smask;
829 
830 	if (newcl == NULL || chp == NULL || ci == NULL)
831 		return (EINVAL);
832 
833 	*newcl = NULL;
834 	*chp = NULL;
835 
836 	/*
837 	 * Find an unused handle or create one
838 	 */
839 	newch = NULL;
840 	nfscl->nfscl_stat.clgets.value.ui64++;
841 top:
842 	/*
843 	 * Find the correct entry in the cache to check for free
844 	 * client handles.  The search is based on the RPC program
845 	 * number, program version number, dev_t for the transport
846 	 * device, and the protocol family.
847 	 */
848 	mutex_enter(&nfscl->nfscl_chtable4_lock);
849 	plistp = &nfscl->nfscl_chtable4;
850 	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
851 		if (ch->ch_prog == ci->cl_prog &&
852 		    ch->ch_vers == ci->cl_vers &&
853 		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
854 		    (strcmp(ch->ch_protofmly,
855 		    svp->sv_knconf->knc_protofmly) == 0))
856 			break;
857 		plistp = &ch->ch_next;
858 	}
859 
860 	/*
861 	 * If we didn't find a cache entry for this quadruple, then
862 	 * create one.  If we don't have one already preallocated,
863 	 * then drop the cache lock, create one, and then start over.
864 	 * If we did have a preallocated entry, then just add it to
865 	 * the front of the list.
866 	 */
867 	if (ch == NULL) {
868 		if (newch == NULL) {
869 			mutex_exit(&nfscl->nfscl_chtable4_lock);
870 			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
871 			newch->ch_timesused = 0;
872 			newch->ch_prog = ci->cl_prog;
873 			newch->ch_vers = ci->cl_vers;
874 			newch->ch_dev = svp->sv_knconf->knc_rdev;
875 			newch->ch_protofmly = kmem_alloc(
876 			    strlen(svp->sv_knconf->knc_protofmly) + 1,
877 			    KM_SLEEP);
878 			(void) strcpy(newch->ch_protofmly,
879 			    svp->sv_knconf->knc_protofmly);
880 			newch->ch_list = NULL;
881 			goto top;
882 		}
883 		ch = newch;
884 		newch = NULL;
885 		ch->ch_next = nfscl->nfscl_chtable4;
886 		nfscl->nfscl_chtable4 = ch;
887 	/*
888 	 * We found a cache entry, but if it isn't on the front of the
889 	 * list, then move it to the front of the list to try to take
890 	 * advantage of locality of operations.
891 	 */
892 	} else if (ch != nfscl->nfscl_chtable4) {
893 		*plistp = ch->ch_next;
894 		ch->ch_next = nfscl->nfscl_chtable4;
895 		nfscl->nfscl_chtable4 = ch;
896 	}
897 
898 	/*
899 	 * If there was a free client handle cached, then remove it
900 	 * from the list, init it, and use it.
901 	 */
902 	if (ch->ch_list != NULL) {
903 		cp = ch->ch_list;
904 		ch->ch_list = cp->ch_list;
905 		mutex_exit(&nfscl->nfscl_chtable4_lock);
906 		if (newch != NULL) {
907 			kmem_free(newch->ch_protofmly,
908 			    strlen(newch->ch_protofmly) + 1);
909 			kmem_free(newch, sizeof (*newch));
910 		}
911 		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
912 		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
913 
914 		/*
915 		 * Get an auth handle.
916 		 */
917 		error = authget(svp, cp->ch_client, cr);
918 		if (error || cp->ch_client->cl_auth == NULL) {
919 			CLNT_DESTROY(cp->ch_client);
920 			kmem_cache_free(chtab4_cache, cp);
921 			return ((error != 0) ? error : EINTR);
922 		}
923 		ch->ch_timesused++;
924 		*newcl = cp->ch_client;
925 		*chp = cp;
926 		return (0);
927 	}
928 
929 	/*
930 	 * There weren't any free client handles which fit, so allocate
931 	 * a new one and use that.
932 	 */
933 #ifdef DEBUG
934 	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
935 #endif
936 	mutex_exit(&nfscl->nfscl_chtable4_lock);
937 
938 	nfscl->nfscl_stat.cltoomany.value.ui64++;
939 	if (newch != NULL) {
940 		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
941 		kmem_free(newch, sizeof (*newch));
942 	}
943 
944 	cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
945 	cp->ch_head = ch;
946 
947 	sigintr(&smask, (int)ci->cl_flags & MI4_INT);
948 	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
949 	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
950 	sigunintr(&smask);
951 
952 	if (error != 0) {
953 		kmem_cache_free(chtab4_cache, cp);
954 #ifdef DEBUG
955 		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
956 #endif
957 		/*
958 		 * Warning is unnecessary if error is EINTR.
959 		 */
960 		if (error != EINTR) {
961 			nfs_cmn_err(error, CE_WARN,
962 			    "clget: couldn't create handle: %m\n");
963 		}
964 		return (error);
965 	}
966 	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
967 	auth_destroy(cp->ch_client->cl_auth);
968 
969 	/*
970 	 * Get an auth handle.
971 	 */
972 	error = authget(svp, cp->ch_client, cr);
973 	if (error || cp->ch_client->cl_auth == NULL) {
974 		CLNT_DESTROY(cp->ch_client);
975 		kmem_cache_free(chtab4_cache, cp);
976 #ifdef DEBUG
977 		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
978 #endif
979 		return ((error != 0) ? error : EINTR);
980 	}
981 	ch->ch_timesused++;
982 	*newcl = cp->ch_client;
983 	ASSERT(cp->ch_client->cl_nosignal == FALSE);
984 	*chp = cp;
985 	return (0);
986 }
987 
988 static int
989 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
990     struct chtab **chp, struct nfs4_clnt *nfscl)
991 {
992 	clinfo_t ci;
993 	bool_t is_recov;
994 	int firstcall, error = 0;
995 
996 	/*
997 	 * Set read buffer size to rsize
998 	 * and add room for RPC headers.
999 	 */
1000 	ci.cl_readsize = mi->mi_tsize;
1001 	if (ci.cl_readsize != 0)
1002 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
1003 
1004 	/*
1005 	 * If soft mount and server is down just try once.
1006 	 * meaning: do not retransmit.
1007 	 */
1008 	if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
1009 		ci.cl_retrans = 0;
1010 	else
1011 		ci.cl_retrans = mi->mi_retrans;
1012 
1013 	ci.cl_prog = mi->mi_prog;
1014 	ci.cl_vers = mi->mi_vers;
1015 	ci.cl_flags = mi->mi_flags;
1016 
1017 	/*
1018 	 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
1019 	 * security flavor, the client tries to establish a security context
1020 	 * by contacting the server. If the connection is timed out or reset,
1021 	 * e.g. server reboot, we will try again.
1022 	 */
1023 	is_recov = (curthread == mi->mi_recovthread);
1024 	firstcall = 1;
1025 
1026 	do {
1027 		error = clget4(&ci, svp, cr, newcl, chp, nfscl);
1028 
1029 		if (error == 0)
1030 			break;
1031 
1032 		/*
1033 		 * For forced unmount and zone shutdown, bail out but
1034 		 * let the recovery thread do one more transmission.
1035 		 */
1036 		if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
1037 		    (!is_recov || !firstcall)) {
1038 			error = EIO;
1039 			break;
1040 		}
1041 
1042 		/* do not retry for soft mount */
1043 		if (!(mi->mi_flags & MI4_HARD))
1044 			break;
1045 
1046 		/* let the caller deal with the failover case */
1047 		if (FAILOVER_MOUNT4(mi))
1048 			break;
1049 
1050 		firstcall = 0;
1051 
1052 	} while (error == ETIMEDOUT || error == ECONNRESET);
1053 
1054 	return (error);
1055 }
1056 
1057 void
1058 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
1059 {
1060 	if (cl->cl_auth != NULL) {
1061 		sec_clnt_freeh(cl->cl_auth);
1062 		cl->cl_auth = NULL;
1063 	}
1064 
1065 	/*
1066 	 * Timestamp this cache entry so that we know when it was last
1067 	 * used.
1068 	 */
1069 	cp->ch_freed = gethrestime_sec();
1070 
1071 	/*
1072 	 * Add the free client handle to the front of the list.
1073 	 * This way, the list will be sorted in youngest to oldest
1074 	 * order.
1075 	 */
1076 	mutex_enter(&nfscl->nfscl_chtable4_lock);
1077 	cp->ch_list = cp->ch_head->ch_list;
1078 	cp->ch_head->ch_list = cp;
1079 	mutex_exit(&nfscl->nfscl_chtable4_lock);
1080 }
1081 
1082 #define	CL_HOLDTIME	60	/* time to hold client handles */
1083 
1084 static void
1085 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
1086 {
1087 	struct chhead *ch;
1088 	struct chtab *cp;	/* list of objects that can be reclaimed */
1089 	struct chtab *cpe;
1090 	struct chtab *cpl;
1091 	struct chtab **cpp;
1092 #ifdef DEBUG
1093 	int n = 0;
1094 	clstat4_debug.clreclaim.value.ui64++;
1095 #endif
1096 
1097 	/*
1098 	 * Need to reclaim some memory, so step through the cache
1099 	 * looking through the lists for entries which can be freed.
1100 	 */
1101 	cp = NULL;
1102 
1103 	mutex_enter(&nfscl->nfscl_chtable4_lock);
1104 
1105 	/*
1106 	 * Here we step through each non-NULL quadruple and start to
1107 	 * construct the reclaim list pointed to by cp.  Note that
1108 	 * cp will contain all eligible chtab entries.  When this traversal
1109 	 * completes, chtab entries from the last quadruple will be at the
1110 	 * front of cp and entries from previously inspected quadruples have
1111 	 * been appended to the rear of cp.
1112 	 */
1113 	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
1114 		if (ch->ch_list == NULL)
1115 			continue;
1116 		/*
1117 		 * Search each list for entries older then
1118 		 * cl_holdtime seconds.  The lists are maintained
1119 		 * in youngest to oldest order so that when the
1120 		 * first entry is found which is old enough, then
1121 		 * all of the rest of the entries on the list will
1122 		 * be old enough as well.
1123 		 */
1124 		cpl = ch->ch_list;
1125 		cpp = &ch->ch_list;
1126 		while (cpl != NULL &&
1127 		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
1128 			cpp = &cpl->ch_list;
1129 			cpl = cpl->ch_list;
1130 		}
1131 		if (cpl != NULL) {
1132 			*cpp = NULL;
1133 			if (cp != NULL) {
1134 				cpe = cpl;
1135 				while (cpe->ch_list != NULL)
1136 					cpe = cpe->ch_list;
1137 				cpe->ch_list = cp;
1138 			}
1139 			cp = cpl;
1140 		}
1141 	}
1142 
1143 	mutex_exit(&nfscl->nfscl_chtable4_lock);
1144 
1145 	/*
1146 	 * If cp is empty, then there is nothing to reclaim here.
1147 	 */
1148 	if (cp == NULL)
1149 		return;
1150 
1151 	/*
1152 	 * Step through the list of entries to free, destroying each client
1153 	 * handle and kmem_free'ing the memory for each entry.
1154 	 */
1155 	while (cp != NULL) {
1156 #ifdef DEBUG
1157 		n++;
1158 #endif
1159 		CLNT_DESTROY(cp->ch_client);
1160 		cpl = cp->ch_list;
1161 		kmem_cache_free(chtab4_cache, cp);
1162 		cp = cpl;
1163 	}
1164 
1165 #ifdef DEBUG
1166 	/*
1167 	 * Update clalloc so that nfsstat shows the current number
1168 	 * of allocated client handles.
1169 	 */
1170 	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
1171 #endif
1172 }
1173 
1174 /* ARGSUSED */
1175 static void
1176 clreclaim4(void *all)
1177 {
1178 	struct nfs4_clnt *nfscl;
1179 
1180 	/*
1181 	 * The system is low on memory; go through and try to reclaim some from
1182 	 * every zone on the system.
1183 	 */
1184 	mutex_enter(&nfs4_clnt_list_lock);
1185 	nfscl = list_head(&nfs4_clnt_list);
1186 	for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
1187 		clreclaim4_zone(nfscl, CL_HOLDTIME);
1188 	mutex_exit(&nfs4_clnt_list_lock);
1189 }
1190 
/*
 * Minimum time-out values indexed by call type.
 * These units are in "eighths" of a second to avoid multiplies.
 */
static unsigned int minimum_timeo[] = {
	6,
	7,
	10
};

/* One tenth of the connection-oriented transport time-out. */
#define	SHORTWAIT	(NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout.  Time-outs here are counted in
 * clock ticks, so MAXTIMO (20 * hz) is 20 seconds' worth of ticks.
 *
 * backoff(tim)   - leaves a value already at or above MAXTIMO untouched,
 *                  otherwise applies dobackoff().
 * dobackoff(tim) - doubles tim, clamping the result to MAXTIMO.
 *
 * Both macros evaluate their argument more than once; pass only
 * side-effect-free expressions.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) >= MAXTIMO) ? (tim) : dobackoff(tim))
#define	dobackoff(tim)	((((tim) << 1) <= MAXTIMO) ? ((tim) << 1) : MAXTIMO)
1207 
1208 static int
1209 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1210     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
1211     enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
1212 {
1213 	CLIENT *client;
1214 	struct chtab *ch;
1215 	cred_t *cr = icr;
1216 	struct rpc_err rpcerr, rpcerr_tmp;
1217 	enum clnt_stat status;
1218 	int error;
1219 	struct timeval wait;
1220 	int timeo;		/* in units of hz */
1221 	bool_t tryagain, is_recov;
1222 	bool_t cred_cloned = FALSE;
1223 	k_sigset_t smask;
1224 	servinfo4_t *svp;
1225 #ifdef DEBUG
1226 	char *bufp;
1227 #endif
1228 	int firstcall;
1229 
1230 	rpcerr.re_status = RPC_SUCCESS;
1231 
1232 	/*
1233 	 * If we know that we are rebooting then let's
1234 	 * not bother with doing any over the wireness.
1235 	 */
1236 	mutex_enter(&mi->mi_lock);
1237 	if (mi->mi_flags & MI4_SHUTDOWN) {
1238 		mutex_exit(&mi->mi_lock);
1239 		return (EIO);
1240 	}
1241 	mutex_exit(&mi->mi_lock);
1242 
1243 	/* For TSOL, use a new cred which has net_mac_aware flag */
1244 	if (!cred_cloned && is_system_labeled()) {
1245 		cred_cloned = TRUE;
1246 		cr = crdup(icr);
1247 		(void) setpflags(NET_MAC_AWARE, 1, cr);
1248 	}
1249 
1250 	/*
1251 	 * clget() calls clnt_tli_kinit() which clears the xid, so we
1252 	 * are guaranteed to reprocess the retry as a new request.
1253 	 */
1254 	svp = mi->mi_curr_serv;
1255 	rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
1256 	if (rpcerr.re_errno != 0)
1257 		return (rpcerr.re_errno);
1258 
1259 	timeo = (mi->mi_timeo * hz) / 10;
1260 
1261 	/*
1262 	 * If hard mounted fs, retry call forever unless hard error
1263 	 * occurs.
1264 	 *
1265 	 * For forced unmount, let the recovery thread through but return
1266 	 * an error for all others.  This is so that user processes can
1267 	 * exit quickly.  The recovery thread bails out after one
1268 	 * transmission so that it can tell if it needs to continue.
1269 	 *
1270 	 * For zone shutdown, behave as above to encourage quick
1271 	 * process exit, but also fail quickly when servers have
1272 	 * timed out before and reduce the timeouts.
1273 	 */
1274 	is_recov = (curthread == mi->mi_recovthread);
1275 	firstcall = 1;
1276 	do {
1277 		tryagain = FALSE;
1278 
1279 		NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
1280 		    "nfs4_rfscall: vfs_flag=0x%x, %s",
1281 		    mi->mi_vfsp->vfs_flag,
1282 		    is_recov ? "recov thread" : "not recov thread"));
1283 
1284 		/*
1285 		 * It's possible while we're retrying the admin
1286 		 * decided to reboot.
1287 		 */
1288 		mutex_enter(&mi->mi_lock);
1289 		if (mi->mi_flags & MI4_SHUTDOWN) {
1290 			mutex_exit(&mi->mi_lock);
1291 			clfree4(client, ch, nfscl);
1292 			if (cred_cloned)
1293 				crfree(cr);
1294 			return (EIO);
1295 		}
1296 		mutex_exit(&mi->mi_lock);
1297 
1298 		if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
1299 		    (!is_recov || !firstcall)) {
1300 			clfree4(client, ch, nfscl);
1301 			if (cred_cloned)
1302 				crfree(cr);
1303 			return (EIO);
1304 		}
1305 
1306 		if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
1307 			mutex_enter(&mi->mi_lock);
1308 			if ((mi->mi_flags & MI4_TIMEDOUT) ||
1309 			    !is_recov || !firstcall) {
1310 				mutex_exit(&mi->mi_lock);
1311 				clfree4(client, ch, nfscl);
1312 				if (cred_cloned)
1313 					crfree(cr);
1314 				return (EIO);
1315 			}
1316 			mutex_exit(&mi->mi_lock);
1317 			timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
1318 		}
1319 
1320 		firstcall = 0;
1321 		TICK_TO_TIMEVAL(timeo, &wait);
1322 
1323 		/*
1324 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1325 		 * and SIGTERM. (Preserving the existing masks).
1326 		 * Mask out SIGINT if mount option nointr is specified.
1327 		 */
1328 		sigintr(&smask, (int)mi->mi_flags & MI4_INT);
1329 		if (!(mi->mi_flags & MI4_INT))
1330 			client->cl_nosignal = TRUE;
1331 
1332 		/*
1333 		 * If there is a current signal, then don't bother
1334 		 * even trying to send out the request because we
1335 		 * won't be able to block waiting for the response.
1336 		 * Simply assume RPC_INTR and get on with it.
1337 		 */
1338 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1339 			status = RPC_INTR;
1340 		else {
1341 			status = CLNT_CALL(client, which, xdrargs, argsp,
1342 			    xdrres, resp, wait);
1343 		}
1344 
1345 		if (!(mi->mi_flags & MI4_INT))
1346 			client->cl_nosignal = FALSE;
1347 		/*
1348 		 * restore original signal mask
1349 		 */
1350 		sigunintr(&smask);
1351 
1352 		switch (status) {
1353 		case RPC_SUCCESS:
1354 			break;
1355 
1356 		case RPC_INTR:
1357 			/*
1358 			 * There is no way to recover from this error,
1359 			 * even if mount option nointr is specified.
1360 			 * SIGKILL, for example, cannot be blocked.
1361 			 */
1362 			rpcerr.re_status = RPC_INTR;
1363 			rpcerr.re_errno = EINTR;
1364 			break;
1365 
1366 		case RPC_UDERROR:
1367 			/*
1368 			 * If the NFS server is local (vold) and
1369 			 * it goes away then we get RPC_UDERROR.
1370 			 * This is a retryable error, so we would
1371 			 * loop, so check to see if the specific
1372 			 * error was ECONNRESET, indicating that
1373 			 * target did not exist at all.  If so,
1374 			 * return with RPC_PROGUNAVAIL and
1375 			 * ECONNRESET to indicate why.
1376 			 */
1377 			CLNT_GETERR(client, &rpcerr);
1378 			if (rpcerr.re_errno == ECONNRESET) {
1379 				rpcerr.re_status = RPC_PROGUNAVAIL;
1380 				rpcerr.re_errno = ECONNRESET;
1381 				break;
1382 			}
1383 			/*FALLTHROUGH*/
1384 
1385 		default:		/* probably RPC_TIMEDOUT */
1386 
1387 			if (IS_UNRECOVERABLE_RPC(status))
1388 				break;
1389 
1390 			/*
1391 			 * increment server not responding count
1392 			 */
1393 			mutex_enter(&mi->mi_lock);
1394 			mi->mi_noresponse++;
1395 			mutex_exit(&mi->mi_lock);
1396 #ifdef DEBUG
1397 			nfscl->nfscl_stat.noresponse.value.ui64++;
1398 #endif
1399 			/*
1400 			 * On zone shutdown, mark server dead and move on.
1401 			 */
1402 			if (zone_status_get(curproc->p_zone) >=
1403 			    ZONE_IS_SHUTTING_DOWN) {
1404 				mutex_enter(&mi->mi_lock);
1405 				mi->mi_flags |= MI4_TIMEDOUT;
1406 				mutex_exit(&mi->mi_lock);
1407 				clfree4(client, ch, nfscl);
1408 				if (cred_cloned)
1409 					crfree(cr);
1410 				return (EIO);
1411 			}
1412 
1413 			/*
1414 			 * NFS client failover support:
1415 			 * return and let the caller take care of
1416 			 * failover.  We only return for failover mounts
1417 			 * because otherwise we want the "not responding"
1418 			 * message, the timer updates, etc.
1419 			 */
1420 			if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
1421 			    (error = try_failover(status)) != 0) {
1422 				clfree4(client, ch, nfscl);
1423 				if (cred_cloned)
1424 					crfree(cr);
1425 				*rpc_statusp = status;
1426 				return (error);
1427 			}
1428 
1429 			if (flags & RFSCALL_SOFT)
1430 				break;
1431 
1432 			tryagain = TRUE;
1433 
1434 			/*
1435 			 * The call is in progress (over COTS).
1436 			 * Try the CLNT_CALL again, but don't
1437 			 * print a noisy error message.
1438 			 */
1439 			if (status == RPC_INPROGRESS)
1440 				break;
1441 
1442 			timeo = backoff(timeo);
1443 			CLNT_GETERR(client, &rpcerr_tmp);
1444 
1445 			mutex_enter(&mi->mi_lock);
1446 			if (!(mi->mi_flags & MI4_PRINTED)) {
1447 				mi->mi_flags |= MI4_PRINTED;
1448 				mutex_exit(&mi->mi_lock);
1449 				if ((status == RPC_CANTSEND) &&
1450 				    (rpcerr_tmp.re_errno == ENOBUFS))
1451 					nfs4_queue_fact(RF_SENDQ_FULL, mi, 0,
1452 					    0, 0, FALSE, NULL, 0, NULL);
1453 				else
1454 					nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
1455 					    0, 0, 0, FALSE, NULL, 0, NULL);
1456 			} else
1457 				mutex_exit(&mi->mi_lock);
1458 
1459 			if (*doqueue && nfs_has_ctty()) {
1460 				*doqueue = 0;
1461 				if (!(mi->mi_flags & MI4_NOPRINT)) {
1462 					if ((status == RPC_CANTSEND) &&
1463 					    (rpcerr_tmp.re_errno == ENOBUFS))
1464 						nfs4_queue_fact(RF_SENDQ_FULL,
1465 						    mi, 0, 0, 0, FALSE, NULL,
1466 						    0, NULL);
1467 					else
1468 						nfs4_queue_fact(
1469 						    RF_SRV_NOT_RESPOND, mi, 0,
1470 						    0, 0, FALSE, NULL, 0, NULL);
1471 				}
1472 			}
1473 		}
1474 	} while (tryagain);
1475 
1476 	DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
1477 	    int, rpcerr.re_errno);
1478 
1479 	if (status != RPC_SUCCESS) {
1480 		zoneid_t zoneid = mi->mi_zone->zone_id;
1481 
1482 		/*
1483 		 * Let soft mounts use the timed out message.
1484 		 */
1485 		if (status == RPC_INPROGRESS)
1486 			status = RPC_TIMEDOUT;
1487 		nfscl->nfscl_stat.badcalls.value.ui64++;
1488 		if (status != RPC_INTR) {
1489 			mutex_enter(&mi->mi_lock);
1490 			mi->mi_flags |= MI4_DOWN;
1491 			mutex_exit(&mi->mi_lock);
1492 			CLNT_GETERR(client, &rpcerr);
1493 #ifdef DEBUG
1494 			bufp = clnt_sperror(client, svp->sv_hostname);
1495 			zprintf(zoneid, "NFS%d %s failed for %s\n",
1496 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
1497 			if (nfs_has_ctty()) {
1498 				if (!(mi->mi_flags & MI4_NOPRINT)) {
1499 					uprintf("NFS%d %s failed for %s\n",
1500 					    mi->mi_vers, mi->mi_rfsnames[which],
1501 					    bufp);
1502 				}
1503 			}
1504 			kmem_free(bufp, MAXPATHLEN);
1505 #else
1506 			zprintf(zoneid,
1507 			    "NFS %s failed for server %s: error %d (%s)\n",
1508 			    mi->mi_rfsnames[which], svp->sv_hostname,
1509 			    status, clnt_sperrno(status));
1510 			if (nfs_has_ctty()) {
1511 				if (!(mi->mi_flags & MI4_NOPRINT)) {
1512 					uprintf(
1513 				"NFS %s failed for server %s: error %d (%s)\n",
1514 					    mi->mi_rfsnames[which],
1515 					    svp->sv_hostname, status,
1516 					    clnt_sperrno(status));
1517 				}
1518 			}
1519 #endif
1520 			/*
1521 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
1522 			 * re_errno is set appropriately depending on
1523 			 * the authentication error
1524 			 */
1525 			if (status == RPC_VERSMISMATCH ||
1526 			    status == RPC_PROGVERSMISMATCH)
1527 				rpcerr.re_errno = EIO;
1528 		}
1529 	} else {
1530 		/*
1531 		 * Test the value of mi_down and mi_printed without
1532 		 * holding the mi_lock mutex.  If they are both zero,
1533 		 * then it is okay to skip the down and printed
1534 		 * processing.  This saves on a mutex_enter and
1535 		 * mutex_exit pair for a normal, successful RPC.
1536 		 * This was just complete overhead.
1537 		 */
1538 		if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
1539 			mutex_enter(&mi->mi_lock);
1540 			mi->mi_flags &= ~MI4_DOWN;
1541 			if (mi->mi_flags & MI4_PRINTED) {
1542 				mi->mi_flags &= ~MI4_PRINTED;
1543 				mutex_exit(&mi->mi_lock);
1544 				if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1545 					nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
1546 					    0, FALSE, NULL, 0, NULL);
1547 			} else
1548 				mutex_exit(&mi->mi_lock);
1549 		}
1550 
1551 		if (*doqueue == 0) {
1552 			if (!(mi->mi_flags & MI4_NOPRINT) &&
1553 			    !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1554 				nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
1555 				    FALSE, NULL, 0, NULL);
1556 
1557 			*doqueue = 1;
1558 		}
1559 	}
1560 
1561 	clfree4(client, ch, nfscl);
1562 	if (cred_cloned)
1563 		crfree(cr);
1564 
1565 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1566 
1567 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
1568 	    rpcerr.re_errno);
1569 
1570 	*rpc_statusp = status;
1571 	return (rpcerr.re_errno);
1572 }
1573 
1574 /*
1575  * rfs4call - general wrapper for RPC calls initiated by the client
1576  */
1577 void
1578 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp,
1579     cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep)
1580 {
1581 	int i, error;
1582 	enum clnt_stat rpc_status = NFS4_OK;
1583 	int num_resops;
1584 	struct nfs4_clnt *nfscl;
1585 
1586 	ASSERT(nfs_zone() == mi->mi_zone);
1587 	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1588 	ASSERT(nfscl != NULL);
1589 
1590 	nfscl->nfscl_stat.calls.value.ui64++;
1591 	mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;
1592 
1593 	/* Set up the results struct for XDR usage */
1594 	resp->argsp = argsp;
1595 	resp->array = NULL;
1596 	resp->status = 0;
1597 	resp->decode_len = 0;
1598 
1599 	error = nfs4_rfscall(mi, NFSPROC4_COMPOUND,
1600 	    xdr_COMPOUND4args_clnt, (caddr_t)argsp,
1601 	    xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
1602 	    doqueue, &rpc_status, flags, nfscl);
1603 
1604 	/* Return now if it was an RPC error */
1605 	if (error) {
1606 		ep->error = error;
1607 		ep->stat = resp->status;
1608 		ep->rpc_status = rpc_status;
1609 		return;
1610 	}
1611 
1612 	/* else we'll count the processed operations */
1613 	num_resops = resp->decode_len;
1614 	for (i = 0; i < num_resops; i++) {
1615 		/*
1616 		 * Count the individual operations
1617 		 * processed by the server.
1618 		 */
1619 		if (resp->array[i].resop >= NFSPROC4_NULL &&
1620 		    resp->array[i].resop <= OP_WRITE)
1621 			mi->mi_reqs[resp->array[i].resop].value.ui64++;
1622 	}
1623 
1624 	ep->error = 0;
1625 	ep->stat = resp->status;
1626 	ep->rpc_status = rpc_status;
1627 }
1628 
1629 /*
1630  * nfs4rename_update - updates stored state after a rename.  Currently this
1631  * is the path of the object and anything under it, and the filehandle of
1632  * the renamed object.
1633  */
1634 void
1635 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
1636 {
1637 	sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
1638 	fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
1639 }
1640 
1641 /*
1642  * Routine to look up the filehandle for the given path and rootvp.
1643  *
1644  * Return values:
1645  * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
1646  *   updated.
1647  * - error: return value (errno value) and/or *statp is set appropriately.
1648  */
1649 #define	RML_ORDINARY	1
1650 #define	RML_NAMED_ATTR	2
1651 #define	RML_ATTRDIR	3
1652 
1653 static void
1654 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
1655     int filetype, cred_t *cr,
1656     nfs_fh4 *fhp, nfs4_ga_res_t *garp,		/* fh, attrs for object */
1657     nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp,	/* fh, attrs for parent */
1658     nfs4_error_t *ep)
1659 {
1660 	COMPOUND4args_clnt args;
1661 	COMPOUND4res_clnt res;
1662 	nfs_argop4 *argop;
1663 	nfs_resop4 *resop;
1664 	int num_argops;
1665 	lookup4_param_t lookuparg;
1666 	nfs_fh4 *tmpfhp;
1667 	int doqueue = 1;
1668 	char *path;
1669 	mntinfo4_t *mi;
1670 
1671 	ASSERT(fname != NULL);
1672 	ASSERT(rootvp->v_type == VDIR);
1673 
1674 	mi = VTOMI4(rootvp);
1675 	path = fn_path(fname);
1676 	switch (filetype) {
1677 	case RML_NAMED_ATTR:
1678 		lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
1679 		args.ctag = TAG_REMAP_LOOKUP_NA;
1680 		break;
1681 	case RML_ATTRDIR:
1682 		lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
1683 		args.ctag = TAG_REMAP_LOOKUP_AD;
1684 		break;
1685 	case RML_ORDINARY:
1686 		lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1687 		args.ctag = TAG_REMAP_LOOKUP;
1688 		break;
1689 	default:
1690 		ep->error = EINVAL;
1691 		return;
1692 	}
1693 	lookuparg.argsp = &args;
1694 	lookuparg.resp = &res;
1695 	lookuparg.header_len = 1;	/* Putfh */
1696 	lookuparg.trailer_len = 0;
1697 	lookuparg.ga_bits = NFS4_VATTR_MASK;
1698 	lookuparg.mi = VTOMI4(rootvp);
1699 
1700 	(void) nfs4lookup_setup(path, &lookuparg, 1);
1701 
1702 	/* 0: putfh directory */
1703 	argop = args.array;
1704 	argop[0].argop = OP_CPUTFH;
1705 	argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;
1706 
1707 	num_argops = args.array_len;
1708 
1709 	rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1710 
1711 	if (ep->error || res.status != NFS4_OK)
1712 		goto exit;
1713 
1714 	/* get the object filehandle */
1715 	resop = &res.array[res.array_len - 2];
1716 	if (resop->resop != OP_GETFH) {
1717 		nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1718 		    0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1719 		ep->stat = NFS4ERR_SERVERFAULT;
1720 		goto exit;
1721 	}
1722 	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1723 	if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1724 		nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1725 		    tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1726 		    TAG_NONE, 0, 0);
1727 		ep->stat = NFS4ERR_SERVERFAULT;
1728 		goto exit;
1729 	}
1730 	fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1731 	nfs_fh4_copy(tmpfhp, fhp);
1732 
1733 	/* get the object attributes */
1734 	resop = &res.array[res.array_len - 1];
1735 	if (garp && resop->resop == OP_GETATTR)
1736 		*garp = resop->nfs_resop4_u.opgetattr.ga_res;
1737 
1738 	/* See if there are enough fields in the response for parent info */
1739 	if ((int)res.array_len - 5 <= 0)
1740 		goto exit;
1741 
1742 	/* get the parent filehandle */
1743 	resop = &res.array[res.array_len - 5];
1744 	if (resop->resop != OP_GETFH) {
1745 		nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1746 		    0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1747 		ep->stat = NFS4ERR_SERVERFAULT;
1748 		goto exit;
1749 	}
1750 	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1751 	if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1752 		nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1753 		    tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1754 		    TAG_NONE, 0, 0);
1755 		ep->stat = NFS4ERR_SERVERFAULT;
1756 		goto exit;
1757 	}
1758 	pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1759 	nfs_fh4_copy(tmpfhp, pfhp);
1760 
1761 	/* get the parent attributes */
1762 	resop = &res.array[res.array_len - 4];
1763 	if (pgarp && resop->resop == OP_GETATTR)
1764 		*pgarp = resop->nfs_resop4_u.opgetattr.ga_res;
1765 
1766 exit:
1767 	/*
1768 	 * It is too hard to remember where all the OP_LOOKUPs are
1769 	 */
1770 	nfs4args_lookup_free(argop, num_argops);
1771 	kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1772 
1773 	if (!ep->error)
1774 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1775 	kmem_free(path, strlen(path)+1);
1776 }
1777 
1778 /*
1779  * NFS client failover / volatile filehandle support
1780  *
1781  * Recover the filehandle for the given rnode.
1782  *
1783  * Errors are returned via the nfs4_error_t parameter.
1784  */
1785 
1786 void
1787 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
1788 {
1789 	int is_stub;
1790 	rnode4_t *rp = VTOR4(vp);
1791 	vnode_t *rootvp = NULL;
1792 	vnode_t *dvp = NULL;
1793 	cred_t *cr, *cred_otw;
1794 	nfs4_ga_res_t gar, pgar;
1795 	nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
1796 	int filetype = RML_ORDINARY;
1797 	nfs4_recov_state_t recov = {NULL, 0, 0};
1798 	int badfhcount = 0;
1799 	nfs4_open_stream_t *osp = NULL;
1800 	bool_t first_time = TRUE;	/* first time getting OTW cred */
1801 	bool_t last_time = FALSE;	/* last time getting OTW cred */
1802 
1803 	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1804 	    "nfs4_remap_file: remapping %s", rnode4info(rp)));
1805 	ASSERT(nfs4_consistent_type(vp));
1806 
1807 	if (vp->v_flag & VROOT) {
1808 		nfs4_remap_root(mi, ep, flags);
1809 		return;
1810 	}
1811 
1812 	/*
1813 	 * Given the root fh, use the path stored in
1814 	 * the rnode to find the fh for the new server.
1815 	 */
1816 	ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1817 	if (ep->error != 0)
1818 		return;
1819 
1820 	cr = curthread->t_cred;
1821 	ASSERT(cr != NULL);
1822 get_remap_cred:
1823 	/*
1824 	 * Releases the osp, if it is provided.
1825 	 * Puts a hold on the cred_otw and the new osp (if found).
1826 	 */
1827 	cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
1828 	    &first_time, &last_time);
1829 	ASSERT(cred_otw != NULL);
1830 
1831 	if (rp->r_flags & R4ISXATTR) {
1832 		filetype = RML_NAMED_ATTR;
1833 		(void) vtodv(vp, &dvp, cred_otw, FALSE);
1834 	}
1835 
1836 	if (vp->v_flag & V_XATTRDIR) {
1837 		filetype = RML_ATTRDIR;
1838 	}
1839 
1840 	if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
1841 		/* file mount, doesn't need a remap */
1842 		goto done;
1843 	}
1844 
1845 again:
1846 	remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
1847 	    &newfh, &gar, &newpfh, &pgar, ep);
1848 
1849 	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1850 	    "nfs4_remap_file: remap_lookup returned %d/%d",
1851 	    ep->error, ep->stat));
1852 
1853 	if (last_time == FALSE && ep->error == EACCES) {
1854 		crfree(cred_otw);
1855 		if (dvp != NULL)
1856 			VN_RELE(dvp);
1857 		goto get_remap_cred;
1858 	}
1859 	if (ep->error != 0)
1860 		goto done;
1861 
1862 	switch (ep->stat) {
1863 	case NFS4_OK:
1864 		badfhcount = 0;
1865 		if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
1866 			mutex_enter(&rp->r_statelock);
1867 			rp->r_delay_interval = 0;
1868 			mutex_exit(&rp->r_statelock);
1869 			uprintf("NFS File Available..\n");
1870 		}
1871 		break;
1872 	case NFS4ERR_FHEXPIRED:
1873 	case NFS4ERR_BADHANDLE:
1874 	case NFS4ERR_STALE:
1875 		/*
1876 		 * If we ran into filehandle problems, we should try to
1877 		 * remap the root vnode first and hope life gets better.
1878 		 * But we need to avoid loops.
1879 		 */
1880 		if (badfhcount++ > 0)
1881 			goto done;
1882 		if (newfh.nfs_fh4_len != 0) {
1883 			kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1884 			newfh.nfs_fh4_len = 0;
1885 		}
1886 		if (newpfh.nfs_fh4_len != 0) {
1887 			kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1888 			newpfh.nfs_fh4_len = 0;
1889 		}
1890 		/* relative path - remap rootvp then retry */
1891 		VN_RELE(rootvp);
1892 		rootvp = NULL;
1893 		nfs4_remap_root(mi, ep, flags);
1894 		if (ep->error != 0 || ep->stat != NFS4_OK)
1895 			goto done;
1896 		ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1897 		if (ep->error != 0)
1898 			goto done;
1899 		goto again;
1900 	case NFS4ERR_DELAY:
1901 		badfhcount = 0;
1902 		nfs4_set_delay_wait(vp);
1903 		ep->error = nfs4_wait_for_delay(vp, &recov);
1904 		if (ep->error != 0)
1905 			goto done;
1906 		goto again;
1907 	case NFS4ERR_ACCESS:
1908 		/* get new cred, try again */
1909 		if (last_time == TRUE)
1910 			goto done;
1911 		if (dvp != NULL)
1912 			VN_RELE(dvp);
1913 		crfree(cred_otw);
1914 		goto get_remap_cred;
1915 	default:
1916 		goto done;
1917 	}
1918 
1919 	/*
1920 	 * Check on the new and old rnodes before updating;
1921 	 * if the vnode type or size changes, issue a warning
1922 	 * and mark the file dead.
1923 	 */
1924 	mutex_enter(&rp->r_statelock);
1925 	if (flags & NFS4_REMAP_CKATTRS) {
1926 		if (vp->v_type != gar.n4g_va.va_type ||
1927 		    (vp->v_type != VDIR &&
1928 		    rp->r_size != gar.n4g_va.va_size)) {
1929 			NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1930 			    "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
1931 			    (int)rp->r_size, (int)gar.n4g_va.va_size,
1932 			    vp->v_type, gar.n4g_va.va_type));
1933 			mutex_exit(&rp->r_statelock);
1934 			nfs4_queue_event(RE_FILE_DIFF, mi,
1935 			    rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
1936 			    TAG_NONE, TAG_NONE, 0, 0);
1937 			nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
1938 			goto done;
1939 		}
1940 	}
1941 	ASSERT(gar.n4g_va.va_type != VNON);
1942 	rp->r_server = mi->mi_curr_serv;
1943 
1944 	/*
1945 	 * Turn this object into a "stub" object if we
1946 	 * crossed an underlying server fs boundary.
1947 	 *
1948 	 * This stub will be for a mirror-mount.
1949 	 * A referral would look like a boundary crossing
1950 	 * as well, but would not be the same type of object,
1951 	 * so we would expect to mark the object dead.
1952 	 *
1953 	 * See comment in r4_do_attrcache() for more details.
1954 	 */
1955 	is_stub = 0;
1956 	if (gar.n4g_fsid_valid) {
1957 		(void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
1958 		rp->r_srv_fsid = gar.n4g_fsid;
1959 		if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
1960 			is_stub = 1;
1961 		nfs_rw_exit(&rp->r_server->sv_lock);
1962 #ifdef DEBUG
1963 	} else {
1964 		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1965 		    "remap_file: fsid attr not provided by server.  rp=%p",
1966 		    (void *)rp));
1967 #endif
1968 	}
1969 	if (is_stub)
1970 		r4_stub_mirrormount(rp);
1971 	else
1972 		r4_stub_none(rp);
1973 	mutex_exit(&rp->r_statelock);
1974 	nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
1975 	sfh4_update(rp->r_fh, &newfh);
1976 	ASSERT(nfs4_consistent_type(vp));
1977 
1978 	/*
1979 	 * If we got parent info, use it to update the parent
1980 	 */
1981 	if (newpfh.nfs_fh4_len != 0) {
1982 		if (rp->r_svnode.sv_dfh != NULL)
1983 			sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
1984 		if (dvp != NULL) {
1985 			/* force update of attrs */
1986 			nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
1987 		}
1988 	}
1989 done:
1990 	if (newfh.nfs_fh4_len != 0)
1991 		kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1992 	if (newpfh.nfs_fh4_len != 0)
1993 		kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1994 	if (cred_otw != NULL)
1995 		crfree(cred_otw);
1996 	if (rootvp != NULL)
1997 		VN_RELE(rootvp);
1998 	if (dvp != NULL)
1999 		VN_RELE(dvp);
2000 	if (osp != NULL)
2001 		open_stream_rele(osp, rp);
2002 }
2003 
2004 /*
2005  * Client-side failover support: remap the filehandle for vp if it appears
2006  * necessary.  errors are returned via the nfs4_error_t parameter; though,
2007  * if there is a problem, we will just try again later.
2008  */
2009 
2010 void
2011 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2012 {
2013 	if (vp == NULL)
2014 		return;
2015 
2016 	if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY))
2017 		return;
2018 
2019 	if (VTOR4(vp)->r_server == mi->mi_curr_serv)
2020 		return;
2021 
2022 	nfs4_remap_file(mi, vp, flags, ep);
2023 }
2024 
2025 /*
2026  * nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
2027  *
2028  * Our caller has a filehandle for ".." relative to a particular
2029  * directory object.  We want to find or create a parent vnode
2030  * with that filehandle and return it.  We can of course create
2031  * a vnode from this filehandle, but we need to also make sure
2032  * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR)
2033  * that we have a parent FH for future reopens as well.  If
2034  * we have a remap failure, we won't be able to reopen this
2035  * file, but we won't treat that as fatal because a reopen
2036  * is at least unlikely.  Someday nfs4_reopen() should look
2037  * for a missing parent FH and try a remap to recover from it.
2038  *
2039  * need_start_op argument indicates whether this function should
2040  * do a start_op before calling remap_lookup().  This should
2041  * be FALSE, if you are the recovery thread or in an op; otherwise,
2042  * set it to TRUE.
2043  */
2044 int
2045 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
2046     cred_t *cr, vnode_t **vpp, int need_start_op)
2047 {
2048 	mntinfo4_t *mi = VTOMI4(dvp);
2049 	nfs4_fname_t *np = NULL, *pnp = NULL;
2050 	vnode_t *vp = NULL, *rootvp = NULL;
2051 	rnode4_t *rp;
2052 	nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2053 	nfs4_ga_res_t gar, pgar;
2054 	vattr_t va, pva;
2055 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
2056 	nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
2057 	nfs4_recov_state_t recov_state;
2058 
2059 #ifdef DEBUG
2060 	/*
2061 	 * ensure need_start_op is correct
2062 	 */
2063 	{
2064 		int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
2065 		    (curthread == mi->mi_recovthread));
2066 		/* C needs a ^^ operator! */
2067 		ASSERT(((need_start_op) && (!no_need_start_op)) ||
2068 		    ((! need_start_op) && (no_need_start_op)));
2069 	}
2070 #endif
2071 	ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());
2072 
2073 	NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
2074 	    "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
2075 	    rnode4info(VTOR4(dvp))));
2076 
2077 	/*
2078 	 * rootvp might be needed eventually. Holding it now will
2079 	 * ensure that r4find_unlocked() will find it, if ".." is the root.
2080 	 */
2081 	e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2082 	if (e.error != 0)
2083 		goto out;
2084 	rp = r4find_unlocked(fhp, mi->mi_vfsp);
2085 	if (rp != NULL) {
2086 		*vpp = RTOV4(rp);
2087 		VN_RELE(rootvp);
2088 		return (0);
2089 	}
2090 
2091 	/*
2092 	 * Since we don't have the rnode, we have to go over the wire.
2093 	 * remap_lookup() can get all of the filehandles and attributes
2094 	 * we need in one operation.
2095 	 */
2096 	np = fn_parent(VTOSV(dvp)->sv_name);
2097 	/* if a parent was not found return an error */
2098 	if (np == NULL) {
2099 		e.error = ENOENT;
2100 		goto out;
2101 	}
2102 
2103 	recov_state.rs_flags = 0;
2104 	recov_state.rs_num_retry_despite_err = 0;
2105 recov_retry:
2106 	if (need_start_op) {
2107 		e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
2108 		    &recov_state, NULL);
2109 		if (e.error != 0) {
2110 			goto out;
2111 		}
2112 	}
2113 
2114 	pgar.n4g_va.va_type = VNON;
2115 	gar.n4g_va.va_type = VNON;
2116 
2117 	remap_lookup(np, rootvp, RML_ORDINARY, cr,
2118 	    &newfh, &gar, &newpfh, &pgar, &e);
2119 	if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
2120 		if (need_start_op) {
2121 			bool_t abort;
2122 
2123 			abort = nfs4_start_recovery(&e, mi,
2124 			    rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL,
2125 			    NULL);
2126 			if (abort) {
2127 				nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2128 				    &recov_state, FALSE);
2129 				if (e.error == 0)
2130 					e.error = EIO;
2131 				goto out;
2132 			}
2133 			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2134 			    &recov_state, TRUE);
2135 			goto recov_retry;
2136 		}
2137 		if (e.error == 0)
2138 			e.error = EIO;
2139 		goto out;
2140 	}
2141 
2142 	va = gar.n4g_va;
2143 	pva = pgar.n4g_va;
2144 
2145 	if ((e.error != 0) ||
2146 	    (va.va_type != VDIR)) {
2147 		if (need_start_op)
2148 			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2149 			    &recov_state, FALSE);
2150 		if (e.error == 0)
2151 			e.error = EIO;
2152 		goto out;
2153 	}
2154 
2155 	if (e.stat != NFS4_OK) {
2156 		if (need_start_op)
2157 			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2158 			    &recov_state, FALSE);
2159 		e.error = EIO;
2160 		goto out;
2161 	}
2162 
2163 	/*
2164 	 * It is possible for remap_lookup() to return with no error,
2165 	 * but without providing the parent filehandle and attrs.
2166 	 */
2167 	if (pva.va_type != VDIR) {
2168 		/*
2169 		 * Call remap_lookup() again, this time with the
2170 		 * newpfh and pgar args in the first position.
2171 		 */
2172 		pnp = fn_parent(np);
2173 		if (pnp != NULL) {
2174 			remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
2175 			    &newpfh, &pgar, NULL, NULL, &e);
2176 			/*
2177 			 * This remap_lookup call modifies pgar. The following
2178 			 * line prevents trouble when checking the va_type of
2179 			 * pva later in this code.
2180 			 */
2181 			pva = pgar.n4g_va;
2182 
2183 			if (nfs4_needs_recovery(&e, FALSE,
2184 			    mi->mi_vfsp)) {
2185 				if (need_start_op) {
2186 					bool_t abort;
2187 
2188 					abort = nfs4_start_recovery(&e, mi,
2189 					    rootvp, NULL, NULL, NULL,
2190 					    OP_LOOKUP, NULL, NULL, NULL);
2191 					if (abort) {
2192 						nfs4_end_fop(mi, rootvp, NULL,
2193 						    OH_LOOKUP, &recov_state,
2194 						    FALSE);
2195 						if (e.error == 0)
2196 							e.error = EIO;
2197 						goto out;
2198 					}
2199 					nfs4_end_fop(mi, rootvp, NULL,
2200 					    OH_LOOKUP, &recov_state, TRUE);
2201 					goto recov_retry;
2202 				}
2203 				if (e.error == 0)
2204 					e.error = EIO;
2205 				goto out;
2206 			}
2207 
2208 			if (e.stat != NFS4_OK) {
2209 				if (need_start_op)
2210 					nfs4_end_fop(mi, rootvp, NULL,
2211 					    OH_LOOKUP, &recov_state, FALSE);
2212 				e.error = EIO;
2213 				goto out;
2214 			}
2215 		}
2216 		if ((pnp == NULL) ||
2217 		    (e.error != 0) ||
2218 		    (pva.va_type == VNON)) {
2219 			if (need_start_op)
2220 				nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2221 				    &recov_state, FALSE);
2222 			if (e.error == 0)
2223 				e.error = EIO;
2224 			goto out;
2225 		}
2226 	}
2227 	ASSERT(newpfh.nfs_fh4_len != 0);
2228 	if (need_start_op)
2229 		nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
2230 	psfh = sfh4_get(&newpfh, mi);
2231 
2232 	sfh = sfh4_get(&newfh, mi);
2233 	vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);
2234 
2235 out:
2236 	if (np != NULL)
2237 		fn_rele(&np);
2238 	if (pnp != NULL)
2239 		fn_rele(&pnp);
2240 	if (newfh.nfs_fh4_len != 0)
2241 		kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2242 	if (newpfh.nfs_fh4_len != 0)
2243 		kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2244 	if (sfh != NULL)
2245 		sfh4_rele(&sfh);
2246 	if (psfh != NULL)
2247 		sfh4_rele(&psfh);
2248 	if (rootvp != NULL)
2249 		VN_RELE(rootvp);
2250 	*vpp = vp;
2251 	return (e.error);
2252 }
2253 
/*
 * DEBUG-only global counter, initially zero.
 * NOTE(review): presumably accounts for memory used by rnode path strings;
 * nothing in this part of the file reads or updates it, so confirm against
 * its users.
 */
#ifdef DEBUG
size_t r_path_memuse = 0;
#endif
2257 
2258 /*
2259  * NFS client failover support
2260  *
2261  * sv4_free() frees the malloc'd portion of a "servinfo_t".
2262  */
2263 void
2264 sv4_free(servinfo4_t *svp)
2265 {
2266 	servinfo4_t *next;
2267 	struct knetconfig *knconf;
2268 
2269 	while (svp != NULL) {
2270 		next = svp->sv_next;
2271 		if (svp->sv_dhsec)
2272 			sec_clnt_freeinfo(svp->sv_dhsec);
2273 		if (svp->sv_secdata)
2274 			sec_clnt_freeinfo(svp->sv_secdata);
2275 		if (svp->sv_save_secinfo &&
2276 		    svp->sv_save_secinfo != svp->sv_secinfo)
2277 			secinfo_free(svp->sv_save_secinfo);
2278 		if (svp->sv_secinfo)
2279 			secinfo_free(svp->sv_secinfo);
2280 		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
2281 			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
2282 		knconf = svp->sv_knconf;
2283 		if (knconf != NULL) {
2284 			if (knconf->knc_protofmly != NULL)
2285 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2286 			if (knconf->knc_proto != NULL)
2287 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
2288 			kmem_free(knconf, sizeof (*knconf));
2289 		}
2290 		knconf = svp->sv_origknconf;
2291 		if (knconf != NULL) {
2292 			if (knconf->knc_protofmly != NULL)
2293 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2294 			if (knconf->knc_proto != NULL)
2295 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
2296 			kmem_free(knconf, sizeof (*knconf));
2297 		}
2298 		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
2299 			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
2300 		if (svp->sv_path != NULL) {
2301 			kmem_free(svp->sv_path, svp->sv_pathlen);
2302 		}
2303 		nfs_rw_destroy(&svp->sv_lock);
2304 		kmem_free(svp, sizeof (*svp));
2305 		svp = next;
2306 	}
2307 }
2308 
2309 void
2310 nfs4_printfhandle(nfs4_fhandle_t *fhp)
2311 {
2312 	int *ip;
2313 	char *buf;
2314 	size_t bufsize;
2315 	char *cp;
2316 
2317 	/*
2318 	 * 13 == "(file handle:"
2319 	 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
2320 	 *	1 == ' '
2321 	 *	8 == maximum strlen of "%x"
2322 	 * 3 == ")\n\0"
2323 	 */
2324 	bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
2325 	buf = kmem_alloc(bufsize, KM_NOSLEEP);
2326 	if (buf == NULL)
2327 		return;
2328 
2329 	cp = buf;
2330 	(void) strcpy(cp, "(file handle:");
2331 	while (*cp != '\0')
2332 		cp++;
2333 	for (ip = (int *)fhp->fh_buf;
2334 	    ip < (int *)&fhp->fh_buf[fhp->fh_len];
2335 	    ip++) {
2336 		(void) sprintf(cp, " %x", *ip);
2337 		while (*cp != '\0')
2338 			cp++;
2339 	}
2340 	(void) strcpy(cp, ")\n");
2341 
2342 	zcmn_err(getzoneid(), CE_CONT, "%s", buf);
2343 
2344 	kmem_free(buf, bufsize);
2345 }
2346 
2347 /*
2348  * The NFSv4 readdir cache subsystem.
2349  *
2350  * We provide a set of interfaces to allow the rest of the system to utilize
2351  * a caching mechanism while encapsulating the details of the actual
2352  * implementation.  This should allow for better maintainability and
2353  * extensibility by consolidating the implementation details in one location.
2354  */
2355 
2356 /*
2357  * Comparator used by AVL routines.
2358  */
2359 static int
2360 rddir4_cache_compar(const void *x, const void *y)
2361 {
2362 	rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
2363 	rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
2364 	rddir4_cache *a = &ai->rc;
2365 	rddir4_cache *b = &bi->rc;
2366 
2367 	if (a->nfs4_cookie == b->nfs4_cookie) {
2368 		if (a->buflen == b->buflen)
2369 			return (0);
2370 		if (a->buflen < b->buflen)
2371 			return (-1);
2372 		return (1);
2373 	}
2374 
2375 	if (a->nfs4_cookie < b->nfs4_cookie)
2376 			return (-1);
2377 
2378 	return (1);
2379 }
2380 
2381 /*
2382  * Allocate an opaque handle for the readdir cache.
2383  */
2384 void
2385 rddir4_cache_create(rnode4_t *rp)
2386 {
2387 	ASSERT(rp->r_dir == NULL);
2388 
2389 	rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
2390 
2391 	avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
2392 	    offsetof(rddir4_cache_impl, tree));
2393 }
2394 
2395 /*
2396  *  Purge the cache of all cached readdir responses.
2397  */
2398 void
2399 rddir4_cache_purge(rnode4_t *rp)
2400 {
2401 	rddir4_cache_impl	*rdip;
2402 	rddir4_cache_impl	*nrdip;
2403 
2404 	ASSERT(MUTEX_HELD(&rp->r_statelock));
2405 
2406 	if (rp->r_dir == NULL)
2407 		return;
2408 
2409 	rdip = avl_first(rp->r_dir);
2410 
2411 	while (rdip != NULL) {
2412 		nrdip = AVL_NEXT(rp->r_dir, rdip);
2413 		avl_remove(rp->r_dir, rdip);
2414 		rdip->rc.flags &= ~RDDIRCACHED;
2415 		rddir4_cache_rele(rp, &rdip->rc);
2416 		rdip = nrdip;
2417 	}
2418 	ASSERT(avl_numnodes(rp->r_dir) == 0);
2419 }
2420 
2421 /*
2422  * Destroy the readdir cache.
2423  */
2424 void
2425 rddir4_cache_destroy(rnode4_t *rp)
2426 {
2427 	ASSERT(MUTEX_HELD(&rp->r_statelock));
2428 	if (rp->r_dir == NULL)
2429 		return;
2430 
2431 	rddir4_cache_purge(rp);
2432 	avl_destroy(rp->r_dir);
2433 	kmem_free(rp->r_dir, sizeof (avl_tree_t));
2434 	rp->r_dir = NULL;
2435 }
2436 
2437 /*
2438  * Locate a readdir response from the readdir cache.
2439  *
2440  * Return values:
2441  *
2442  * NULL - If there is an unrecoverable situation like the operation may have
2443  *	  been interrupted.
2444  *
2445  * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
2446  *		    The flags are set approprately, such that the caller knows
2447  *		    what state the entry is in.
2448  */
2449 rddir4_cache *
2450 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count)
2451 {
2452 	rddir4_cache_impl	*rdip = NULL;
2453 	rddir4_cache_impl	srdip;
2454 	rddir4_cache		*srdc;
2455 	rddir4_cache		*rdc = NULL;
2456 	rddir4_cache		*nrdc = NULL;
2457 	avl_index_t		where;
2458 
2459 top:
2460 	ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2461 	ASSERT(MUTEX_HELD(&rp->r_statelock));
2462 	/*
2463 	 * Check to see if the readdir cache has been disabled.  If so, then
2464 	 * simply allocate an rddir4_cache entry and return it, since caching
2465 	 * operations do not apply.
2466 	 */
2467 	if (rp->r_dir == NULL) {
2468 		if (nrdc == NULL) {
2469 			/*
2470 			 * Drop the lock because we are doing a sleeping
2471 			 * allocation.
2472 			 */
2473 			mutex_exit(&rp->r_statelock);
2474 			rdc = rddir4_cache_alloc(KM_SLEEP);
2475 			rdc->nfs4_cookie = cookie;
2476 			rdc->buflen = count;
2477 			mutex_enter(&rp->r_statelock);
2478 			return (rdc);
2479 		}
2480 		return (nrdc);
2481 	}
2482 
2483 	srdc = &srdip.rc;
2484 	srdc->nfs4_cookie = cookie;
2485 	srdc->buflen = count;
2486 
2487 	rdip = avl_find(rp->r_dir, &srdip, &where);
2488 
2489 	/*
2490 	 * If we didn't find an entry then create one and insert it
2491 	 * into the cache.
2492 	 */
2493 	if (rdip == NULL) {
2494 		/*
2495 		 * Check for the case where we have made a second pass through
2496 		 * the cache due to a lockless allocation.  If we find that no
2497 		 * thread has already inserted this entry, do the insert now
2498 		 * and return.
2499 		 */
2500 		if (nrdc != NULL) {
2501 			avl_insert(rp->r_dir, nrdc->data, where);
2502 			nrdc->flags |= RDDIRCACHED;
2503 			rddir4_cache_hold(nrdc);
2504 			return (nrdc);
2505 		}
2506 
2507 #ifdef DEBUG
2508 		nfs4_readdir_cache_misses++;
2509 #endif
2510 		/*
2511 		 * First, try to allocate an entry without sleeping.  If that
2512 		 * fails then drop the lock and do a sleeping allocation.
2513 		 */
2514 		nrdc = rddir4_cache_alloc(KM_NOSLEEP);
2515 		if (nrdc != NULL) {
2516 			nrdc->nfs4_cookie = cookie;
2517 			nrdc->buflen = count;
2518 			avl_insert(rp->r_dir, nrdc->data, where);
2519 			nrdc->flags |= RDDIRCACHED;
2520 			rddir4_cache_hold(nrdc);
2521 			return (nrdc);
2522 		}
2523 
2524 		/*
2525 		 * Drop the lock and do a sleeping allocation.	We incur
2526 		 * additional overhead by having to search the cache again,
2527 		 * but this case should be rare.
2528 		 */
2529 		mutex_exit(&rp->r_statelock);
2530 		nrdc = rddir4_cache_alloc(KM_SLEEP);
2531 		nrdc->nfs4_cookie = cookie;
2532 		nrdc->buflen = count;
2533 		mutex_enter(&rp->r_statelock);
2534 		/*
2535 		 * We need to take another pass through the cache
2536 		 * since we dropped our lock to perform the alloc.
2537 		 * Another thread may have come by and inserted the
2538 		 * entry we are interested in.
2539 		 */
2540 		goto top;
2541 	}
2542 
2543 	/*
2544 	 * Check to see if we need to free our entry.  This can happen if
2545 	 * another thread came along beat us to the insert.  We can
2546 	 * safely call rddir4_cache_free directly because no other thread
2547 	 * would have a reference to this entry.
2548 	 */
2549 	if (nrdc != NULL)
2550 		rddir4_cache_free((rddir4_cache_impl *)nrdc->data);
2551 
2552 #ifdef DEBUG
2553 	nfs4_readdir_cache_hits++;
2554 #endif
2555 	/*
2556 	 * Found something.  Make sure it's ready to return.
2557 	 */
2558 	rdc = &rdip->rc;
2559 	rddir4_cache_hold(rdc);
2560 	/*
2561 	 * If the cache entry is in the process of being filled in, wait
2562 	 * until this completes.  The RDDIRWAIT bit is set to indicate that
2563 	 * someone is waiting and when the thread currently filling the entry
2564 	 * is done, it should do a cv_broadcast to wakeup all of the threads
2565 	 * waiting for it to finish. If the thread wakes up to find that
2566 	 * someone new is now trying to complete the the entry, go back
2567 	 * to sleep.
2568 	 */
2569 	while (rdc->flags & RDDIR) {
2570 		/*
2571 		 * The entry is not complete.
2572 		 */
2573 		nfs_rw_exit(&rp->r_rwlock);
2574 		rdc->flags |= RDDIRWAIT;
2575 #ifdef DEBUG
2576 		nfs4_readdir_cache_waits++;
2577 #endif
2578 		while (rdc->flags & RDDIRWAIT) {
2579 			if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
2580 				/*
2581 				 * We got interrupted, probably the user
2582 				 * typed ^C or an alarm fired.  We free the
2583 				 * new entry if we allocated one.
2584 				 */
2585 				rddir4_cache_rele(rp, rdc);
2586 				mutex_exit(&rp->r_statelock);
2587 				(void) nfs_rw_enter_sig(&rp->r_rwlock,
2588 				    RW_READER, FALSE);
2589 				mutex_enter(&rp->r_statelock);
2590 				return (NULL);
2591 			}
2592 		}
2593 		mutex_exit(&rp->r_statelock);
2594 		(void) nfs_rw_enter_sig(&rp->r_rwlock,
2595 		    RW_READER, FALSE);
2596 		mutex_enter(&rp->r_statelock);
2597 	}
2598 
2599 	/*
2600 	 * The entry we were waiting on may have been purged from
2601 	 * the cache and should no longer be used, release it and
2602 	 * start over.
2603 	 */
2604 	if (!(rdc->flags & RDDIRCACHED)) {
2605 		rddir4_cache_rele(rp, rdc);
2606 		goto top;
2607 	}
2608 
2609 	/*
2610 	 * The entry is completed.  Return it.
2611 	 */
2612 	return (rdc);
2613 }
2614 
2615 /*
2616  * Allocate a cache element and return it.  Can return NULL if memory is
2617  * low.
2618  */
2619 static rddir4_cache *
2620 rddir4_cache_alloc(int flags)
2621 {
2622 	rddir4_cache_impl	*rdip = NULL;
2623 	rddir4_cache		*rc = NULL;
2624 
2625 	rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);
2626 
2627 	if (rdip != NULL) {
2628 		rc = &rdip->rc;
2629 		rc->data = (void *)rdip;
2630 		rc->nfs4_cookie = 0;
2631 		rc->nfs4_ncookie = 0;
2632 		rc->entries = NULL;
2633 		rc->eof = 0;
2634 		rc->entlen = 0;
2635 		rc->buflen = 0;
2636 		rc->actlen = 0;
2637 		/*
2638 		 * A readdir is required so set the flag.
2639 		 */
2640 		rc->flags = RDDIRREQ;
2641 		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
2642 		rc->error = 0;
2643 		mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
2644 		rdip->count = 1;
2645 #ifdef DEBUG
2646 		atomic_add_64(&clstat4_debug.dirent.value.ui64, 1);
2647 #endif
2648 	}
2649 	return (rc);
2650 }
2651 
2652 /*
2653  * Increment the reference count to this cache element.
2654  */
2655 static void
2656 rddir4_cache_hold(rddir4_cache *rc)
2657 {
2658 	rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;
2659 
2660 	mutex_enter(&rdip->lock);
2661 	rdip->count++;
2662 	mutex_exit(&rdip->lock);
2663 }
2664 
2665 /*
2666  * Release a reference to this cache element.  If the count is zero then
2667  * free the element.
2668  */
2669 void
2670 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
2671 {
2672 	rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;
2673 
2674 	ASSERT(MUTEX_HELD(&rp->r_statelock));
2675 
2676 	/*
2677 	 * Check to see if we have any waiters.  If so, we can wake them
2678 	 * so that they can proceed.
2679 	 */
2680 	if (rdc->flags & RDDIRWAIT) {
2681 		rdc->flags &= ~RDDIRWAIT;
2682 		cv_broadcast(&rdc->cv);
2683 	}
2684 
2685 	mutex_enter(&rdip->lock);
2686 	ASSERT(rdip->count > 0);
2687 	if (--rdip->count == 0) {
2688 		mutex_exit(&rdip->lock);
2689 		rddir4_cache_free(rdip);
2690 	} else
2691 		mutex_exit(&rdip->lock);
2692 }
2693 
2694 /*
2695  * Free a cache element.
2696  */
2697 static void
2698 rddir4_cache_free(rddir4_cache_impl *rdip)
2699 {
2700 	rddir4_cache *rc = &rdip->rc;
2701 
2702 #ifdef DEBUG
2703 	atomic_add_64(&clstat4_debug.dirent.value.ui64, -1);
2704 #endif
2705 	if (rc->entries != NULL)
2706 		kmem_free(rc->entries, rc->buflen);
2707 	cv_destroy(&rc->cv);
2708 	mutex_destroy(&rdip->lock);
2709 	kmem_free(rdip, sizeof (*rdip));
2710 }
2711 
2712 /*
2713  * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
2714  * framework.
2715  */
2716 static int
2717 cl4_snapshot(kstat_t *ksp, void *buf, int rw)
2718 {
2719 	ksp->ks_snaptime = gethrtime();
2720 	if (rw == KSTAT_WRITE) {
2721 		bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
2722 #ifdef DEBUG
2723 		/*
2724 		 * Currently only the global zone can write to kstats, but we
2725 		 * add the check just for paranoia.
2726 		 */
2727 		if (INGLOBALZONE(curproc))
2728 			bcopy((char *)buf + sizeof (clstat4_tmpl),
2729 			    &clstat4_debug, sizeof (clstat4_debug));
2730 #endif
2731 	} else {
2732 		bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
2733 #ifdef DEBUG
2734 		/*
2735 		 * If we're displaying the "global" debug kstat values, we
2736 		 * display them as-is to all zones since in fact they apply to
2737 		 * the system as a whole.
2738 		 */
2739 		bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl),
2740 		    sizeof (clstat4_debug));
2741 #endif
2742 	}
2743 	return (0);
2744 }
2745 
2746 
2747 
2748 /*
2749  * Zone support
2750  */
2751 static void *
2752 clinit4_zone(zoneid_t zoneid)
2753 {
2754 	kstat_t *nfs4_client_kstat;
2755 	struct nfs4_clnt *nfscl;
2756 	uint_t ndata;
2757 
2758 	nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
2759 	mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
2760 	nfscl->nfscl_chtable4 = NULL;
2761 	nfscl->nfscl_zoneid = zoneid;
2762 
2763 	bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl));
2764 	ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
2765 #ifdef DEBUG
2766 	ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t);
2767 #endif
2768 	if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
2769 	    "misc", KSTAT_TYPE_NAMED, ndata,
2770 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
2771 		nfs4_client_kstat->ks_private = &nfscl->nfscl_stat;
2772 		nfs4_client_kstat->ks_snapshot = cl4_snapshot;
2773 		kstat_install(nfs4_client_kstat);
2774 	}
2775 	mutex_enter(&nfs4_clnt_list_lock);
2776 	list_insert_head(&nfs4_clnt_list, nfscl);
2777 	mutex_exit(&nfs4_clnt_list_lock);
2778 
2779 	return (nfscl);
2780 }
2781 
2782 /*ARGSUSED*/
2783 static void
2784 clfini4_zone(zoneid_t zoneid, void *arg)
2785 {
2786 	struct nfs4_clnt *nfscl = arg;
2787 	chhead_t *chp, *next;
2788 
2789 	if (nfscl == NULL)
2790 		return;
2791 	mutex_enter(&nfs4_clnt_list_lock);
2792 	list_remove(&nfs4_clnt_list, nfscl);
2793 	mutex_exit(&nfs4_clnt_list_lock);
2794 	clreclaim4_zone(nfscl, 0);
2795 	for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
2796 		ASSERT(chp->ch_list == NULL);
2797 		kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
2798 		next = chp->ch_next;
2799 		kmem_free(chp, sizeof (*chp));
2800 	}
2801 	kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
2802 	mutex_destroy(&nfscl->nfscl_chtable4_lock);
2803 	kmem_free(nfscl, sizeof (*nfscl));
2804 }
2805 
2806 /*
2807  * Called by endpnt_destructor to make sure the client handles are
2808  * cleaned up before the RPC endpoints.  This becomes a no-op if
2809  * clfini_zone (above) is called first.  This function is needed
2810  * (rather than relying on clfini_zone to clean up) because the ZSD
2811  * callbacks have no ordering mechanism, so we have no way to ensure
2812  * that clfini_zone is called before endpnt_destructor.
2813  */
2814 void
2815 clcleanup4_zone(zoneid_t zoneid)
2816 {
2817 	struct nfs4_clnt *nfscl;
2818 
2819 	mutex_enter(&nfs4_clnt_list_lock);
2820 	nfscl = list_head(&nfs4_clnt_list);
2821 	for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) {
2822 		if (nfscl->nfscl_zoneid == zoneid) {
2823 			clreclaim4_zone(nfscl, 0);
2824 			break;
2825 		}
2826 	}
2827 	mutex_exit(&nfs4_clnt_list_lock);
2828 }
2829 
/*
 * One-time setup for this file: create the client handle kmem cache, the
 * list of per-zone client structures and the zone key whose callbacks
 * (clinit4_zone/clfini4_zone) maintain that list, and give
 * nfs4err_delay_time its default if it has not been set.  Always returns 0.
 */
int
nfs4_subr_init(void)
{
	/*
	 * Allocate and initialize the client handle cache
	 */
	chtab4_cache = kmem_cache_create("client_handle4_cache",
	    sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL,
	    NULL, 0);

	/*
	 * Initialize the list of per-zone client handles (and associated data).
	 * This needs to be done before we call zone_key_create().
	 */
	list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt),
	    offsetof(struct nfs4_clnt, nfscl_node));

	/*
	 * Initialize the zone_key for per-zone client handle lists.
	 */
	zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);

	/* A zero delay time means "not configured"; apply the default. */
	if (nfs4err_delay_time == 0)
		nfs4err_delay_time = NFS4ERR_DELAY_TIME;

	return (0);
}
2857 
/*
 * Teardown counterpart of nfs4_subr_init(): destroy the client handle
 * cache and delete the per-zone client zone key.  The result of
 * zone_key_delete() is deliberately ignored.  Always returns 0.
 */
int
nfs4_subr_fini(void)
{
	/*
	 * Deallocate the client handle cache
	 */
	kmem_cache_destroy(chtab4_cache);

	/*
	 * Destroy the zone_key
	 */
	(void) zone_key_delete(nfs4clnt_zone_key);

	return (0);
}
2873 /*
2874  * Set or Clear direct I/O flag
2875  * VOP_RWLOCK() is held for write access to prevent a race condition
2876  * which would occur if a process is in the middle of a write when
2877  * directio flag gets set. It is possible that all pages may not get flushed.
2878  *
2879  * This is a copy of nfs_directio, changes here may need to be made
2880  * there and vice versa.
2881  */
2882 
2883 int
2884 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
2885 {
2886 	int	error = 0;
2887 	rnode4_t *rp;
2888 
2889 	rp = VTOR4(vp);
2890 
2891 	if (cmd == DIRECTIO_ON) {
2892 
2893 		if (rp->r_flags & R4DIRECTIO)
2894 			return (0);
2895 
2896 		/*
2897 		 * Flush the page cache.
2898 		 */
2899 
2900 		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
2901 
2902 		if (rp->r_flags & R4DIRECTIO) {
2903 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2904 			return (0);
2905 		}
2906 
2907 		if (nfs4_has_pages(vp) &&
2908 		    ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
2909 			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
2910 			    B_INVAL, cr, NULL);
2911 			if (error) {
2912 				if (error == ENOSPC || error == EDQUOT) {
2913 					mutex_enter(&rp->r_statelock);
2914 					if (!rp->r_error)
2915 						rp->r_error = error;
2916 					mutex_exit(&rp->r_statelock);
2917 				}
2918 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2919 				return (error);
2920 			}
2921 		}
2922 
2923 		mutex_enter(&rp->r_statelock);
2924 		rp->r_flags |= R4DIRECTIO;
2925 		mutex_exit(&rp->r_statelock);
2926 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2927 		return (0);
2928 	}
2929 
2930 	if (cmd == DIRECTIO_OFF) {
2931 		mutex_enter(&rp->r_statelock);
2932 		rp->r_flags &= ~R4DIRECTIO;	/* disable direct mode */
2933 		mutex_exit(&rp->r_statelock);
2934 		return (0);
2935 	}
2936 
2937 	return (EINVAL);
2938 }
2939 
2940 /*
2941  * Return TRUE if the file has any pages.  Always go back to
2942  * the master vnode to check v_pages since none of the shadows
2943  * can have pages.
2944  */
2945 
2946 bool_t
2947 nfs4_has_pages(vnode_t *vp)
2948 {
2949 	rnode4_t *rp;
2950 
2951 	rp = VTOR4(vp);
2952 	if (IS_SHADOW(vp, rp))
2953 		vp = RTOV4(rp);	/* RTOV4 always gives the master */
2954 
2955 	return (vn_has_cached_data(vp));
2956 }
2957 
/*
 * Failover decision table, indexed by the clnt_stat value returned by
 * CLNT_CALL.  A non-zero error in the slot for a given clnt_stat is the
 * error to be returned AND the signal that failover should be attempted;
 * a zero means failover is not attempted for that status.
 *
 * Special note: each slot also records the RPC_ value it was written for.
 * If the RPC_ values ever change, direct indexing of the table is no
 * longer valid, but the recorded value lets the lookup code detect the
 * mismatch and issue a warning.  In that case the code always attempts
 * failover as a defensive measure.
 */

static struct try_failover_tab {
	enum clnt_stat	cstat;
	int		error;
} try_failover_table [] = {
	{ RPC_SUCCESS,			0 },
	{ RPC_CANTENCODEARGS,		0 },
	{ RPC_CANTDECODERES,		0 },
	{ RPC_CANTSEND,			ECOMM },
	{ RPC_CANTRECV,			ECOMM },
	{ RPC_TIMEDOUT,			ETIMEDOUT },
	{ RPC_VERSMISMATCH,		0 },
	{ RPC_AUTHERROR,		0 },
	{ RPC_PROGUNAVAIL,		0 },
	{ RPC_PROGVERSMISMATCH,		0 },
	{ RPC_PROCUNAVAIL,		0 },
	{ RPC_CANTDECODEARGS,		0 },
	{ RPC_SYSTEMERROR,		ENOSR },
	{ RPC_UNKNOWNHOST,		EHOSTUNREACH },
	{ RPC_RPCBFAILURE,		ENETUNREACH },
	{ RPC_PROGNOTREGISTERED,	ECONNREFUSED },
	{ RPC_FAILED,			ETIMEDOUT },
	{ RPC_UNKNOWNPROTO,		EHOSTUNREACH },
	{ RPC_INTR,			0 },
	{ RPC_UNKNOWNADDR,		EHOSTUNREACH },
	{ RPC_TLIERROR,			0 },
	{ RPC_NOBROADCAST,		EHOSTUNREACH },
	{ RPC_N2AXLATEFAILURE,		ECONNREFUSED },
	{ RPC_UDERROR,			0 },
	{ RPC_INPROGRESS,		0 },
	{ RPC_STALERACHANDLE,		EINVAL },
	{ RPC_CANTCONNECT,		ECONNREFUSED },
	{ RPC_XPRTFAILED,		ECONNABORTED },
	{ RPC_CANTCREATESTREAM,		ECONNREFUSED },
	{ RPC_CANTSTORE,		ENOBUFS }
};
3009 
3010 /*
3011  * nfs4_try_failover - determine whether the client should
3012  * attempt failover based on the values stored in the nfs4_error_t.
3013  */
3014 int
3015 nfs4_try_failover(nfs4_error_t *ep)
3016 {
3017 	if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
3018 		return (TRUE);
3019 
3020 	if (ep->error && ep->rpc_status != RPC_SUCCESS)
3021 		return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);
3022 
3023 	return (FALSE);
3024 }
3025 
3026 /*
3027  * try_failover - internal version of nfs4_try_failover, called
3028  * only by rfscall and aclcall.  Determine if failover is warranted
3029  * based on the clnt_stat and return the error number if it is.
3030  */
3031 static int
3032 try_failover(enum clnt_stat rpc_status)
3033 {
3034 	int err = 0;
3035 
3036 	if (rpc_status == RPC_SUCCESS)
3037 		return (0);
3038 
3039 #ifdef	DEBUG
3040 	if (rpc_status != 0 && nfs4_try_failover_any) {
3041 		err = ETIMEDOUT;
3042 		goto done;
3043 	}
3044 #endif
3045 	/*
3046 	 * The rpc status is used as an index into the table.
3047 	 * If the rpc status is outside of the range of the
3048 	 * table or if the rpc error numbers have been changed
3049 	 * since the table was constructed, then print a warning
3050 	 * (DEBUG only) and try failover anyway.  Otherwise, just
3051 	 * grab the resulting error number out of the table.
3052 	 */
3053 	if (rpc_status < RPC_SUCCESS || rpc_status >=
3054 	    sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
3055 	    try_failover_table[rpc_status].cstat != rpc_status) {
3056 
3057 		err = ETIMEDOUT;
3058 #ifdef	DEBUG
3059 		cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
3060 		    rpc_status);
3061 #endif
3062 	} else
3063 		err = try_failover_table[rpc_status].error;
3064 
3065 done:
3066 	if (rpc_status)
3067 		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3068 		    "nfs4_try_failover: %strying failover on error %d",
3069 		    err ? "" : "NOT ", rpc_status));
3070 
3071 	return (err);
3072 }
3073 
3074 void
3075 nfs4_error_zinit(nfs4_error_t *ep)
3076 {
3077 	ep->error = 0;
3078 	ep->stat = NFS4_OK;
3079 	ep->rpc_status = RPC_SUCCESS;
3080 }
3081 
3082 void
3083 nfs4_error_init(nfs4_error_t *ep, int error)
3084 {
3085 	ep->error = error;
3086 	ep->stat = NFS4_OK;
3087 	ep->rpc_status = RPC_SUCCESS;
3088 }
3089 
3090 
3091 #ifdef DEBUG
3092 
/*
 * Return a 16-bit hash for filehandle, stateid, clientid, owner.
 * Intended to use the same algorithm as for NFS v3.
 *
 * The buffer is consumed as 32-bit words, each folded into the key
 * together with its own upper half; the 1-3 bytes that remain after the
 * last full word are then folded in one at a time.  The result is the low
 * 16 bits of the key.  A length of zero or less hashes to 0.
 *
 * p is read through a 32-bit pointer, so it is expected to be suitably
 * aligned for that.
 */
int
hash16(void *p, int len)
{
	uint32_t *wp = (uint32_t *)p;
	uint8_t *tail;
	uint32_t key = 0;
	int nwords, rem, i;

	if (len <= 0)
		return (0);

	/* protect against non word aligned lengths */
	nwords = len / 4;
	rem = len & 3;

	for (i = 0; i < nwords; i++)
		key ^= (wp[i] >> 16) ^ wp[i];

	/*
	 * hash left-over bytes: these are the bytes that follow the last
	 * full word, not the first bytes of the buffer.
	 */
	tail = (uint8_t *)p + (len - rem);
	for (i = 0; i < rem; i++)
		key ^= tail[i];

	return (key & 0xffff);
}
3119 
3120 /*
3121  * rnode4info - return filehandle and path information for an rnode.
3122  * XXX MT issues: uses a single static buffer, no locking of path.
3123  */
3124 char *
3125 rnode4info(rnode4_t *rp)
3126 {
3127 	static char buf[80];
3128 	nfs4_fhandle_t fhandle;
3129 	char *path;
3130 	char *type;
3131 
3132 	if (rp == NULL)
3133 		return ("null");
3134 	if (rp->r_flags & R4ISXATTR)
3135 		type = "attr";
3136 	else if (RTOV4(rp)->v_flag & V_XATTRDIR)
3137 		type = "attrdir";
3138 	else if (RTOV4(rp)->v_flag & VROOT)
3139 		type = "root";
3140 	else if (RTOV4(rp)->v_type == VDIR)
3141 		type = "dir";
3142 	else if (RTOV4(rp)->v_type == VREG)
3143 		type = "file";
3144 	else
3145 		type = "other";
3146 	sfh4_copyval(rp->r_fh, &fhandle);
3147 	path = fn_path(rp->r_svnode.sv_name);
3148 	(void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n",
3149 	    (void *)rp, path, type, rp->r_flags,
3150 	    hash16((void *)&fhandle.fh_buf, fhandle.fh_len));
3151 	kmem_free(path, strlen(path)+1);
3152 	return (buf);
3153 }
3154 #endif
3155