xref: /titanic_41/usr/src/uts/common/nfs/nfs4_clnt.h (revision f2a3c691e1fab4dee486fd83642311ec59dc3732)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /*	All Rights Reserved   */
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #ifndef _NFS4_CLNT_H
35 #define	_NFS4_CLNT_H
36 
37 #pragma ident	"%Z%%M%	%I%	%E% SMI"
38 
39 #include <sys/errno.h>
40 #include <sys/types.h>
41 #include <sys/kstat.h>
42 #include <sys/time.h>
43 #include <sys/flock.h>
44 #include <vm/page.h>
45 #include <nfs/nfs4_kprot.h>
46 #include <nfs/nfs4.h>
47 #include <nfs/rnode.h>
48 #include <sys/avl.h>
49 #include <sys/list.h>
50 
51 #ifdef	__cplusplus
52 extern "C" {
53 #endif
54 
55 #define	NFS4_SIZE_OK(size)	((size) <= MAXOFFSET_T)	/* true iff size does not exceed MAXOFFSET_T */
56 
57 /* The four values that nfs4_server's lease_valid field can take */
58 #define	NFS4_LEASE_INVALID		0
59 #define	NFS4_LEASE_VALID		1
60 #define	NFS4_LEASE_UNINITIALIZED	2
61 #define	NFS4_LEASE_NOT_STARTED		3
62 
63 /* flag to tell the renew thread it should exit */
64 #define	NFS4_THREAD_EXIT	1
65 
66 /* Default number of seconds to wait on GRACE and DELAY errors */
67 #define	NFS4ERR_DELAY_TIME	10
68 
69 /* Number of open owner hash buckets in each mntinfo4_t (see mi_oo_list) */
70 #define	NFS4_NUM_OO_BUCKETS	53
71 
72 /* Number of freed open owners (per mntinfo4_t) to keep around */
73 #define	NFS4_NUM_FREED_OPEN_OWNERS	8
74 
75 /* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
76 #define	NFS4_RETRY_SCLID_DELAY	10
77 
78 /* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
79 #define	NFS4_NUM_SCLID_RETRIES	3
80 
81 /* Number of times we should retry an open after getting NFS4ERR_BAD_SEQID */
82 #define	NFS4_NUM_RETRY_BAD_SEQID	3
83 
84 /*
85  * Is the attribute cache valid?  If the client holds a delegation, the attrs
86  * are valid by definition; otherwise gethrtime() must precede r_time_attr_inval.
87  */
88 #define	ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \
89 	gethrtime() < VTOR4(vp)->r_time_attr_inval)
90 
91 /*
92  * Flags to indicate whether to purge the DNLC for non-directory vnodes
93  * in a call to nfs_purge_caches.
94  */
95 #define	NFS4_NOPURGE_DNLC	0	/* do not purge the DNLC */
96 #define	NFS4_PURGE_DNLC		1	/* purge the DNLC */
97 
98 /*
99  * Is cache valid?
100  * Swap (VISSWAP) is always valid; otherwise the cache is valid only when
101  * both mtime and fsize match the cached va_mtime and va_size in r_attr.
102  * NOTE: mtime is a timestruc_t; both tv_sec and tv_nsec are compared.
103  * Caller should be holding the rnode r_statelock mutex.
104  */
105 #define	CACHE4_VALID(rp, mtime, fsize)				\
106 	((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP ||		\
107 	(((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&	\
108 	(mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) &&	\
109 	((fsize) == (rp)->r_attr.va_size)))
110 
111 /*
112  * Detect forced unmount (VFS_UNMOUNTED) or shutdown of curproc's zone.
113  */
114 #define	FS_OR_ZONE_GONE4(vfsp) \
115 	(((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
116 	zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
117 
118 /*
119  * True when a request failed with EIO and FS_OR_ZONE_GONE4(vfsp) holds, i.e.
120  * the filesystem was forcibly unmounted or the zone is shutting down.
121  */
122 #define	NFS4_FRC_UNMT_ERR(err, vfsp) \
123 	((err) == EIO && FS_OR_ZONE_GONE4((vfsp)))
124 
125 /*
126  * Due to the way the address space callbacks are used to execute a delmap,
127  * we must keep track of how many times the same thread has called
128  * VOP_DELMAP()->nfs4_delmap().  This is done by having a list of
129  * nfs4_delmapcall_t's associated with each rnode4_t.  This list is protected
130  * by the rnode4_t's r_statelock.  The individual elements do not need to be
131  * protected as they will only ever be created, modified and destroyed by
132  * one thread (the call_id).
133  * See nfs4_delmap() for further explanation.
134  */
135 typedef struct nfs4_delmapcall {
136 	kthread_t	*call_id;	/* the one thread that owns this entry */
137 	int		error;	/* error from delmap */
138 	list_node_t	call_node;	/* linkage on the rnode4_t's list */
139 } nfs4_delmapcall_t;
140 
141 /*
142  * Arguments for the delmap address space callback (see nfs4_delmap()).
143  */
144 typedef struct nfs4_delmap_args {
145 	vnode_t			*vp;
146 	offset_t		off;
147 	caddr_t			addr;
148 	size_t			len;
149 	uint_t			prot;
150 	uint_t			maxprot;
151 	uint_t			flags;
152 	cred_t			*cr;
153 	nfs4_delmapcall_t	*caller; /* to retrieve errors from the cb */
154 } nfs4_delmap_args_t;
155 
156 /*
157  * Client side statistics
158  */
159 /*
160  * Per-zone counters
161  */
162 struct clstat4 {
163 	kstat_named_t	calls;			/* client requests */
164 	kstat_named_t	badcalls;		/* rpc failures */
165 	kstat_named_t	clgets;			/* client handle gets */
166 	kstat_named_t	cltoomany;		/* client handle cache misses */
167 #ifdef DEBUG	/* the remaining counters exist only when DEBUG is defined */
168 	kstat_named_t	clalloc;		/* number of client handles */
169 	kstat_named_t	noresponse;		/* server not responding cnt */
170 	kstat_named_t	failover;		/* server failover count */
171 	kstat_named_t	remap;			/* server remap count */
172 #endif	/* DEBUG */
173 };
174 
175 #ifdef DEBUG
176 /*
177  * The following are statistics that describe the behavior of the system as a
178  * whole and don't correspond to any particular zone.
179  */
180 struct clstat4_debug {
181 	kstat_named_t	nrnode;			/* number of allocated rnodes */
182 	kstat_named_t	access;			/* size of access cache */
183 	kstat_named_t	dirent;			/* size of readdir cache */
184 	kstat_named_t	dirents;		/* size of readdir buf cache */
185 	kstat_named_t	reclaim;		/* number of reclaims */
186 	kstat_named_t	clreclaim;		/* number of cl reclaims */
187 	kstat_named_t	f_reclaim;		/* number of free reclaims */
188 	kstat_named_t	a_reclaim;		/* number of active reclaims */
189 	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
190 	kstat_named_t	rpath;			/* bytes used to store rpaths */
191 };
192 extern struct clstat4_debug clstat4_debug;	/* single system-wide instance */
193 
194 #endif	/* DEBUG */
195 
196 /*
197  * The NFS specific async_reqs structure and the request types it carries.
198  */
199 
200 enum iotype4 {	/* request type, stored in nfs4_async_reqs::a_io */
201 	NFS4_READ_AHEAD,
202 	NFS4_PUTAPAGE,
203 	NFS4_PAGEIO,
204 	NFS4_READDIR,
205 	NFS4_INACTIVE,
206 	NFS4_COMMIT
207 };
208 #define	NFS4_ASYNC_TYPES	(NFS4_COMMIT + 1)	/* number of iotype4 values */
209 
210 struct nfs4_async_read_req {	/* a_args member a_read_args */
211 	void (*readahead)();		/* pointer to readahead function */
212 	u_offset_t blkoff;		/* offset in file */
213 	struct seg *seg;		/* segment to do i/o to */
214 	caddr_t addr;			/* address to do i/o to */
215 };
216 
217 struct nfs4_pageio_req {	/* a_args member a_pageio_args */
218 	int (*pageio)();		/* pointer to pageio function */
219 	page_t *pp;			/* page list */
220 	u_offset_t io_off;		/* offset in file */
221 	uint_t io_len;			/* size of request */
222 	int flags;			/* NOTE(review): presumably handed to pageio; confirm */
223 };
224 
225 struct nfs4_readdir_req {	/* a_args member a_readdir_args */
226 	int (*readdir)();		/* pointer to readdir function */
227 	struct rddir4_cache *rdc;	/* pointer to cache entry to fill */
228 };
229 
230 struct nfs4_commit_req {	/* a_args member a_commit_args */
231 	void (*commit)();		/* pointer to commit function */
232 	page_t *plist;			/* page list */
233 	offset4 offset;			/* starting offset */
234 	count4 count;			/* size of range to be committed */
235 };
236 
237 struct nfs4_async_reqs {	/* one queued async request */
238 	struct nfs4_async_reqs *a_next;	/* pointer to next arg struct */
239 #ifdef DEBUG
240 	kthread_t *a_queuer;		/* thread id of queueing thread */
241 #endif
242 	struct vnode *a_vp;		/* vnode pointer */
243 	struct cred *a_cred;		/* cred pointer */
244 	enum iotype4 a_io;		/* i/o type */
245 	union {				/* arguments, one member per kind of request */
246 		struct nfs4_async_read_req a_read_args;
247 		struct nfs4_pageio_req a_pageio_args;
248 		struct nfs4_readdir_req a_readdir_args;
249 		struct nfs4_commit_req a_commit_args;
250 	} a_args;
251 };
252 
253 #define	a_nfs4_readahead a_args.a_read_args.readahead	/* shorthands for a_args members */
254 #define	a_nfs4_blkoff a_args.a_read_args.blkoff
255 #define	a_nfs4_seg a_args.a_read_args.seg
256 #define	a_nfs4_addr a_args.a_read_args.addr
257 
258 #define	a_nfs4_putapage a_args.a_pageio_args.pageio	/* same slot as a_nfs4_pageio */
259 #define	a_nfs4_pageio a_args.a_pageio_args.pageio
260 #define	a_nfs4_pp a_args.a_pageio_args.pp
261 #define	a_nfs4_off a_args.a_pageio_args.io_off
262 #define	a_nfs4_len a_args.a_pageio_args.io_len
263 #define	a_nfs4_flags a_args.a_pageio_args.flags
264 
265 #define	a_nfs4_readdir a_args.a_readdir_args.readdir
266 #define	a_nfs4_rdc a_args.a_readdir_args.rdc
267 
268 #define	a_nfs4_commit a_args.a_commit_args.commit
269 #define	a_nfs4_plist a_args.a_commit_args.plist
270 #define	a_nfs4_offset a_args.a_commit_args.offset
271 #define	a_nfs4_count a_args.a_commit_args.count
272 
273 /*
274  * Security information
275  */
276 typedef struct sv_secinfo {
277 	uint_t		count;	/* how many sdata there are */
278 	uint_t		index;	/* which sdata[index] */
279 	struct sec_data	*sdata;	/* the sdata entries that count and index refer to */
280 } sv_secinfo_t;
281 
282 /*
283  * Hash bucket for the mi's open owner list (mi_oo_list).
284  */
285 typedef struct nfs4_oo_hash_bucket {
286 	list_t			b_oo_hash_list;	/* the entries in this bucket */
287 	kmutex_t		b_lock;	/* NOTE(review): presumably guards b_oo_hash_list; confirm */
288 } nfs4_oo_hash_bucket_t;
289 
290 /*
291  * Global array of ctags (cf. nfs4_tag_type_t and NFS4_TAG_INITIALIZER below).
292  */
293 extern ctag_t nfs4_ctags[];
294 
295 typedef enum nfs4_tag_type {	/* NFS4_TAG_INITIALIZER lists these in the same order */
296 	TAG_NONE,
297 	TAG_ACCESS,
298 	TAG_CLOSE,
299 	TAG_CLOSE_LOST,
300 	TAG_CLOSE_UNDO,
301 	TAG_COMMIT,
302 	TAG_DELEGRETURN,
303 	TAG_FSINFO,
304 	TAG_GET_SYMLINK,
305 	TAG_GETATTR,
306 	TAG_INACTIVE,
307 	TAG_LINK,
308 	TAG_LOCK,
309 	TAG_LOCK_RECLAIM,
310 	TAG_LOCK_RESEND,
311 	TAG_LOCK_REINSTATE,
312 	TAG_LOCK_UNKNOWN,
313 	TAG_LOCKT,
314 	TAG_LOCKU,
315 	TAG_LOCKU_RESEND,
316 	TAG_LOCKU_REINSTATE,
317 	TAG_LOOKUP,
318 	TAG_LOOKUP_PARENT,
319 	TAG_LOOKUP_VALID,
320 	TAG_LOOKUP_VPARENT,
321 	TAG_MKDIR,
322 	TAG_MKNOD,
323 	TAG_MOUNT,
324 	TAG_OPEN,
325 	TAG_OPEN_CONFIRM,
326 	TAG_OPEN_CONFIRM_LOST,
327 	TAG_OPEN_DG,
328 	TAG_OPEN_DG_LOST,
329 	TAG_OPEN_LOST,
330 	TAG_OPENATTR,
331 	TAG_PATHCONF,
332 	TAG_PUTROOTFH,
333 	TAG_READ,
334 	TAG_READAHEAD,
335 	TAG_READDIR,
336 	TAG_READLINK,
337 	TAG_RELOCK,
338 	TAG_REMAP_LOOKUP,
339 	TAG_REMAP_LOOKUP_AD,
340 	TAG_REMAP_LOOKUP_NA,
341 	TAG_REMAP_MOUNT,
342 	TAG_RMDIR,
343 	TAG_REMOVE,
344 	TAG_RENAME,
345 	TAG_RENAME_VFH,
346 	TAG_RENEW,
347 	TAG_REOPEN,
348 	TAG_REOPEN_LOST,
349 	TAG_SECINFO,
350 	TAG_SETATTR,
351 	TAG_SETCLIENTID,
352 	TAG_SETCLIENTID_CF,
353 	TAG_SYMLINK,
354 	TAG_WRITE
355 } nfs4_tag_type_t;
356 
357 #define	NFS4_TAG_INITIALIZER	{ /* {tag, name, 12 ASCII bytes} */	\
358 		{TAG_NONE,		"",			\
359 			{0x20202020, 0x20202020, 0x20202020}},	\
360 		{TAG_ACCESS,		"access",		\
361 			{0x61636365, 0x73732020, 0x20202020}},	\
362 		{TAG_CLOSE,		"close",		\
363 			{0x636c6f73, 0x65202020, 0x20202020}},	\
364 		{TAG_CLOSE_LOST,	"lost close",		\
365 			{0x6c6f7374, 0x20636c6f, 0x73652020}},	\
366 		{TAG_CLOSE_UNDO,	"undo close",		\
367 			{0x756e646f, 0x20636c6f, 0x73652020}},	\
368 		{TAG_COMMIT,		"commit",		\
369 			{0x636f6d6d, 0x69742020, 0x20202020}},	\
370 		{TAG_DELEGRETURN,	"delegreturn",		\
371 			{0x64656c65, 0x67726574, 0x75726e20}},	\
372 		{TAG_FSINFO,		"fsinfo",		\
373 			{0x6673696e, 0x666f2020, 0x20202020}},	\
374 		{TAG_GET_SYMLINK,	"get symlink text",	\
375 			{0x67657420, 0x736c6e6b, 0x20747874}},	\
376 		{TAG_GETATTR,		"getattr",		\
377 			{0x67657461, 0x74747220, 0x20202020}},	\
378 		{TAG_INACTIVE,		"inactive",		\
379 			{0x696e6163, 0x74697665, 0x20202020}},	\
380 		{TAG_LINK,		"link",			\
381 			{0x6c696e6b, 0x20202020, 0x20202020}},	\
382 		{TAG_LOCK,		"lock",			\
383 			{0x6c6f636b, 0x20202020, 0x20202020}},	\
384 		{TAG_LOCK_RECLAIM,	"reclaim lock",		\
385 			{0x7265636c, 0x61696d20, 0x6c6f636b}},	\
386 		{TAG_LOCK_RESEND,	"resend lock",		\
387 			{0x72657365, 0x6e64206c, 0x6f636b20}},	\
388 		{TAG_LOCK_REINSTATE,	"reinstate lock",	\
389 			{0x7265696e, 0x7374206c, 0x6f636b20}},	\
390 		{TAG_LOCK_UNKNOWN,	"unknown lock",		\
391 			{0x756e6b6e, 0x6f776e20, 0x6c6f636b}},	\
392 		{TAG_LOCKT,		"lock test",		\
393 			{0x6c6f636b, 0x5f746573, 0x74202020}},	\
394 		{TAG_LOCKU,		"unlock",		\
395 			{0x756e6c6f, 0x636b2020, 0x20202020}},	\
396 		{TAG_LOCKU_RESEND,	"resend locku",		\
397 			{0x72657365, 0x6e64206c, 0x6f636b75}},	\
398 		{TAG_LOCKU_REINSTATE,	"reinstate unlock",	\
399 			{0x7265696e, 0x73742075, 0x6e6c636b}},	\
400 		{TAG_LOOKUP,		"lookup",		\
401 			{0x6c6f6f6b, 0x75702020, 0x20202020}},	\
402 		{TAG_LOOKUP_PARENT,	"lookup parent",	\
403 			{0x6c6f6f6b, 0x75702070, 0x6172656e}},	\
404 		{TAG_LOOKUP_VALID,	"lookup valid",		\
405 			{0x6c6f6f6b, 0x75702076, 0x616c6964}},	\
406 		{TAG_LOOKUP_VPARENT,	"lookup valid parent",	\
407 			{0x6c6f6f6b, 0x766c6420, 0x7061726e}},	\
408 		{TAG_MKDIR,		"mkdir",		\
409 			{0x6d6b6469, 0x72202020, 0x20202020}},	\
410 		{TAG_MKNOD,		"mknod",		\
411 			{0x6d6b6e6f, 0x64202020, 0x20202020}},	\
412 		{TAG_MOUNT,		"mount",		\
413 			{0x6d6f756e, 0x74202020, 0x20202020}},	\
414 		{TAG_OPEN,		"open",			\
415 			{0x6f70656e, 0x20202020, 0x20202020}},	\
416 		{TAG_OPEN_CONFIRM,	"open confirm",		\
417 			{0x6f70656e, 0x5f636f6e, 0x6669726d}},	\
418 		{TAG_OPEN_CONFIRM_LOST,	"lost open confirm",	\
419 			{0x6c6f7374, 0x206f7065, 0x6e5f636f}},	\
420 		{TAG_OPEN_DG,		"open downgrade",	\
421 			{0x6f70656e, 0x20646772, 0x61646520}},	\
422 		{TAG_OPEN_DG_LOST,	"lost open downgrade",	\
423 			{0x6c737420, 0x6f70656e, 0x20646772}},	\
424 		{TAG_OPEN_LOST,		"lost open",		\
425 			{0x6c6f7374, 0x206f7065, 0x6e202020}},	\
426 		{TAG_OPENATTR,		"openattr",		\
427 			{0x6f70656e, 0x61747472, 0x20202020}},	\
428 		{TAG_PATHCONF,		"pathconf",		\
429 			{0x70617468, 0x636f6e66, 0x20202020}},	\
430 		{TAG_PUTROOTFH,		"putrootfh",		\
431 			{0x70757472, 0x6f6f7466, 0x68202020}},	\
432 		{TAG_READ,		"read",			\
433 			{0x72656164, 0x20202020, 0x20202020}},	\
434 		{TAG_READAHEAD,		"readahead",		\
435 			{0x72656164, 0x61686561, 0x64202020}},	\
436 		{TAG_READDIR,		"readdir",		\
437 			{0x72656164, 0x64697220, 0x20202020}},	\
438 		{TAG_READLINK,		"readlink",		\
439 			{0x72656164, 0x6c696e6b, 0x20202020}},	\
440 		{TAG_RELOCK,		"relock",		\
441 			{0x72656c6f, 0x636b2020, 0x20202020}},	\
442 		{TAG_REMAP_LOOKUP,	"remap lookup",		\
443 			{0x72656d61, 0x70206c6f, 0x6f6b7570}},	\
444 		{TAG_REMAP_LOOKUP_AD,	"remap lookup attr dir",	\
445 			{0x72656d70, 0x206c6b75, 0x70206164}},	\
446 		{TAG_REMAP_LOOKUP_NA,	"remap lookup named attrs",	\
447 			{0x72656d70, 0x206c6b75, 0x70206e61}},	\
448 		{TAG_REMAP_MOUNT,	"remap mount",		\
449 			{0x72656d61, 0x70206d6f, 0x756e7420}},	\
450 		{TAG_RMDIR,		"rmdir",		\
451 			{0x726d6469, 0x72202020, 0x20202020}},	\
452 		{TAG_REMOVE,		"remove",		\
453 			{0x72656d6f, 0x76652020, 0x20202020}},	\
454 		{TAG_RENAME,		"rename",		\
455 			{0x72656e61, 0x6d652020, 0x20202020}},	\
456 		{TAG_RENAME_VFH,	"rename volatile fh",	\
457 			{0x72656e61, 0x6d652028, 0x76666829}},	\
458 		{TAG_RENEW,		"renew",		\
459 			{0x72656e65, 0x77202020, 0x20202020}},	\
460 		{TAG_REOPEN,		"reopen",		\
461 			{0x72656f70, 0x656e2020, 0x20202020}},	\
462 		{TAG_REOPEN_LOST,	"lost reopen",		\
463 			{0x6c6f7374, 0x2072656f, 0x70656e20}},	\
464 		{TAG_SECINFO,		"secinfo",		\
465 			{0x73656369, 0x6e666f20, 0x20202020}},	\
466 		{TAG_SETATTR,		"setattr",		\
467 			{0x73657461, 0x74747220, 0x20202020}},	\
468 		{TAG_SETCLIENTID,	"setclientid",		\
469 			{0x73657463, 0x6c69656e, 0x74696420}},	\
470 		{TAG_SETCLIENTID_CF,	"setclientid_confirm",	\
471 			{0x73636c6e, 0x7469645f, 0x636f6e66}},	\
472 		{TAG_SYMLINK,		"symlink",		\
473 			{0x73796d6c, 0x696e6b20, 0x20202020}},	\
474 		{TAG_WRITE,		"write",		\
475 			{0x77726974, 0x65202020, 0x20202020}}	\
476 	}
477 
478 /*
479  * These flags are for differentiating the search criteria for
480  * find_open_owner().  The comparison is done with the open owner's
481  * 'oo_just_created' flag.
482  */
483 #define	NFS4_PERM_CREATED	0x0
484 #define	NFS4_JUST_CREATED	0x1
485 
486 /*
487  * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
488  * is stored upon a successful OPEN.  This is needed when the user's effective
489  * and real uid's don't match.  The 'oo_cred_otw' overrides the credential
490  * passed down by VFS for async read/write, commit, lock, and close operations.
491  *
492  * The oo_ref_count keeps track of the number of active references on this
493  * data structure + number of nfs4_open_streams pointing to this structure.
494  *
495  * 'oo_valid' tells whether this struct is about to be freed or not.
496  *
497  * 'oo_just_created' tells us whether this struct has just been created but
498  * not been fully finalized (that is created upon an OPEN request and
499  * finalized upon the OPEN success).
500  *
501  * The 'oo_seqid_inuse' is for the open seqid synchronization.  If a thread
502  * is currently using the open owner and its open_seqid, then it sets the
503  * oo_seqid_inuse to true if it currently is not set.  If it is set then it
504  * does a cv_wait on the oo_cv_seqid_sync condition variable.  When the thread
505  * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process
506  * waiting on the condition variable.
507  *
508  * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
509  * and 'oo_last_good_op' is the operation that issued the last valid seqid.
510  *
511  * Lock ordering:
512  *	mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
513  *
514  *	oo_seqid_inuse > mntinfo4_t::mi_lock
515  *	oo_seqid_inuse > rnode4_t::r_statelock
516  *	oo_seqid_inuse > rnode4_t::r_statev4_lock
517  *	oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
518  *
519  * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
520  *	oo_last_good_op
521  *	oo_last_good_seqid
522  *	oo_name
523  *	oo_seqid
524  *
525  * The 'oo_lock' protects:
526  *	oo_cred
527  *	oo_cred_otw
528  *	oo_foo_node
529  *	oo_hash_node
530  *	oo_just_created
531  *	oo_ref_count
532  *	oo_valid
533  */
534 
535 typedef struct nfs4_open_owner {
536 	cred_t			*oo_cred;	/* hashed by its cr_uid and cr_ruid */
537 	int			oo_ref_count;	/* active refs + open streams */
538 	int			oo_valid;
539 	int			oo_just_created;	/* created, OPEN not yet succeeded */
540 	seqid4			oo_seqid;
541 	seqid4			oo_last_good_seqid;	/* last valid seqid sent OTW */
542 	nfs4_tag_type_t		oo_last_good_op;	/* op that issued that seqid */
543 	unsigned		oo_seqid_inuse:1;	/* a thread is using oo_seqid */
544 	cred_t			*oo_cred_otw;	/* stored on a successful OPEN */
545 	kcondvar_t		oo_cv_seqid_sync;	/* waited on while seqid in use */
546 	/*
547 	 * Fix this to always be 8 bytes
548 	 */
549 	uint64_t		oo_name;
550 	list_node_t		oo_hash_node;
551 	list_node_t		oo_foo_node;
552 	kmutex_t		oo_lock;	/* see "The 'oo_lock' protects" above */
553 } nfs4_open_owner_t;
554 
555 /*
556  * Static server information.
557  * These fields are read-only once they are initialized:
558  *	sv_addr
559  *	sv_dhsec
560  *	sv_hostname
561  *	sv_hostnamelen
562  *	sv_knconf
563  *	sv_next
564  *	sv_origknconf
565  *
566  * These fields are protected by sv_lock:
567  *	sv_currsec
568  *	sv_fhandle
569  *	sv_flags
570  *	sv_fsid
571  *	sv_path
572  *	sv_pathlen
573  *	sv_pfhandle
574  *	sv_save_secinfo
575  *	sv_savesec
576  *	sv_secdata
577  *	sv_secinfo
578  *	sv_supp_attrs
579  *
580  * Lock ordering:
581  * nfs_rtable4_lock > sv_lock
582  * rnode4_t::r_statelock > sv_lock
583  */
584 typedef struct servinfo4 {
585 	struct knetconfig *sv_knconf;   /* bound TLI fd */
586 	struct knetconfig *sv_origknconf;	/* For RDMA save orig knconf */
587 	struct netbuf	   sv_addr;	/* server's address */
588 	nfs4_fhandle_t	   sv_fhandle;	/* this server's filehandle */
589 	nfs4_fhandle_t	   sv_pfhandle; /* parent dir filehandle */
590 	int		   sv_pathlen;	/* Length of server path */
591 	char		  *sv_path;	/* Path name on server */
592 	uint32_t	   sv_flags;	/* flags for this server (SV4_*) */
593 	sec_data_t	  *sv_secdata;	/* client initiated security data */
594 	sv_secinfo_t	  *sv_secinfo;	/* server security information */
595 	sec_data_t	  *sv_currsec;	/* security data currently used; */
596 					/* points to one of the sec_data */
597 					/* entries in sv_secinfo */
598 	sv_secinfo_t	  *sv_save_secinfo; /* saved secinfo */
599 	sec_data_t	  *sv_savesec;	/* saved security data */
600 	sec_data_t	  *sv_dhsec;    /* AUTH_DH data from userland */
601 	char		  *sv_hostname;	/* server's hostname */
602 	int		   sv_hostnamelen;  /* server's hostname length */
603 	fattr4_fsid		sv_fsid;    /* fsid of shared obj	*/
604 	fattr4_supported_attrs	sv_supp_attrs;	/* NOTE(review): presumably server's supported attrs; confirm */
605 	struct servinfo4  *sv_next;	/* next in list */
606 	nfs_rwlock_t	   sv_lock;	/* protects the fields listed above */
607 } servinfo4_t;
608 
609 /* Bit values for servinfo4_t's sv_flags field */
610 #define	SV4_TRYSECINFO		0x001	/* try secinfo data from the server */
611 #define	SV4_TRYSECDEFAULT	0x002	/* try a default flavor */
612 #define	SV4_NOTINUSE		0x004	/* servinfo4_t had fatal errors */
613 #define	SV4_ROOT_STALE		0x008	/* root vnode got ESTALE */
614 
615 /*
616  * Lock call types.  See nfs4frlock().
617  */
618 typedef enum nfs4_lock_call_type {	/* also kept in nfs4_lost_rqst_t's lr_ctype */
619 	NFS4_LCK_CTYPE_NORM,
620 	NFS4_LCK_CTYPE_RECLAIM,
621 	NFS4_LCK_CTYPE_RESEND,
622 	NFS4_LCK_CTYPE_REINSTATE
623 } nfs4_lock_call_type_t;
624 
625 /*
626  * This structure holds the information for a lost open/close/open downgrade/
627  * lock/locku request.  It is also used for requests that are queued up so
628  * that the recovery thread can release server state after a forced
629  * unmount.
630  * "lr_op" is 0 if the struct is uninitialized.  Otherwise, it is set to
631  * the proper OP_* nfs_opnum4 number.  The other fields contain information
632  * to reconstruct the call.
633  *
634  * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
635  * parent directory without relying on vtodv (since we may not have a vp
636  * for the file we wish to create).
637  *
638  * lr_putfirst means that the request should go to the front of the resend
639  * queue, rather than the end.
640  */
641 typedef struct nfs4_lost_rqst {
642 	list_node_t			lr_node;
643 	nfs_opnum4			lr_op;	/* 0 if uninitialized, else OP_* */
644 	vnode_t				*lr_vp;
645 	vnode_t				*lr_dvp;	/* parent dir, for OPEN with CREATE */
646 	nfs4_open_owner_t		*lr_oop;
647 	struct nfs4_open_stream		*lr_osp;
648 	struct nfs4_lock_owner		*lr_lop;
649 	cred_t				*lr_cr;
650 	flock64_t			*lr_flk;
651 	bool_t				lr_putfirst;	/* queue at front, not end */
652 	union {
653 		struct {
654 			nfs4_lock_call_type_t lru_ctype;
655 			nfs_lock_type4	lru_locktype;
656 		} lru_lockargs;		/* LOCK, LOCKU */
657 		struct {
658 			uint32_t		lru_oaccess;
659 			uint32_t		lru_odeny;
660 			enum open_claim_type4	lru_oclaim;
661 			stateid4		lru_ostateid; /* reopen only */
662 			component4		lru_ofile;
663 		} lru_open_args;	/* presumably open requests; confirm */
664 		struct {
665 			uint32_t	lru_dg_access;
666 			uint32_t	lru_dg_deny;
667 		} lru_open_dg_args;	/* presumably open downgrade; confirm */
668 	} nfs4_lr_u;
669 } nfs4_lost_rqst_t;
670 
671 #define	lr_oacc		nfs4_lr_u.lru_open_args.lru_oaccess	/* shorthands for nfs4_lr_u members */
672 #define	lr_odeny	nfs4_lr_u.lru_open_args.lru_odeny
673 #define	lr_oclaim	nfs4_lr_u.lru_open_args.lru_oclaim
674 #define	lr_ostateid	nfs4_lr_u.lru_open_args.lru_ostateid
675 #define	lr_ofile	nfs4_lr_u.lru_open_args.lru_ofile
676 #define	lr_dg_acc	nfs4_lr_u.lru_open_dg_args.lru_dg_access
677 #define	lr_dg_deny	nfs4_lr_u.lru_open_dg_args.lru_dg_deny
678 #define	lr_ctype	nfs4_lr_u.lru_lockargs.lru_ctype
679 #define	lr_locktype	nfs4_lr_u.lru_lockargs.lru_locktype
680 
681 /*
682  * Recovery actions.  Some actions can imply further recovery using a
683  * different recovery action (e.g., recovering the clientid leads to
684  * recovering open files and locks).
685  */
686 
687 typedef enum {	/* type of nfs4_rfact_t's rf_action field */
688 	NR_UNUSED,
689 	NR_CLIENTID,
690 	NR_OPENFILES,
691 	NR_FHEXPIRED,
692 	NR_FAILOVER,
693 	NR_WRONGSEC,
694 	NR_EXPIRED,
695 	NR_BAD_STATEID,
696 	NR_BADHANDLE,
697 	NR_BAD_SEQID,
698 	NR_OLDSTATEID,
699 	NR_GRACE,
700 	NR_DELAY,
701 	NR_LOST_LOCK,
702 	NR_LOST_STATE_RQST,
703 	NR_STALE
704 } nfs4_recov_t;
705 
706 /*
707  * Administrative and debug message framework.
708  */
709 
710 #define	NFS4_MSG_MAX	100	/* NOTE(review): presumably the default for nfs4_msg_max; confirm */
711 extern int nfs4_msg_max;	/* defined outside this header */
712 
713 typedef enum {	/* type of nfs4_revent_t's re_type field */
714 	RE_BAD_SEQID,
715 	RE_BADHANDLE,
716 	RE_CLIENTID,
717 	RE_DEAD_FILE,
718 	RE_END,
719 	RE_FAIL_RELOCK,
720 	RE_FAIL_REMAP_LEN,
721 	RE_FAIL_REMAP_OP,
722 	RE_FAILOVER,
723 	RE_FILE_DIFF,
724 	RE_LOST_STATE,
725 	RE_OPENS_CHANGED,
726 	RE_SIGLOST,
727 	RE_SIGLOST_NO_DUMP,
728 	RE_START,
729 	RE_UNEXPECTED_ACTION,
730 	RE_UNEXPECTED_ERRNO,
731 	RE_UNEXPECTED_STATUS,
732 	RE_WRONGSEC,
733 	RE_LOST_STATE_BAD_OP
734 } nfs4_event_type_t;
735 
736 typedef enum {	/* type of nfs4_rfact_t's rf_status field */
737 	RFS_NO_INSPECT,
738 	RFS_INSPECT
739 } nfs4_fact_status_t;
740 
741 typedef enum {	/* type of nfs4_rfact_t's rf_type field */
742 	RF_BADOWNER,
743 	RF_ERR,
744 	RF_RENEW_EXPIRED,
745 	RF_SRV_NOT_RESPOND,
746 	RF_SRV_OK,
747 	RF_SRVS_NOT_RESPOND,
748 	RF_SRVS_OK,
749 	RF_DELMAP_CB_ERR
750 } nfs4_fact_type_t;
751 
752 typedef enum {	/* type of nfs4_debug_msg_t's msg_status field */
753 	NFS4_MS_DUMP,
754 	NFS4_MS_NO_DUMP
755 } nfs4_msg_status_t;
756 
757 typedef struct nfs4_rfact {	/* the msg_fact member of nfs4_debug_msg_t's rmsg_u */
758 	nfs4_fact_type_t	rf_type;
759 	nfs4_fact_status_t	rf_status;
760 	bool_t			rf_reboot;
761 	nfs4_recov_t		rf_action;
762 	nfs_opnum4		rf_op;
763 	nfsstat4		rf_stat4;
764 	timespec_t		rf_time;
765 	int			rf_error;
766 	struct rnode4		*rf_rp1;
767 	char			*rf_char1;
768 } nfs4_rfact_t;
769 
770 typedef struct nfs4_revent {	/* the msg_event member of nfs4_debug_msg_t's rmsg_u */
771 	nfs4_event_type_t	re_type;
772 	nfsstat4		re_stat4;
773 	uint_t			re_uint;
774 	pid_t			re_pid;
775 	struct mntinfo4		*re_mi;
776 	struct rnode4		*re_rp1;
777 	struct rnode4		*re_rp2;
778 	char			*re_char1;
779 	char			*re_char2;
780 	nfs4_tag_type_t		re_tag1;
781 	nfs4_tag_type_t		re_tag2;
782 	seqid4			re_seqid1;
783 	seqid4			re_seqid2;
784 } nfs4_revent_t;
785 
786 typedef enum {	/* type of nfs4_debug_msg_t's msg_type field */
787 	RM_EVENT,
788 	RM_FACT
789 } nfs4_msg_type_t;
790 
791 typedef struct nfs4_debug_msg {	/* one entry of the debug message queue */
792 	timespec_t		msg_time;
793 	nfs4_msg_type_t		msg_type;	/* RM_EVENT or RM_FACT */
794 	char			*msg_srv;
795 	char			*msg_mntpt;
796 	union {	/* NOTE(review): presumably selected by msg_type; confirm */
797 		nfs4_rfact_t	msg_fact;
798 		nfs4_revent_t	msg_event;
799 	} rmsg_u;
800 	nfs4_msg_status_t	msg_status;
801 	list_node_t		msg_node;	/* NOTE(review): presumably mi_msg_list linkage */
802 } nfs4_debug_msg_t;
803 
804 /*
805  * NFS private data per mounted file system
806  *	The mi_lock mutex protects the following fields:
807  *		mi_flags
808  *		mi_in_recovery
809  *		mi_recovflags
810  *		mi_recovthread
811  *		mi_error
812  *		mi_printed
813  *		mi_down
814  *		mi_stsize
815  *		mi_curread
816  *		mi_curwrite
817  *		mi_timers
818  *		mi_curr_serv
819  *		mi_klmconfig
820  *		mi_oo_list
821  *		mi_foo_list
822  *		mi_foo_num
823  *		mi_foo_max
824  *		mi_lost_state
825  *		mi_bseqid_list
826  *
827  *	Normally the netconfig information for the mount comes from
828  *	mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
829  *	different transport, mi_klmconfig contains the necessary netconfig
830  *	information.
831  *
832  *	The mi_async_lock mutex protects the following fields:
833  *		mi_async_reqs
834  *		mi_async_req_count
835  * 		mi_async_tail
836  *		mi_async_curr
837  *		mi_async_clusters
838  *		mi_async_init_clusters
839  *		mi_threads
840  *		mi_inactive_thread
841  *		mi_manager_thread
842  *
843  *	The nfs4_server_t::s_lock protects the following fields:
844  *		mi_clientid
845  *		mi_clientid_next
846  *		mi_clientid_prev
847  *		mi_open_files
848  *		mi_srvsettime
849  *
850  *	The mntinfo4_t::mi_recovlock protects the following fields:
851  *		mi_srvsettime
852  *
853  *	Locking order:
854  *	  mi4_globals::mig_lock > mi_async_lock
855  *	  mi_async_lock > nfs4_server_t::s_lock > mi_lock
856  *	  mi_recovlock > mi_rename_lock > nfs_rtable4_lock
857  *	  nfs4_server_t::s_recovlock > mi_recovlock
858  *	  rnode4_t::r_rwlock > mi_rename_lock
859  *	  nfs_rtable4_lock > mi_lock
860  *	  nfs4_server_t::s_lock > mi_msg_list_lock
861  *	  mi_recovlock > nfs4_server_t::s_lock
862  *	  mi_recovlock > nfs4_server_lst_lock
863  *
864  * The 'mi_oo_list' represents the hash buckets that contain the
865  * nfs4_open_owners for this particular mntinfo4.
866  *
867  * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
868  * 'mi_foo_num' is the current number of freed open owners on the list,
869  * 'mi_foo_max' is the maximum number of freed open owners that are allowable
870  * on the list.
871  *
872  * mi_rootfh and mi_srvparentfh are read-only once created, but that just
873  * refers to the pointer.  The contents must be updated to keep in sync
874  * with mi_curr_serv.
875  *
876  * The mi_msg_list_lock protects against adding/deleting entries to the
877  * mi_msg_list, and also the updating/retrieving of mi_lease_period.
878  *
879  * 'mi_zone' is initialized at structure creation time, and never
880  * changes; it may be read without a lock.
881  *
882  * mi_zone_node is linkage into the mi4_globals.mig_list, and is
883  * protected by mi4_globals.mig_list_lock.
884  */
885 struct zone;
886 typedef struct mntinfo4 {
887 	kmutex_t	mi_lock;	/* protects mntinfo4 fields */
888 	struct servinfo4 *mi_servers;   /* server list */
889 	struct servinfo4 *mi_curr_serv; /* current server */
890 	struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
891 	struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
892 	kcondvar_t	mi_failover_cv;	/* failover synchronization */
893 	struct vfs	*mi_vfsp;	/* back pointer to vfs */
894 	enum vtype	mi_type;	/* file type of the root vnode */
895 	uint_t		mi_flags;	/* see below */
896 	uint_t		mi_recovflags;	/* if recovery active; see below */
897 	kthread_t	*mi_recovthread; /* active recov thread or NULL */
898 	uint_t		mi_error;	/* only set/valid when MI4_RECOV_FAIL */
899 					/* is set in mi_flags */
900 	int		mi_tsize;	/* transfer size (bytes) */
901 					/* really read size */
902 	int		mi_stsize;	/* server's max transfer size (bytes) */
903 					/* really write size */
904 	int		mi_timeo;	/* inital timeout in 10th sec */
905 	int		mi_retrans;	/* times to retry request */
906 	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
907 	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
908 	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
909 	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
910 	len_t		mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
911 	int		mi_curread;	/* current read size */
912 	int		mi_curwrite;	/* current write size */
913 	uint_t 		mi_count; 	/* ref count */
914 	/*
915 	 * async I/O management.  There may be a pool of threads to handle
916 	 * async I/O requests, etc., plus there is always one thread that
917 	 * handles over-the-wire requests for VOP_INACTIVE.  The async pool
918 	 * can also help out with VOP_INACTIVE.
919 	 */
920 	struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
921 	struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
922 	struct nfs4_async_reqs **mi_async_curr;	/* current async queue */
923 	uint_t		mi_async_clusters[NFS4_ASYNC_TYPES];
924 	uint_t		mi_async_init_clusters;
925 	uint_t		mi_async_req_count; /* # outstanding work requests */
926 	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
927 	ushort_t	mi_threads;	/* number of active async threads */
928 	ushort_t	mi_max_threads;	/* max number of async threads */
929 	kthread_t	*mi_manager_thread; /* async manager thread id */
930 	kthread_t	*mi_inactive_thread; /* inactive thread id */
931 	kcondvar_t	mi_inact_req_cv; /* notify VOP_INACTIVE thread */
932 	kcondvar_t	mi_async_work_cv; /* tell workers to work */
933 	kcondvar_t	mi_async_cv;	/* all pool threads exited */
934 	kmutex_t	mi_async_lock;
935 	/*
936 	 * Other stuff
937 	 */
938 	struct pathcnf	*mi_pathconf;	/* static pathconf kludge */
939 	rpcprog_t	mi_prog;	/* RPC program number */
940 	rpcvers_t	mi_vers;	/* RPC program version number */
941 	char		**mi_rfsnames;	/* mapping to proc names */
942 	kstat_named_t	*mi_reqs;	/* count of requests */
943 	clock_t		mi_printftime;	/* last error printf time */
944 	nfs_rwlock_t	mi_recovlock;	/* separate ops from recovery (v4) */
945 	time_t		mi_grace_wait;	/* non-zero represents time to wait */
946 	time_t		mi_srvsettime;	/* when we switched nfs4_server_t */
947 	nfs_rwlock_t	mi_rename_lock;	/* atomic volfh rename  */
948 	struct nfs4_fname *mi_fname;	/* root fname */
949 	list_t		mi_lost_state;	/* resend list */
950 	list_t		mi_bseqid_list; /* bad seqid list */
951 	/*
952 	 * Client Side Failover stats
953 	 */
954 	uint_t		mi_noresponse;	/* server not responding count */
955 	uint_t		mi_failover; 	/* failover to new server count */
956 	uint_t		mi_remap;	/* remap to new server count */
957 	/*
958 	 * Kstat statistics
959 	 */
960 	struct kstat	*mi_io_kstats;
961 	struct kstat	*mi_ro_kstats;
962 	kstat_t		*mi_recov_ksp;	/* ptr to the recovery kstat */
963 
964 	/*
965 	 * Volatile fh flags (nfsv4)
966 	 */
967 	uint32_t	mi_fh_expire_type;
968 	/*
969 	 * Lease Management
970 	 */
971 	struct mntinfo4	*mi_clientid_next;
972 	struct mntinfo4	*mi_clientid_prev;
973 	clientid4	mi_clientid; /* redundant info found in nfs4_server */
974 	int		mi_open_files;	/* count of open files */
975 	int		mi_in_recovery;	/* count of recovery instances */
976 	kcondvar_t	mi_cv_in_recov; /* cv for recovery threads */
977 	/*
978 	 * Open owner stuff.
979 	 */
980 	struct nfs4_oo_hash_bucket	mi_oo_list[NFS4_NUM_OO_BUCKETS];
981 	list_t				mi_foo_list;
982 	int				mi_foo_num;
983 	int				mi_foo_max;
984 	/*
985 	 * Shared filehandle pool.
986 	 */
987 	nfs_rwlock_t			mi_fh_lock;
988 	avl_tree_t			mi_filehandles;
989 
990 	/*
991 	 * Debug message queue.
992 	 */
993 	list_t			mi_msg_list;
994 	int			mi_msg_count;
995 	time_t			mi_lease_period;
996 					/*
997 					 * not guaranteed to be accurate.
998 					 * only should be used by debug queue.
999 					 */
1000 	kmutex_t		mi_msg_list_lock;
1001 	/*
1002 	 * Zones support.
1003 	 */
1004 	struct zone	*mi_zone; /* Zone mounted in */
1005 	list_node_t	mi_zone_node;  /* linkage into per-zone mi list */
1006 } mntinfo4_t;
1007 
1008 /*
1009  * The values for mi_flags.
1010  *
1011  *	MI4_HARD		 hard or soft mount
1012  *	MI4_PRINTED		 responding message printed
1013  *	MI4_INT			 allow INTR on hard mount
1014  * 	MI4_DOWN		 server is down
1015  *	MI4_NOAC		 don't cache attributes
1016  *	MI4_NOCTO		 no close-to-open consistency
1017  *	MI4_LLOCK		 local locking only (no lockmgr)
1018  *	MI4_GRPID		 System V group id inheritance
1019  *	MI4_SHUTDOWN		 System is rebooting or shutting down
1020  *	MI4_LINK		 server supports link
1021  *	MI4_SYMLINK		 server supports symlink
1022  *	MI4_ACL			 server supports NFSv4 ACLs
1023  *	MI4_NOPRINT		 don't print messages
1024  *	MI4_DIRECTIO		 do direct I/O
1025  *	MI4_RECOV_ACTIV		 filesystem has a recovery thread
1026  *	MI4_REMOVE_ON_LAST_CLOSE remove from server's list
1027  *	MI4_RECOV_FAIL		 client recovery failed
1028  *	MI4_PUBLIC		 public/url option used
1029  *	MI4_MOUNTING		 mount in progress, don't failover
1030  *	MI4_POSIX_LOCK		 if server is using POSIX locking
1031  *	MI4_LOCK_DEBUG		 cmn_err'd posix lock err msg
1032  *	MI4_DEAD		 zone has released it
1033  *	MI4_INACTIVE_IDLE	 inactive thread idle
1034  *	MI4_BADOWNER_DEBUG	 badowner error msg per mount
1035  *	MI4_ASYNC_MGR_STOP	 tell async manager to die
1036  *	MI4_TIMEDOUT		 saw a timeout during zone shutdown
1037  */
/*
 * Bit values for mi_flags.  Every flag below is a distinct single bit.
 * Bit 0x40 is neither defined nor marked as available in this list.
 */
1038 #define	MI4_HARD		 0x1
1039 #define	MI4_PRINTED		 0x2
1040 #define	MI4_INT			 0x4
1041 #define	MI4_DOWN		 0x8
1042 #define	MI4_NOAC		 0x10
1043 #define	MI4_NOCTO		 0x20
1044 #define	MI4_LLOCK		 0x80
1045 #define	MI4_GRPID		 0x100
1046 #define	MI4_SHUTDOWN		 0x200
1047 #define	MI4_LINK		 0x400
1048 #define	MI4_SYMLINK		 0x800
1049 /* 0x1000 is available */
1050 #define	MI4_ACL			 0x2000
1051 /* 0x4000 is available */
1052 /* 0x8000 is available */
1053 /* 0x10000 is available */
1054 #define	MI4_NOPRINT		 0x20000
1055 #define	MI4_DIRECTIO		 0x40000
1056 /* 0x80000 is available */
1057 #define	MI4_RECOV_ACTIV		 0x100000
1058 #define	MI4_REMOVE_ON_LAST_CLOSE 0x200000
1059 #define	MI4_RECOV_FAIL		 0x400000
1060 #define	MI4_PUBLIC		 0x800000
1061 #define	MI4_MOUNTING		 0x1000000
1062 #define	MI4_POSIX_LOCK		 0x2000000
1063 #define	MI4_LOCK_DEBUG		 0x4000000
1064 #define	MI4_DEAD		 0x8000000
1065 #define	MI4_INACTIVE_IDLE	 0x10000000
1066 #define	MI4_BADOWNER_DEBUG	 0x20000000
1067 #define	MI4_ASYNC_MGR_STOP	 0x40000000
1068 #define	MI4_TIMEDOUT		 0x80000000
1069 
/* Non-zero when the mount info reached from vnode `vp' has MI4_INT set. */
1070 #define	INTR4(vp)	(VTOMI4(vp)->mi_flags & MI4_INT)
1071 
/*
 * Evaluates to mi->mi_servers->sv_next, the link following the first
 * entry hanging off mi_servers.  The argument is parenthesized so that
 * any pointer-valued expression (e.g. `p + 1' or `c ? a : b') expands
 * correctly.
 */
1072 #define	FAILOVER_MOUNT4(mi)	((mi)->mi_servers->sv_next)
1073 
1074 /*
1075  * Recovery flags.
1076  *
1077  * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
1078  * that's important), but some flag is needed to indicate that recovery is
1079  * going on for the filesystem.
1080  */
/* Each recovery flag occupies its own bit. */
1081 #define	MI4R_NEED_CLIENTID	0x1
1082 #define	MI4R_REOPEN_FILES	0x2
1083 #define	MI4R_NEED_SECINFO	0x4
1084 #define	MI4R_NEED_NEW_SERVER	0x8
1085 #define	MI4R_REMAP_FILES	0x10
1086 #define	MI4R_SRV_REBOOT		0x20	/* server has rebooted */
1087 #define	MI4R_LOST_STATE		0x40
1088 #define	MI4R_BAD_SEQID		0x80
1089 
/*
 * Wrapper that calls mi_hold(mi).  It expands to a brace-enclosed block
 * rather than an expression, so "if (x) MI4_HOLD(mi); else ..." does not
 * parse; brace the body when using it under if/else.
 */
1090 #define	MI4_HOLD(mi) {		\
1091 	mi_hold(mi);		\
1092 }
1093 
/*
 * Wrapper that calls mi_rele(mi).  It expands to a brace-enclosed block
 * rather than an expression, so "if (x) MI4_RELE(mi); else ..." does not
 * parse; brace the body when using it under if/else.
 */
1094 #define	MI4_RELE(mi) {		\
1095 	mi_rele(mi);		\
1096 }
1097 
1098 /*
1099  * vfs pointer to mount info: casts vfsp->vfs_data to (mntinfo4_t *)
1100  */
1101 #define	VFTOMI4(vfsp)	((mntinfo4_t *)((vfsp)->vfs_data))
1102 
1103 /*
1104  * vnode pointer to mount info: casts vp->v_vfsp->vfs_data to (mntinfo4_t *)
1105  */
1106 #define	VTOMI4(vp)	((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
1107 
1108 /*
1109  * Lease Management
1110  *
1111  * lease_valid is initially set to NFS4_LEASE_NOT_STARTED.  This is when the
1112  * nfs4_server is first created.  lease_valid is then set to
1113  * NFS4_LEASE_UNINITIALIZED when the renew thread is started.  The extra state
1114  * of NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a
1115  * thread already exists when we do SETCLIENTID).  lease_valid is then set to
1116  * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state creating
1117  * operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID as long as
1118  * the lease is renewed.  It is set to NFS4_LEASE_INVALID when the lease
1119  * expires.  Client recovery is needed to set the lease back to
1120  * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
1121  *
1122  * The s_cred is the credential used to mount the first file system for this
1123  * server.  It is used as the credential for the renew thread's calls to the
1124  * server.
1125  *
1126  * The renew thread waits on the condition variable cv_thread_exit.  If the cv
1127  * is signalled, then the thread knows it must check s_thread_exit to see if
1128  * it should exit.  The cv is signaled when the last file system is unmounted
1129  * from a particular server.  s_thread_exit is set to 0 upon thread startup,
1130  * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
1131  * telling the thread to exit.  s_thread_exit is needed to avoid spurious
1132  * wakeups.
1133  *
1134  * state_ref_count is incremented every time a new file is opened and
1135  * decremented every time a file is closed otw.  This keeps track of whether
1136  * the nfs4_server has state associated with it or not.
1137  *
1138  * s_refcnt is the reference count for storage management of the struct
1139  * itself.
1140  *
1141  * mntinfo4_list points to the doubly linked list of mntinfo4s that share
1142  * this nfs4_server (ie: <clientid, saddr> pair) in the current zone.  This is
1143  * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
1144  *
1145  * s_recovlock is used to synchronize recovery operations.  The thread
1146  * that is recovering the client must acquire it as a writer.  If the
1147  * thread is using the clientid (including recovery operations on other
1148  * state), acquire it as a reader.
1149  *
1150  * The 's_otw_call_count' keeps track of the number of outstanding over the
1151  * wire requests for this structure.  The struct will not go away as long
1152  * as this is non-zero (or s_refcnt is non-zero).
1153  *
1154  * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
1155  * variable to let the renew thread know when an outstanding otw request has
1156  * finished.
1157  *
1158  * 'zoneid' and 'zone_globals' are set at creation of this structure
1159  * and are read-only after that; no lock is required to read them.
1160  *
1161  * s_lock protects: everything except cv_thread_exit and s_recovlock.
1162  *
1163  * s_program is used as the index into the nfs4_callback_globals's
1164  * nfs4prog2server table.  When a callback request comes in, we can
1165  * use that request's program number (minus NFS4_CALLBACK) as an index
1166  * into the nfs4prog2server.  That entry will hold the nfs4_server_t ptr.
1167  * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
1168  * delegated rnode4_ts).
1169  *
1170  * Lock order:
1171  * nfs4_server::s_lock > mntinfo4::mi_lock
1172  * nfs_rtable4_lock > s_lock
1173  * nfs4_server_lst_lock > s_lock
1174  * s_recovlock > s_lock
1175  */
1176 struct nfs4_callback_globals;	/* defined later in this header */
1177 
/*
 * Per-server client state; the lease, locking and reference-count rules
 * are described in the "Lease Management" comment above.
 */
1178 typedef struct nfs4_server {
1179 	struct nfs4_server	*forw;
1180 	struct nfs4_server	*back;
1181 	struct netbuf		saddr;
1182 	uint_t			s_flags; /* see below */
1183 	uint_t			s_refcnt;	/* storage mgmt ref count */
1184 	clientid4		clientid;	/* what we get from server */
1185 	nfs_client_id4		clidtosend;	/* what we send to server */
1186 	mntinfo4_t		*mntinfo4_list;	/* mntinfo4s sharing this */
1187 	int			lease_valid;	/* one of NFS4_LEASE_* */
1188 	time_t			s_lease_time;
1189 	time_t			last_renewal_time;
1190 	timespec_t		propagation_delay;
1191 	cred_t			*s_cred;	/* cred for renew thread */
1192 	kcondvar_t		cv_thread_exit;	/* renew thread waits here */
1193 	int			s_thread_exit;	/* NFS4_THREAD_EXIT => exit */
1194 	int			state_ref_count; /* ++ on open, -- on otw close */
1195 	int			s_otw_call_count; /* outstanding otw requests */
1196 	kcondvar_t		s_cv_otw_count;	/* otw request finished */
1197 	kcondvar_t		s_clientid_pend;
1198 	kmutex_t		s_lock;		/* see "s_lock protects" */
1199 	list_t			s_deleg_list;	/* delegated rnode4_ts */
1200 	rpcprog_t		s_program;	/* nfs4prog2server index */
1201 	nfs_rwlock_t		s_recovlock;	/* synchronizes recovery */
1202 	kcondvar_t		wait_cb_null; /* used to wait for CB_NULL */
1203 	zoneid_t		zoneid;	/* zone using this nfs4_server_t */
1204 	struct nfs4_callback_globals *zone_globals;	/* globals */
1205 } nfs4_server_t;
1206 
1207 /* nfs4_server flags (s_flags); each value is a distinct bit */
1208 #define	N4S_CLIENTID_SET	1	/* server has our clientid */
1209 #define	N4S_CLIENTID_PEND	0x2	/* server doesn't have clientid */
1210 #define	N4S_CB_PINGED		0x4	/* server has sent us a CB_NULL */
1211 #define	N4S_CB_WAITER		0x8	/* is/has wait{ing/ed} for cb_null */
1212 #define	N4S_BADOWNER_DEBUG	0x10	/* bad owner err msg per client */
1213 
/* NOTE(review): 10000 == 10ms implies the unit is microseconds -- confirm. */
1214 #define	N4S_CB_PAUSE_TIME	10000	/* Amount of time to pause (10ms) */
1215 
/*
 * Wraps a single lease time value.  NOTE(review): the consumer of this
 * argument structure is not visible in this header.
 */
1216 struct lease_time_arg {
1217 	time_t	lease_time;
1218 };
1219 
/*
 * Type of the nfs4_delegreturn_policy variable declared further down in
 * this header.  NOTE(review): the names suggest the point at which a
 * delegation is returned; confirm exact semantics in the delegation code.
 */
1220 enum nfs4_delegreturn_policy {
1221 	IMMEDIATE,
1222 	FIRSTCLOSE,
1223 	LASTCLOSE,
1224 	INACTIVE
1225 };
1226 
1227 /*
1228  * Operation hints for the recovery framework (mostly).
1229  *
1230  * EXCEPTIONS:
1231  * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
1232  *	These hints exist to allow a user to visit/readdir an R4SRVSTUB dir.
1233  *	(dir represents the root of a server fs that has not yet been
1234  *	mounted at client)
1235  */
1236 typedef enum {
1237 	OH_OTHER,
1238 	OH_READ,
1239 	OH_WRITE,
1240 	OH_COMMIT,
1241 	OH_VFH_RENAME,
1242 	OH_MOUNT,
1243 	OH_CLOSE,	/* state-releasing, see OH_IS_STATE_RELE() */
1244 	OH_LOCKU,	/* state-releasing, see OH_IS_STATE_RELE() */
1245 	OH_DELEGRETURN,	/* state-releasing, see OH_IS_STATE_RELE() */
1246 	OH_ACCESS,	/* this and the remaining hints: R4SRVSTUB exceptions */
1247 	OH_GETACL,
1248 	OH_GETATTR,
1249 	OH_LOOKUP,
1250 	OH_READDIR
1251 } nfs4_op_hint_t;
1252 
1253 /*
1254  * This macro evaluates to non-zero if the given op releases state at the
1255  * server.
1256  */
/* Note: `op' may be evaluated up to three times; pass no side effects. */
1257 #define	OH_IS_STATE_RELE(op)	((op) == OH_CLOSE || (op) == OH_LOCKU || \
1258 				(op) == OH_DELEGRETURN)
1259 
1260 #ifdef _KERNEL
/*
 * Everything up to the matching #endif is compiled only when _KERNEL is
 * defined.  These are declarations only; the functions and variables are
 * defined in other source files.
 */
1261 
1262 extern void	nfs4_async_manager(struct vfs *);
1263 extern void	nfs4_async_manager_stop(struct vfs *);
1264 extern void	nfs4_async_stop(struct vfs *);
1265 extern int	nfs4_async_stop_sig(struct vfs *);
1266 extern int	nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
1267 				struct seg *, cred_t *,
1268 				void (*)(vnode_t *, u_offset_t,
1269 				caddr_t, struct seg *, cred_t *));
1270 extern int	nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
1271 				int, cred_t *, int (*)(vnode_t *, page_t *,
1272 				u_offset_t, size_t, int, cred_t *));
1273 extern int	nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
1274 				int, cred_t *, int (*)(vnode_t *, page_t *,
1275 				u_offset_t, size_t, int, cred_t *));
1276 extern void	nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
1277 				cred_t *, void (*)(vnode_t *, page_t *,
1278 				offset3, count3, cred_t *));
1279 extern void	nfs4_async_inactive(vnode_t *, cred_t *);
1280 extern void	nfs4_inactive_thread(mntinfo4_t *mi);
1281 extern void	nfs4_inactive_otw(vnode_t *, cred_t *);
1282 extern int	nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
1283 
1284 extern int	nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
1285 extern void	nfs4_mnt_kstat_init(struct vfs *);
1286 
1287 extern void	rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
1288 			struct COMPOUND4res_clnt *, cred_t *, int *, int,
1289 			nfs4_error_t *);
1290 extern void	nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
1291 extern int	nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
1292 				nfs4_ga_res_t *, bitmap4, cred_t *);
1293 
1294 extern void	nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
1295 extern void	nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
1296 				hrtime_t, cred_t *, int,
1297 				change_info4 *);
1298 extern void	nfs4_purge_rddir_cache(vnode_t *);
1299 extern void	nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
1300 extern void	nfs4_purge_caches(vnode_t *, int, cred_t *, int);
1301 extern void	nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
1302 
1303 extern void	nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
1304 extern void	nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
1305 			vnode_t *);
1306 
1307 extern void	nfs4args_lookup_free(nfs_argop4 *, int);
1308 extern void	nfs4args_copen_free(OPEN4cargs *);
1309 
1310 extern void	nfs4_printfhandle(nfs4_fhandle_t *);
1311 
1312 extern void	nfs_free_mi4(mntinfo4_t *);
1313 extern void	sv4_free(servinfo4_t *);
1314 extern void	nfs4_mi_zonelist_add(mntinfo4_t *);
1315 extern int	nfs4_mi_zonelist_remove(mntinfo4_t *);
1316 extern int 	nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
1317 extern void	nfs4_secinfo_init(void);
1318 extern void	nfs4_secinfo_fini(void);
1319 extern int	nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
1320 extern int 	nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
1321 extern void	secinfo_free(sv_secinfo_t *);
1322 extern void	save_mnt_secinfo(servinfo4_t *);
1323 extern void	check_mnt_secinfo(servinfo4_t *, vnode_t *);
1324 extern int	vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
1325 				enum nfs_opnum4, bitmap4 supp_mask);
1326 extern int	nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
1327 			int, cred_t *);
1328 extern void	nfs4_write_error(vnode_t *, int, cred_t *);
1329 extern void	nfs4_lockcompletion(vnode_t *, int);
1330 extern bool_t	nfs4_map_lost_lock_conflict(vnode_t *);
1331 extern int	vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
1332 extern void	nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
1333 		    bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
1334 		    nfs4_error_t *, int *);
1335 extern void	nfs4_error_zinit(nfs4_error_t *);
1336 extern void	nfs4_error_init(nfs4_error_t *, int);
1337 
/* Called by the MI4_HOLD() and MI4_RELE() macros defined in this header. */
1338 extern void 	mi_hold(mntinfo4_t *);
1339 extern void	mi_rele(mntinfo4_t *);
1340 
1341 #ifdef DEBUG
1342 extern int	nfs4_consistent_type(vnode_t *);
1343 #endif
1344 
1345 extern void	nfs4_init_dot_entries(void);
1346 extern void	nfs4_destroy_dot_entries(void);
1347 extern struct nfs4_callback_globals	*nfs4_get_callback_globals(void);
1348 
1349 extern struct nfs4_server nfs4_server_lst;
1350 
1351 extern clock_t nfs_write_error_interval;
1352 
1353 #endif /* _KERNEL */
1354 
1355 /*
1356  * Flags for nfs4getfh_otw.
1357  */
1358 
/* The two flags occupy different bits. */
1359 #define	NFS4_GETFH_PUBLIC	0x01
1360 #define	NFS4_GETFH_NEEDSOP	0x02
1361 
1362 /*
1363  * Found through rnodes.
1364  *
1365  * The os_open_ref_count keeps track of the number of open file descriptor
1366  * references on this data structure.  It will be bumped for any successful
1367  * OTW OPEN call and any OPEN call that determines the OTW call is not
1368  * necessary and the open stream hasn't just been created (see
1369  * nfs4_is_otw_open_necessary).
1370  *
1371  * os_mapcnt is a count of the number of mmapped pages for a particular
1372  * open stream; this in conjunction w/ os_open_ref_count is used to
1373  * determine when to do a close to the server.  This is necessary because
1374  * of the semantics of doing open, mmap, close; the OTW close must wait
1375  * until all open and mmap references have vanished.
1376  *
1377  * 'os_valid' tells us whether this structure is about to be freed or not,
1378  * if it is then don't return it in find_open_stream().
1379  *
1380  * 'os_final_close' is set when a CLOSE OTW was attempted.  This is needed
1381  * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
1382  * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE.  It
1383  * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
1384  * that tried to close OTW but failed, and left the state cleanup to
1385  * nfs4_inactive/CLOSE_FORCE.
1386  *
1387  * 'os_force_close' is used to let us know if an intervening thread came
1388  * and reopened the open stream after we decided to issue a CLOSE_FORCE,
1389  * but before we could actually process the CLOSE_FORCE.
1390  *
1391  * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
1392  * lost state queue.
1393  *
1394  * 'open_stateid' is set to the last open stateid returned by the server unless
1395  * 'os_delegation' is 1, in which case 'open_stateid' refers to the
1396  * delegation stateid returned by the server.  This is used in cases where the
1397  * client tries to OPEN a file but already has a suitable delegation, so we
1398  * just stick the delegation stateid in the open stream.
1399  *
1400  * os_dc_openacc are open access bits which have been granted to the
1401  * open stream by virtue of a delegation, but which have not been seen
1402  * by the server.  This applies even if the open stream does not have
1403  * os_delegation set.  These bits are used when setting file locks to
1404  * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
1405  * before the lock request can be sent to the server.  See
1406  * nfs4frlock_check_deleg().
1407  *
1408  * 'os_mmap_read/write' keep track of the read and write access our memory
1409  * maps require.  We need to keep track of this so we can provide the proper
1410  * access bits in the open/mmap/close/reboot/reopen case.
1411  *
1412  * 'os_failed_reopen' tells us that we failed to successfully reopen this
1413  * open stream; therefore, we should not use this open stateid as it is
1414  * not valid anymore. This flag is also used to indicate an unsuccessful
1415  * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
1416  *
1417  * If 'os_orig_oo_name' is different than os_open_owner's oo_name
1418  * then this tells us that this open stream's open owner used a
1419  * bad seqid (that is, got NFS4ERR_BAD_SEQID).  If different, this open
1420  * stream will no longer be used for future OTW state releasing calls.
1421  *
1422  * Lock ordering:
1423  * rnode4_t::r_os_lock > os_sync_lock
1424  * os_sync_lock > rnode4_t::r_statelock
1425  * os_sync_lock > rnode4_t::r_statev4_lock
1426  * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
1427  *
1428  * The 'os_sync_lock' protects:
1429  *	open_stateid
1430  *	os_dc_openacc
1431  *	os_delegation
1432  *	os_failed_reopen
1433  *	os_final_close
1434  *	os_force_close
1435  *	os_mapcnt
1436  *	os_mmap_read
1437  *	os_mmap_write
1438  *	os_open_ref_count
1439  *	os_pending_close
1440  *	os_share_acc_read
1441  *	os_share_acc_write
1442  *	os_share_deny_none
1443  *	os_share_deny_read
1444  *	os_share_deny_write
1445  *	os_ref_count
1446  *	os_valid
1447  *
1448  * The rnode4_t::r_os_lock protects:
1449  *	os_node
1450  *
1451  * These fields are set at creation time and
1452  * read only after that:
1453  *	os_open_owner
1454  *	os_orig_oo_name
1455  */
/*
 * Open stream, found through rnodes.  Field semantics, lock ordering and
 * which lock protects which field are spelled out in the comment above.
 */
1456 typedef struct nfs4_open_stream {
1457 	uint64_t		os_share_acc_read;
1458 	uint64_t		os_share_acc_write;
1459 	uint64_t		os_mmap_read;	/* read access mmaps require */
1460 	uint64_t		os_mmap_write;	/* write access mmaps require */
1461 	uint32_t		os_share_deny_none;
1462 	uint32_t		os_share_deny_read;
1463 	uint32_t		os_share_deny_write;
1464 	stateid4		open_stateid;	/* deleg stateid if os_delegation */
1465 	int			os_dc_openacc;	/* deleg access unseen by server */
1466 	int			os_ref_count;
1467 	unsigned		os_valid:1;	/* about to be freed or not */
1468 	unsigned 		os_delegation:1; /* open_stateid is a deleg's */
1469 	unsigned		os_final_close:1; /* CLOSE OTW was attempted */
1470 	unsigned 		os_pending_close:1; /* CLOSE on lost state q */
1471 	unsigned 		os_failed_reopen:1; /* stateid no longer valid */
1472 	unsigned		os_force_close:1; /* CLOSE_FORCE bookkeeping */
1473 	int			os_open_ref_count; /* open fd references */
1474 	long			os_mapcnt;	/* number of mmapped pages */
1475 	list_node_t		os_node;	/* under rnode4_t::r_os_lock */
1476 	struct nfs4_open_owner	*os_open_owner;	/* set at creation */
1477 	uint64_t		os_orig_oo_name; /* set at creation */
1478 	kmutex_t		os_sync_lock;
1479 } nfs4_open_stream_t;
1480 
1481 /*
1482  * This structure describes the format of the lock_owner_name
1483  * field of the lock owner.
1484  */
1485 
/*
 * NOTE(review): nfs4_lock_owner carries a "Fix this to always be 12
 * bytes" remark for its member of this type; with natural alignment of
 * the uint64_t member the struct may be padded beyond 12 bytes -- confirm.
 */
1486 typedef struct nfs4_lo_name {
1487 	uint64_t	ln_seq_num;
1488 	pid_t		ln_pid;
1489 } nfs4_lo_name_t;
1490 
1491 /*
1492  * Flags for lo_flags.
1493  */
1494 #define	NFS4_LOCK_SEQID_INUSE	0x1	/* guards lock_seqid get/set */
1495 #define	NFS4_BAD_SEQID_LOCK	0x2	/* owner got NFS4ERR_BAD_SEQID */
1496 
1497 /*
1498  * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
1499  * off the rnode.  If the links are NULL it means this object is not on the
1500  * list.
1501  *
1502  * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
1503  * didn't get a response back.  This is used to figure out if we have
1504  * possible remote v4 locks, so that we can clean up at process exit.  In
1505  * theory, the client should be able to figure out if the server received
1506  * the request (based on what seqid works), so maybe we can get rid of this
1507  * flag someday.
1508  *
1509  * 'lo_ref_count' tells us how many processes/threads are using this data
1510  * structure.  The rnode's list accounts for one reference.
1511  *
1512  * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
1513  * data structure.  It is then set to NFS4_PERM_CREATED when a lock request
1514  * is successful using this lock owner structure.  We need to keep 'temporary'
1515  * lock owners around so we can properly keep the lock seqid synchronization
1516  * when multiple processes/threads are trying to create the lock owner for the
1517  * first time (especially with the DENIED error case).  Once
1518  * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
1519  *
1520  * 'lo_valid' tells us whether this structure is about to be freed or not,
1521  * if it is then don't return it from find_lock_owner().
1522  *
1523  * Retrieving and setting of 'lock_seqid' is protected by the
1524  * NFS4_LOCK_SEQID_INUSE flag.  Waiters for NFS4_LOCK_SEQID_INUSE should
1525  * use 'lo_cv_seqid_sync'.
1526  *
1527  * The setting of 'lock_stateid' is protected by the
1528  * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'.  The retrieving of the
1529  * 'lock_stateid' is protected by 'lo_lock', with the additional
1530  * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
1531  * NFS4ERR_BAD_STATEID as appropriate.
1532  *
1533  * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
1534  * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID).  With this set,
1535  * this lock owner will no longer be used for future OTW calls.  Once set,
1536  * it is never unset.
1537  *
1538  * Lock ordering:
1539  * rnode4_t::r_statev4_lock > lo_lock
1540  */
/* Lock owner; see the comment above for field semantics and lock ordering. */
1541 typedef struct nfs4_lock_owner {
1542 	struct nfs4_lock_owner	*lo_next_rnode;	/* circular list off rnode */
1543 	struct nfs4_lock_owner	*lo_prev_rnode;	/* NULL links: not on list */
1544 	int			lo_pid;
1545 	stateid4		lock_stateid;
1546 	seqid4			lock_seqid;
1547 	/*
1548 	 * Fix this to always be 12 bytes
1549 	 */
1550 	nfs4_lo_name_t		lock_owner_name;
1551 	int			lo_ref_count;	/* users; rnode list holds one */
1552 	int			lo_valid;	/* about to be freed or not */
1553 	int			lo_pending_rqsts; /* request sent, no reply */
1554 	int			lo_just_created; /* NFS4_{JUST,PERM}_CREATED */
1555 	int			lo_flags;	/* NFS4_LOCK_SEQID_INUSE etc. */
1556 	kcondvar_t		lo_cv_seqid_sync; /* wait for SEQID_INUSE */
1557 	kmutex_t		lo_lock;
1558 	kthread_t		*lo_seqid_holder; /* debugging aid */
1559 } nfs4_lock_owner_t;
1560 
1561 /* for nfs4_lock_owner_t lookups; type of find_lock_owner()'s last argument */
1562 typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
1563 
1564 /* Number of times to retry a call that fails with a state-independent error */
1565 #define	NFS4_NUM_RECOV_RETRIES	3
1566 
/*
 * Type of the cur_sid_type member of nfs4_stateid_types_t below.
 * NOTE(review): DEL/LOCK/OPEN/SPEC presumably stand for delegation, lock,
 * open and special stateids -- confirm against the users.
 */
1567 typedef enum {
1568 	NO_SID,
1569 	DEL_SID,
1570 	LOCK_SID,
1571 	OPEN_SID,
1572 	SPEC_SID
1573 } nfs4_stateid_type_t;
1574 
/*
 * Three stateids plus a tag naming the current kind; handled by
 * nfs4_init_stateid_types() and nfs4_save_stateid(), which are
 * declared further down in this header.
 * NOTE(review): d_sid/l_sid/o_sid presumably pair with DEL_SID/LOCK_SID/
 * OPEN_SID -- confirm.
 */
1575 typedef struct nfs4_stateid_types {
1576 	stateid4 d_sid;
1577 	stateid4 l_sid;
1578 	stateid4 o_sid;
1579 	nfs4_stateid_type_t cur_sid_type;
1580 } nfs4_stateid_types_t;
1581 
1582 /*
1583  * Per-zone data for dealing with callbacks.  Included here solely for the
1584  * benefit of MDB.
1585  */
/* Every member is a named kstat; embedded in nfs4_callback_globals below. */
1586 struct nfs4_callback_stats {
1587 	kstat_named_t	delegations;
1588 	kstat_named_t	cb_getattr;
1589 	kstat_named_t	cb_recall;
1590 	kstat_named_t	cb_null;
1591 	kstat_named_t	cb_dispatch;
1592 	kstat_named_t	delegaccept_r;
1593 	kstat_named_t	delegaccept_rw;
1594 	kstat_named_t	delegreturn;
1595 	kstat_named_t	callbacks;
1596 	kstat_named_t	claim_cur;
1597 	kstat_named_t	claim_cur_ok;
1598 	kstat_named_t	recall_trunc;
1599 	kstat_named_t	recall_failed;
1600 	kstat_named_t	return_limit_write;
1601 	kstat_named_t	return_limit_addmap;
1602 	kstat_named_t	deleg_recover;
1603 	kstat_named_t	cb_illegal;
1604 };
1605 
/*
 * Callback state referenced from nfs4_server_t::zone_globals and returned
 * by nfs4_get_callback_globals().  The two trailing counters exist only
 * in DEBUG builds, so the structure size depends on DEBUG.
 */
1606 struct nfs4_callback_globals {
1607 	kmutex_t nfs4_cb_lock;
1608 	kmutex_t nfs4_dlist_lock;
1609 	int nfs4_program_hint;
1610 	/* this table maps the program number to the nfs4_server structure */
1611 	struct nfs4_server **nfs4prog2server;
1612 	list_t nfs4_dlist;
1613 	list_t nfs4_cb_ports;
1614 	struct nfs4_callback_stats nfs4_callback_stats;
1615 #ifdef DEBUG
1616 	int nfs4_dlistadd_c;
1617 	int nfs4_dlistclean_c;
1618 #endif
1619 };
1620 
/*
 * Close type passed to nfs4close_one().  NOTE(review): per-value
 * semantics are not described in this header; see the users.
 */
1621 typedef enum {
1622 	CLOSE_NORM,
1623 	CLOSE_DELMAP,
1624 	CLOSE_FORCE,
1625 	CLOSE_RESEND,
1626 	CLOSE_AFTER_RESEND
1627 } nfs4_close_type_t;
1628 
1629 /*
1630  * Structure to hold the bad seqid information that is passed
1631  * to the recovery framework.
1632  */
1633 typedef struct nfs4_bseqid_entry {
1634 	nfs4_open_owner_t	*bs_oop;
1635 	nfs4_lock_owner_t	*bs_lop;
1636 	vnode_t			*bs_vp;
1637 	pid_t			bs_pid;
1638 	nfs4_tag_type_t		bs_tag;
1639 	seqid4			bs_seqid;
	/* NOTE(review): presumably linkage for mntinfo4's mi_bseqid_list */
1640 	list_node_t		bs_node;
1641 } nfs4_bseqid_entry_t;
1642 
1643 #ifdef _KERNEL
/*
 * Kernel-only declarations for the open stream, lock owner, open owner,
 * nfs4_server and callback/delegation interfaces.  Declarations only; the
 * definitions are in other source files.
 */
1644 
1645 extern void	nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
1646 		    nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
1647 		    size_t, uint_t, uint_t);
1648 extern void	nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
1649 extern void	nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
1650 extern void	open_owner_hold(nfs4_open_owner_t *);
1651 extern void	open_owner_rele(nfs4_open_owner_t *);
1652 extern nfs4_open_stream_t	*find_or_create_open_stream(nfs4_open_owner_t *,
1653 					struct rnode4 *, int *);
1654 extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
1655 				struct rnode4 *);
1656 extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
1657 				struct rnode4 *rp);
1658 extern void	open_stream_hold(nfs4_open_stream_t *);
1659 extern void	open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
1660 extern int	nfs4close_all(vnode_t *, cred_t *);
1661 extern void	lock_owner_hold(nfs4_lock_owner_t *);
1662 extern void	lock_owner_rele(nfs4_lock_owner_t *);
1663 extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
1664 extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
1665 extern void	nfs4_rnode_remove_lock_owner(struct rnode4 *,
1666 			nfs4_lock_owner_t *);
1667 extern void	nfs4_flush_lock_owners(struct rnode4 *);
1668 extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
1669 extern void	nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
1670 		    nfs4_tag_type_t);
1671 extern void	nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
1672 extern void	nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
1673 		    nfs4_tag_type_t);
1674 extern void	nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
1675 extern int	nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
1676 extern void	nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
1677 extern int	nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
1678 extern void	nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
1679 			nfs4_open_stream_t *, clientid4, locker4 *);
1680 extern void	nfs4_destroy_open_owner(nfs4_open_owner_t *);
1681 
1682 extern void		nfs4_renew_lease_thread(nfs4_server_t *);
1683 extern nfs4_server_t	*find_nfs4_server(mntinfo4_t *);
1684 extern nfs4_server_t	*find_nfs4_server_all(mntinfo4_t *, int all);
1685 extern nfs4_server_t	*new_nfs4_server(servinfo4_t *,	cred_t *);
1686 extern void		nfs4_mark_srv_dead(nfs4_server_t *);
1687 extern nfs4_server_t	*servinfo4_to_nfs4_server(servinfo4_t *);
1688 extern void		nfs4_inc_state_ref_count(mntinfo4_t *);
1689 extern void		nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
1690 				mntinfo4_t *);
1691 extern void		nfs4_dec_state_ref_count(mntinfo4_t *);
1692 extern void		nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
1693 				mntinfo4_t *);
1694 extern clientid4	mi2clientid(mntinfo4_t *);
1695 extern int		nfs4_server_in_recovery(nfs4_server_t *);
1696 extern bool_t		nfs4_server_vlock(nfs4_server_t *, int);
1697 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
1698 extern uint64_t		nfs4_get_new_oo_name(void);
1699 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
1700 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
1701 extern void	nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
1702 			int, u_offset_t, cred_t *, nfs4_error_t *,
1703 			nfs4_lost_rqst_t *, int *);
1704 extern void	nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
1705 		    nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
1706 		    vnode_t *, int, int);
1707 extern void	nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
1708 		    nfs4_open_stream_t *, vnode_t *, cred_t *,
1709 		    nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
1710 extern seqid4	nfs4_get_open_seqid(nfs4_open_owner_t *);
1711 extern cred_t	*nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *);
1712 extern void	nfs4_init_stateid_types(nfs4_stateid_types_t *);
1713 extern void	nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);
1714 
/* Ordered before nfs4_server::s_lock (see the lock order list above). */
1715 extern kmutex_t nfs4_server_lst_lock;
1716 
1717 extern void	nfs4callback_destroy(nfs4_server_t *);
1718 extern void	nfs4_callback_init(void);
1719 extern void	nfs4_callback_fini(void);
1720 extern void	nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
1721 			SETCLIENTID4args *);
1722 extern void	nfs4delegreturn_async(struct rnode4 *, int, bool_t);
1723 
1724 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;
1725 
1726 extern void	nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
1727 extern void	nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
1728 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *);
1729 extern bool_t	nfs4_fs_active(nfs4_server_t *);
1730 extern void	nfs4_server_rele(nfs4_server_t *);
1731 extern bool_t	inlease(nfs4_server_t *);
1732 extern bool_t	nfs4_has_pages(vnode_t *);
1733 extern void	nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);
1734 
1735 #endif /* _KERNEL */
1736 
1737 /*
1738  * Client State Recovery
1739  */
1740 
1741 /*
1742  * The following defines are single-bit flags used for rs_flags in
1743  * an nfs4_recov_state_t structure.
1744  *
1745  * NFS4_RS_RENAME_HELD		Indicates that the mi_rename_lock was held.
1746  * NFS4_RS_GRACE_MSG		Set once we have uprintf'ed a grace message.
1747  * NFS4_RS_DELAY_MSG		Set once we have uprintf'ed a delay message.
1748  * NFS4_RS_RECALL_HELD1		r_deleg_recall_lock for vp1 was held.
1749  * NFS4_RS_RECALL_HELD2		r_deleg_recall_lock for vp2 was held.
1750  */
1751 #define	NFS4_RS_RENAME_HELD	0x000000001
1752 #define	NFS4_RS_GRACE_MSG	0x000000002
1753 #define	NFS4_RS_DELAY_MSG	0x000000004
1754 #define	NFS4_RS_RECALL_HELD1	0x000000008
1755 #define	NFS4_RS_RECALL_HELD2	0x000000010
1756 
1757 /*
1758  * Information that is retrieved from nfs4_start_op() and that is
1759  * passed into nfs4_end_op().
1760  *
1761  * rs_sp is a reference to the nfs4_server that was found, or NULL.
1762  *
1763  * rs_num_retry_despite_err is the number of times the client retried
1764  * an OTW op despite a recovery error.  It is only incremented for
1765  * hints exempt from normal R4RECOVERR processing
1766  * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN).  (XXX this special-case code
1767  * needs review for possible removal.)
1768  * It is initialized wherever nfs4_recov_state_t is declared -- usually
1769  * very near initialization of rs_flags.
1770  */
1771 typedef struct {
1772 	nfs4_server_t	*rs_sp;		/* nfs4_server found, or NULL */
1773 	int		rs_flags;	/* NFS4_RS_* flags */
1774 	int		rs_num_retry_despite_err; /* OTW retry count */
1775 } nfs4_recov_state_t;
1776 
1777 /*
1778  * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root,
1779  * passed in the int argument of each of those functions.
1780  */
1781 #define	NFS4_REMAP_CKATTRS	1
1782 #define	NFS4_REMAP_NEEDSOP	2
1783 
1784 #ifdef _KERNEL
1785 
1786 extern int	nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
1787 			vnode_t *, int, int *, int, nfs4_recov_state_t *);
1788 extern void	nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
1789 			nfs4_error_t *);
1790 extern void	nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
1791 			open_claim_type4, bool_t, bool_t);
1792 extern void	nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
1793 extern void	nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
1794 			nfs4_error_t *);
1795 extern void	nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
1796 			nfs4_error_t *);
1797 extern int	nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
1798 			vnode_t *, cred_t *, vnode_t **, int);
1799 extern void	nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);
1800 
1801 extern int	nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
1802 extern int	nfs4_recov_marks_dead(nfsstat4);
1803 extern bool_t	nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
1804 			vnode_t *, vnode_t *, stateid4 *,
1805 			nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *);
1806 extern int	nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
1807 			nfs4_recov_state_t *);
1808 extern void	nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
1809 			nfs4_recov_state_t *, bool_t);
1810 extern int	nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
1811 			nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
1812 extern void	nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
1813 				nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
1814 extern char	*nfs4_recov_action_to_str(nfs4_recov_t);
1815 
1816 extern int	wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
1817 			nfs4_recov_state_t *);
1818 extern void	nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
1819 extern void	nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
1820 		    int, nfsstat4);
1821 extern time_t	nfs4err_delay_time;
1822 extern void	nfs4_set_grace_wait(mntinfo4_t *);
1823 extern void	nfs4_set_delay_wait(vnode_t *);
1824 extern int	nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
1825 extern int	nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
1826 extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
1827 		    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
1828 		    seqid4);
1829 
1830 extern void	nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
1831 			nfs4_error_t *);
1832 extern void	nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
1833 			nfs4_server_t *);
1834 extern int	nfs4_rpc_retry_error(int);
1835 extern int	nfs4_try_failover(nfs4_error_t *);
1836 extern void	nfs4_free_msg(nfs4_debug_msg_t *);
1837 extern void	nfs4_mnt_recov_kstat_init(vfs_t *);
1838 extern void	nfs4_mi_kstat_inc_delay(mntinfo4_t *);
1839 extern void	nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
1840 extern char	*nfs4_stat_to_str(nfsstat4);
1841 extern char	*nfs4_op_to_str(nfs_opnum4);
1842 
1843 extern void	nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
1844 		    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
1845 		    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
1846 extern void	nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
1847 		    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
1848 #pragma	rarely_called(nfs4_queue_event)
1849 #pragma	rarely_called(nfs4_queue_fact)
1850 
1851 /* Used for preformed "." and ".." dirents */
1852 extern char	*nfs4_dot_entries;
1853 extern char	*nfs4_dot_dot_entry;
1854 
1855 #ifdef	DEBUG
1856 extern uint_t	nfs4_tsd_key;
1857 #endif	/* DEBUG */
1858 
1859 #endif /* _KERNEL */
1860 
1861 /*
1862  * Filehandle management.
1863  *
1864  * Filehandles can change in v4, so rather than storing the filehandle
1865  * directly in the rnode, etc., we manage the filehandle through one of
1866  * these objects.
1867  * Locking: sfh_fh and sfh_tree are protected by the filesystem's
1868  * mi_fh_lock.  The reference count and flags are protected by sfh_lock.
1869  * sfh_mi is read-only.
1870  *
1871  * mntinfo4_t::mi_fh_lock > sfh_lock.
1872  */
1873 
1874 typedef struct nfs4_sharedfh {
1875 	nfs_fh4 sfh_fh;			/* key and current filehandle */
1876 	kmutex_t sfh_lock;		/* protects sfh_refcnt, sfh_flags */
1877 	uint_t sfh_refcnt;		/* reference count */
1878 	uint_t sfh_flags;		/* SFH4_* flags (see below) */
1879 	mntinfo4_t *sfh_mi;		/* backptr to filesystem */
1880 	avl_node_t sfh_tree;		/* used by avl package */
1881 } nfs4_sharedfh_t;
1882 
1883 #define	SFH4_SAME(sfh1, sfh2)	((sfh1) == (sfh2))	/* == on the args */
1884 
1885 /*
1886  * Flags.
1887  */
1888 #define	SFH4_IN_TREE	0x1		/* currently in an AVL tree */
1889 
1890 #ifdef _KERNEL
1891 
1892 extern void sfh4_createtab(avl_tree_t *);
1893 extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
1894 extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
1895 				nfs4_sharedfh_t *);
1896 extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
1897 extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
1898 extern void sfh4_hold(nfs4_sharedfh_t *);
1899 extern void sfh4_rele(nfs4_sharedfh_t **);
1900 extern void sfh4_printfhandle(const nfs4_sharedfh_t *);
1901 
1902 #endif /* _KERNEL */
1903 
1904 /*
1905  * Path and file name management.
1906  *
1907  * This type stores the name of an entry in the filesystem and keeps enough
1908  * information that it can provide a complete path.  All fields are
1909  * protected by fn_lock, except for the reference count, which is managed
1910  * using atomic add/subtract.
1911  *
1912  * Lock order: child and then parent.
1913  */
1914 
1915 typedef struct nfs4_fname {
1916 	struct nfs4_fname *fn_parent;	/* parent name; null if fs root */
1917 	char *fn_name;			/* the actual name */
1918 	ssize_t fn_len;			/* strlen(fn_name) */
1919 	uint32_t fn_refcnt;		/* reference count (atomic ops) */
1920 	kmutex_t fn_lock;		/* protects all but fn_refcnt */
1921 	avl_node_t fn_tree;		/* avl linkage (parent's tree?) */
1922 	avl_tree_t fn_children;		/* children, if any */
1923 } nfs4_fname_t;
1924 
1925 #ifdef _KERNEL
1926 /* Parenthesized so the expansion is always a single operand. */
1927 extern vnode_t	nfs4_xattr_notsupp_vnode;
1928 #define	NFS4_XATTR_DIR_NOTSUPP	(&nfs4_xattr_notsupp_vnode)
1929 
1930 extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *);
1931 extern void fn_hold(nfs4_fname_t *);
1932 extern void fn_rele(nfs4_fname_t **);
1933 extern char *fn_name(nfs4_fname_t *);
1934 extern char *fn_path(nfs4_fname_t *);
1935 extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
1936 extern nfs4_fname_t *fn_parent(nfs4_fname_t *);
1937 
1938 #endif /* _KERNEL */
1939 
1940 /*
1941  * Per-zone data for managing client handles, included in this file for the
1942  * benefit of MDB.
1943  */
1944 struct nfs4_clnt {
1945 	struct chhead	*nfscl_chtable4;	/* client handles (chhead) */
1946 	kmutex_t	nfscl_chtable4_lock;	/* presumably guards above */
1947 	zoneid_t	nfscl_zoneid;		/* zone this data is for */
1948 	list_node_t	nfscl_node;		/* list linkage */
1949 	struct clstat4	nfscl_stat;		/* per-zone clstat4 */
1950 };
1951 
1952 #ifdef	__cplusplus
1953 }
1954 #endif
1955 
1956 #endif /* _NFS4_CLNT_H */
1957