xref: /illumos-gate/usr/src/uts/common/nfs/nfs_clnt.h (revision c5749750a3e052f1194f65a303456224c51dea63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
26 /*	  All Rights Reserved	*/
27 
28 #ifndef	_NFS_NFS_CLNT_H
29 #define	_NFS_NFS_CLNT_H
30 
31 #include <sys/utsname.h>
32 #include <sys/kstat.h>
33 #include <sys/time.h>
34 #include <vm/page.h>
35 #include <sys/thread.h>
36 #include <nfs/rnode.h>
37 #include <sys/list.h>
38 #include <sys/condvar_impl.h>
39 #include <sys/zone.h>
40 
41 #ifdef	__cplusplus
42 extern "C" {
43 #endif
44 
45 #define	HOSTNAMESZ	32	/* NOTE(review): presumably a hostname buffer size; unused in this header */
46 #define	ACREGMIN	3	/* min secs to hold cached file attr */
47 #define	ACREGMAX	60	/* max secs to hold cached file attr */
48 #define	ACDIRMIN	30	/* min secs to hold cached dir attr */
49 #define	ACDIRMAX	60	/* max secs to hold cached dir attr */
50 #define	ACMINMAX	3600	/* 1 hr (in secs) is longest min timeout */
51 #define	ACMAXMAX	36000	/* 10 hr (in secs) is longest max timeout */
52 
53 #define	NFS_CALLTYPES	3	/* Lookups, Reads, Writes; timer arrays use +1 */
54 
55 /*
56  * Flag bits for rfscall()
57  */
58 #define	RFSCALL_SOFT	0x00000001	/* Do op as if fs was soft-mounted */
59 
60 /*
61  * Fake errno passed back from rfscall to indicate transfer size adjustment
62  */
63 #define	ENFS_TRYAGAIN	999	/* the fake errno described above */
64 
65 /*
66  * The NFS specific async_reqs structure. iotype is grouped to support two
67  * types of async thread pools, please read comments section of mntinfo_t
68  * definition for more information. Care should be taken while adding new
69  * members to this group.
70  */
71 
72 enum iotype {
73 	NFS_PUTAPAGE,		/* page op */
74 	NFS_PAGEIO,		/* page op */
75 	NFS_COMMIT,		/* page op; must stay the last page op */
76 	NFS_READ_AHEAD,
77 	NFS_READDIR,
78 	NFS_INACTIVE,
79 	NFS_ASYNC_TYPES		/* count of types; sizes the mi_async_* arrays */
80 };
81 #define	NFS_ASYNC_PGOPS_TYPES	(NFS_COMMIT + 1)	/* number of leading page op types */
82 
83 /*
84  * NFS async requests queue type.
85  */
86 
87 enum ioqtype {
88 	NFS_ASYNC_QUEUE,	/* pool that works on all async queues */
89 	NFS_ASYNC_PGOPS_QUEUE,	/* pool that works only on page op queues */
90 	NFS_MAX_ASYNC_QUEUES	/* count; sizes mi_async_curr[], mi_threads[] */
91 };
92 
93 /*
94  * Default number of NFS async threads operating exclusively on page op requests.
95  */
96 #define	NUM_ASYNC_PGOPS_THREADS	0x2
97 
98 struct nfs_async_read_req {	/* a_read_args member of nfs_async_reqs */
99 	void (*readahead)();		/* pointer to readahead function (unprototyped) */
100 	u_offset_t blkoff;		/* offset in file */
101 	struct seg *seg;		/* segment to do i/o to */
102 	caddr_t addr;			/* address to do i/o to */
103 };
104 
105 struct nfs_pageio_req {	/* a_pageio_args member of nfs_async_reqs */
106 	int (*pageio)();		/* pointer to pageio function (unprototyped) */
107 	page_t *pp;			/* page list */
108 	u_offset_t io_off;		/* offset in file */
109 	uint_t io_len;			/* size of request */
110 	int flags;			/* NOTE(review): meaning not visible here; presumably passed to pageio */
111 };
112 
113 struct nfs_readdir_req {	/* a_readdir_args member of nfs_async_reqs */
114 	int (*readdir)();		/* pointer to readdir function (unprototyped) */
115 	struct rddir_cache *rdc;	/* pointer to cache entry to fill */
116 };
117 
118 struct nfs_commit_req {	/* a_commit_args member of nfs_async_reqs */
119 	void (*commit)();		/* pointer to commit function (unprototyped) */
120 	page_t *plist;			/* page list */
121 	offset3 offset;			/* starting offset */
122 	count3 count;			/* size of range to be committed */
123 };
124 
125 struct nfs_inactive_req {	/* a_inactive_args member of nfs_async_reqs */
126 	void (*inactive)();		/* pointer to inactive function (unprototyped) */
127 };
128 
129 struct nfs_async_reqs {		/* one entry on an async request list */
130 	struct nfs_async_reqs *a_next;	/* pointer to next arg struct */
131 #ifdef DEBUG
132 	kthread_t *a_queuer;		/* thread id of queueing thread (DEBUG builds only) */
133 #endif
134 	struct vnode *a_vp;		/* vnode pointer */
135 	struct cred *a_cred;		/* cred pointer */
136 	enum iotype a_io;		/* i/o type */
137 	union {				/* arguments, one member per kind of request */
138 		struct nfs_async_read_req a_read_args;
139 		struct nfs_pageio_req a_pageio_args;
140 		struct nfs_readdir_req a_readdir_args;
141 		struct nfs_commit_req a_commit_args;
142 		struct nfs_inactive_req a_inactive_args;
143 	} a_args;			/* NOTE(review): valid member presumably selected by a_io */
144 };
145 
146 #define	a_nfs_readahead a_args.a_read_args.readahead	/* shorthands into a_args */
147 #define	a_nfs_blkoff a_args.a_read_args.blkoff
148 #define	a_nfs_seg a_args.a_read_args.seg
149 #define	a_nfs_addr a_args.a_read_args.addr
150 
151 #define	a_nfs_putapage a_args.a_pageio_args.pageio	/* same field as a_nfs_pageio */
152 #define	a_nfs_pageio a_args.a_pageio_args.pageio	/* same field as a_nfs_putapage */
153 #define	a_nfs_pp a_args.a_pageio_args.pp
154 #define	a_nfs_off a_args.a_pageio_args.io_off
155 #define	a_nfs_len a_args.a_pageio_args.io_len
156 #define	a_nfs_flags a_args.a_pageio_args.flags
157 
158 #define	a_nfs_readdir a_args.a_readdir_args.readdir
159 #define	a_nfs_rdc a_args.a_readdir_args.rdc
160 
161 #define	a_nfs_commit a_args.a_commit_args.commit
162 #define	a_nfs_plist a_args.a_commit_args.plist
163 #define	a_nfs_offset a_args.a_commit_args.offset
164 #define	a_nfs_count a_args.a_commit_args.count
165 
166 #define	a_nfs_inactive a_args.a_inactive_args.inactive
167 
168 /*
169  * Due to the way the address space callbacks are used to execute a delmap,
170  * we must keep track of how many times the same thread has called
171  * VOP_DELMAP()->nfs_delmap()/nfs3_delmap().  This is done by having a list of
172  * nfs_delmapcall_t's associated with each rnode_t.  This list is protected
173  * by the rnode_t's r_statelock.  The individual elements do not need to be
174  * protected as they will only ever be created, modified and destroyed by
175  * one thread (the call_id).
176  * See nfs_delmap()/nfs3_delmap() for further explanation.
177  */
178 typedef struct nfs_delmapcall {
179 	kthread_t	*call_id;	/* the one thread that owns this element */
180 	int		error;	/* error from delmap */
181 	list_node_t	call_node;	/* linkage on the rnode's list of these */
182 } nfs_delmapcall_t;
183 
184 /*
185  * delmap address space callback args
186  */
187 typedef struct nfs_delmap_args {	/* NOTE(review): fields presumably mirror the VOP_DELMAP() arguments - confirm */
188 	vnode_t			*vp;
189 	offset_t		off;
190 	caddr_t			addr;
191 	size_t			len;
192 	uint_t			prot;
193 	uint_t			maxprot;
194 	uint_t			flags;
195 	cred_t			*cr;
196 	nfs_delmapcall_t	*caller; /* to retrieve errors from the cb */
197 } nfs_delmap_args_t;
198 
199 #ifdef _KERNEL
200 extern nfs_delmapcall_t	*nfs_init_delmapcall(void);	/* NOTE(review): presumably creates an element - confirm */
201 extern void	nfs_free_delmapcall(nfs_delmapcall_t *);
202 extern int	nfs_find_and_delete_delmapcall(rnode_t *, int *errp);	/* NOTE(review): errp presumably receives the element's error - confirm */
203 #endif /* _KERNEL */
204 
205 /*
206  * The following structures, chhead and chtab,  make up the client handle
207  * cache.  chhead represents a quadruple(RPC program, RPC version, Protocol
208  * Family, and Transport).  For example, a chhead entry could represent
209  * NFS/V3/IPv4/TCP requests.  chhead nodes are linked together as a singly
210  * linked list, which is referenced from chtable.
211  *
212  * chtab represents an allocated client handle bound to a particular
213  * quadruple. These nodes chain down from a chhead node.  chtab
214  * entries which are on the chain are considered free, so a thread may simply
215  * unlink the first node without traversing the chain.  When the thread is
216  * completed with its request, it puts the chtab node back on the chain.
217  */
218 typedef struct chhead {	/* one quadruple in the client handle cache */
219 	struct chhead *ch_next;	/* next quadruple */
220 	struct chtab *ch_list;	/* pointer to free client handle(s) */
221 	uint64_t ch_timesused;	/* times this quadruple was requested */
222 	rpcprog_t ch_prog;	/* RPC program number */
223 	rpcvers_t ch_vers;	/* RPC version number */
224 	dev_t ch_dev;		/* pseudo device number (i.e. /dev/udp) */
225 	char *ch_protofmly;	/* protocol (i.e. NC_INET, NC_LOOPBACK) */
226 } chhead_t;
227 
228 typedef struct chtab {	/* one client handle bound to a quadruple */
229 	struct chtab *ch_list;	/* next free client handle */
230 	struct chhead *ch_head;	/* associated quadruple */
231 	time_t ch_freed;	/* timestamp when freed */
232 	CLIENT *ch_client;	/* pointer to client handle */
233 } chtab_t;
234 
235 /*
236  * clinfo is a structure which encapsulates data that is needed to
237  * obtain a client handle from the cache
238  */
239 typedef struct clinfo {	/* first argument of clget() */
240 	rpcprog_t cl_prog;	/* RPC program number */
241 	rpcvers_t cl_vers;	/* RPC version number */
242 	uint_t cl_readsize;	/* transfer size */
243 	int cl_retrans;		/* times to retry request */
244 	uint_t cl_flags;	/* info flags */
245 } clinfo_t;
246 
247 /*
248  * Failover information, passed opaquely through rfscall()
249  */
250 typedef struct failinfo {
251 	struct vnode	*vp;
252 	caddr_t		fhp;	/* NOTE(review): presumably a file handle pointer - confirm */
253 	void (*copyproc)(caddr_t, vnode_t *);	/* callback; semantics not visible here */
254 	int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *,
255 			int, vnode_t *, struct cred *, int);
256 	int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int);
257 } failinfo_t;
258 
259 /*
260  * Static server information
261  *
262  * These fields are protected by sv_lock:
263  *	sv_flags
264  */
265 typedef struct servinfo {
266 	struct knetconfig *sv_knconf;   /* bound TLI fd */
267 	struct knetconfig *sv_origknconf;	/* For RDMA save orig knconf */
268 	struct netbuf	sv_addr;	/* server's address */
269 	nfs_fhandle	sv_fhandle;	/* this server's filehandle */
270 	struct sec_data *sv_secdata;	/* security data for rpcsec module */
271 	char	*sv_hostname;		/* server's hostname */
272 	int	sv_hostnamelen;		/* server's hostname length */
273 	uint_t	sv_flags;		/* SV_* bits, see below; protected by sv_lock */
274 	struct servinfo	*sv_next;	/* next in list */
275 	kmutex_t sv_lock;		/* protects sv_flags */
276 } servinfo_t;
277 
278 /*
279  * The values for sv_flags.
280  */
281 #define	SV_ROOT_STALE	0x1		/* root vnode got ESTALE */
282 
283 /*
284  * Switch from RDMA knconf to original mount knconf: yields the current server's sv_origknconf if set, else its sv_knconf.  mi is evaluated twice.
285  */
286 
287 #define	ORIG_KNCONF(mi) ((mi)->mi_curr_serv->sv_origknconf ? \
288 	(mi)->mi_curr_serv->sv_origknconf : (mi)->mi_curr_serv->sv_knconf)
289 
290 #if	defined(_KERNEL)
291 /*
292  * NFS private data per mounted file system
293  *	The mi_lock mutex protects the following fields:
294  *		mi_flags
295  *		mi_printed
296  *		mi_down
297  *		mi_tsize
298  *		mi_stsize
299  *		mi_curread
300  *		mi_curwrite
301  *		mi_timers
302  *		mi_curr_serv
303  *		mi_readers
304  *		mi_klmconfig
305  *
306  *	The mi_async_lock mutex protects the following fields:
307  *		mi_async_reqs
308  *		mi_async_req_count
309  *		mi_async_tail
310  *		mi_async_curr[NFS_MAX_ASYNC_QUEUES]
311  *		mi_async_clusters
312  *		mi_async_init_clusters
313  *		mi_threads[NFS_MAX_ASYNC_QUEUES]
314  *		mi_manager_thread
315  *
316  *	Normally the netconfig information for the mount comes from
317  *	mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
318  *	different transport, mi_klmconfig contains the necessary netconfig
319  *	information.
320  *
321  *	'mi_zone' is initialized at structure creation time, and never
322  *	changes; it may be read without a lock.
323  *
324  *	mi_zone_node is linkage into the mi_globals.mig_list, and is
325  *	protected by mi_globals.mig_lock.
326  *
327  *	Locking order:
328  *	  mi_globals::mig_lock > mi_async_lock > mi_lock
329  */
330 typedef struct mntinfo {
331 	kmutex_t	mi_lock;	/* protects the fields listed for it above */
332 	struct servinfo *mi_servers;    /* server list */
333 	struct servinfo *mi_curr_serv;  /* current server */
334 	kcondvar_t	mi_failover_cv;	/* failover synchronization */
335 	int		mi_readers;	/* failover - users of mi_curr_serv */
336 	struct vfs	*mi_vfsp;	/* back pointer to vfs */
337 	enum vtype	mi_type;	/* file type of the root vnode */
338 	uint_t		mi_flags;	/* MI_* bits, see below */
339 	uint_t		mi_tsize;	/* max read transfer size (bytes) */
340 	uint_t		mi_stsize;	/* max write transfer size (bytes) */
341 	int		mi_timeo;	/* initial timeout in 10th sec */
342 	int		mi_retrans;	/* times to retry request */
343 	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
344 	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
345 	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
346 	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
347 	len_t		mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
348 	/*
349 	 * Extra fields for congestion control, one per NFS call type,
350 	 * plus one global one.
351 	 */
352 	struct rpc_timers mi_timers[NFS_CALLTYPES+1];
353 	int		mi_curread;	/* current read size */
354 	int		mi_curwrite;	/* current write size */
355 	/*
356 	 * Async I/O management
357 	 * We have 2 pools of threads working on async I/O:
358 	 *	(i) Threads which work on all async queues. Default number of
359 	 *	threads in this queue is 8. Threads in this pool work on async
360 	 *	queue pointed by mi_async_curr[NFS_ASYNC_QUEUE]. Number of
361 	 *	active threads in this pool is tracked by
362 	 *	mi_threads[NFS_ASYNC_QUEUE].
363 	 *	(ii) Threads which work only on page op async queues.
364 	 *	Page ops queue comprises NFS_PUTAPAGE, NFS_PAGEIO &
365 	 *	NFS_COMMIT. Default number of threads in this queue is 2
366 	 *	(NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
367 	 *	queue pointed by mi_async_curr[NFS_ASYNC_PGOPS_QUEUE]. Number
368 	 *	of active threads in this pool is tracked by
369 	 *	mi_threads[NFS_ASYNC_PGOPS_QUEUE].
370 	 */
371 	struct nfs_async_reqs *mi_async_reqs[NFS_ASYNC_TYPES];
372 	struct nfs_async_reqs *mi_async_tail[NFS_ASYNC_TYPES];
373 	struct nfs_async_reqs **mi_async_curr[NFS_MAX_ASYNC_QUEUES];
374 						/* current async queue */
375 	uint_t		mi_async_clusters[NFS_ASYNC_TYPES];
376 	uint_t		mi_async_init_clusters;
377 	uint_t		mi_async_req_count; /* # outstanding work requests */
378 	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
379 	ushort_t	mi_threads[NFS_MAX_ASYNC_QUEUES];
380 					/* number of active async threads */
381 	ushort_t	mi_max_threads;	/* max number of async worker threads */
382 	kthread_t	*mi_manager_thread;  /* async manager thread */
383 	kcondvar_t	mi_async_cv; /* signaled when the last worker dies */
384 	kcondvar_t	mi_async_work_cv[NFS_MAX_ASYNC_QUEUES];
385 					/* tell workers to work */
386 	kmutex_t	mi_async_lock;	/* lock to protect async list */
387 	/*
388 	 * Other stuff
389 	 */
390 	struct pathcnf *mi_pathconf;	/* static pathconf kludge */
391 	rpcprog_t	mi_prog;	/* RPC program number */
392 	rpcvers_t	mi_vers;	/* RPC program version number */
393 	char		**mi_rfsnames;	/* mapping to proc names */
394 	kstat_named_t	*mi_reqs;	/* count of requests */
395 	uchar_t		*mi_call_type;	/* dynamic retrans call types */
396 	uchar_t		*mi_ss_call_type;	/* semisoft call type */
397 	uchar_t		*mi_timer_type;	/* dynamic retrans timer types */
398 	clock_t		mi_printftime;	/* last error printf time */
399 	/*
400 	 * ACL entries
401 	 */
402 	char		**mi_aclnames;	/* mapping to proc names */
403 	kstat_named_t	*mi_aclreqs;	/* count of acl requests */
404 	uchar_t		*mi_acl_call_type; /* dynamic retrans call types */
405 	uchar_t		*mi_acl_ss_call_type; /* semisoft call types */
406 	uchar_t		*mi_acl_timer_type; /* dynamic retrans timer types */
407 	/*
408 	 * Client Side Failover stats
409 	 */
410 	uint_t		mi_noresponse;	/* server not responding count */
411 	uint_t		mi_failover;	/* failover to new server count */
412 	uint_t		mi_remap;	/* remap to new server count */
413 	/*
414 	 * Kstat statistics
415 	 */
416 	struct kstat	*mi_io_kstats;
417 	struct kstat	*mi_ro_kstats;	/* NOTE(review): presumably backed by struct mntinfo_kstat - confirm */
418 	struct knetconfig *mi_klmconfig;	/* netconfig for NLM calls if it differs from mi_curr_serv's, else NULL */
419 	/*
420 	 * Zones support.
421 	 */
422 	struct zone	*mi_zone;	/* Zone in which FS is mounted */
423 	zone_ref_t	mi_zone_ref;	/* Reference to aforementioned zone */
424 	list_node_t	mi_zone_node;	/* Linkage into per-zone mi list */
425 	/*
426 	 * Serializes threads in failover_remap.
427 	 * Need to acquire this lock first in failover_remap() function
428 	 * before acquiring any other rnode lock.
429 	 */
430 	kmutex_t	mi_remap_lock;
431 	/*
432 	 * List of rnode_t structures that belongs to this mntinfo
433 	 */
434 	kmutex_t	mi_rnodes_lock;	/* protects the mi_rnodes list */
435 	list_t		mi_rnodes;	/* the list */
436 } mntinfo_t;
437 #endif	/* _KERNEL */
438 
439 /*
440  * vfs pointer to mount info (casts the vfs's vfs_data to mntinfo_t *)
441  */
442 #define	VFTOMI(vfsp)	((mntinfo_t *)((vfsp)->vfs_data))
443 
444 /*
445  * vnode pointer to mount info (goes through the vnode's v_vfsp)
446  */
447 #define	VTOMI(vp)	VFTOMI((vp)->v_vfsp)
448 
449 /*
450  * The values for mi_flags (distinct bits; mi_flags is protected by mi_lock).
451  */
452 #define	MI_HARD		0x1		/* hard or soft mount */
453 #define	MI_PRINTED	0x2		/* not responding message printed */
454 #define	MI_INT		0x4		/* interrupts allowed on hard mount */
455 #define	MI_DOWN		0x8		/* server is down */
456 #define	MI_NOAC		0x10		/* don't cache attributes */
457 #define	MI_NOCTO	0x20		/* no close-to-open consistency */
458 #define	MI_DYNAMIC	0x40		/* dynamic transfer size adjustment */
459 #define	MI_LLOCK	0x80		/* local locking only (no lockmgr) */
460 #define	MI_GRPID	0x100		/* System V group id inheritance */
461 #define	MI_RPCTIMESYNC	0x200		/* RPC time sync */
462 #define	MI_LINK		0x400		/* server supports link */
463 #define	MI_SYMLINK	0x800		/* server supports symlink */
464 #define	MI_READDIRONLY	0x1000		/* use readdir instead of readdirplus */
465 #define	MI_ACL		0x2000		/* server supports NFS_ACL */
466 #define	MI_BINDINPROG	0x4000		/* binding to server is changing */
467 #define	MI_LOOPBACK	0x8000		/* Set if this is a loopback mount */
468 #define	MI_SEMISOFT	0x10000		/* soft reads, hard modify */
469 #define	MI_NOPRINT	0x20000		/* don't print messages */
470 #define	MI_DIRECTIO	0x40000		/* do direct I/O */
471 #define	MI_EXTATTR	0x80000		/* server supports extended attrs */
472 #define	MI_ASYNC_MGR_STOP	0x100000	/* tell async mgr to die */
473 #define	MI_DEAD		0x200000	/* mount has been terminated */
474 
475 /*
476  * Read-only mntinfo statistics
477  */
478 struct mntinfo_kstat {	/* NOTE(review): mik_* presumably snapshot the like-named mi_* fields - confirm */
479 	char		mik_proto[KNC_STRSIZE];
480 	uint32_t	mik_vers;
481 	uint_t		mik_flags;
482 	uint_t		mik_secmod;
483 	uint32_t	mik_curread;
484 	uint32_t	mik_curwrite;
485 	int		mik_timeo;
486 	int		mik_retrans;
487 	uint_t		mik_acregmin;	/* NOTE(review): uint_t, unlike hrtime_t mi_acregmin; presumably seconds - confirm */
488 	uint_t		mik_acregmax;
489 	uint_t		mik_acdirmin;
490 	uint_t		mik_acdirmax;
491 	struct {
492 		uint32_t srtt;
493 		uint32_t deviate;
494 		uint32_t rtxcur;
495 	} mik_timers[NFS_CALLTYPES+1];	/* same element count as mi_timers[] */
496 	uint32_t	mik_noresponse;
497 	uint32_t	mik_failover;
498 	uint32_t	mik_remap;
499 	char		mik_curserver[SYS_NMLN];
500 };
501 
502 /*
503  * Macro to wakeup sleeping async worker threads: signals one waiter on the NFS_ASYNC_QUEUE cv if it has waiters, else one on the NFS_ASYNC_PGOPS_QUEUE cv if that has waiters.
504  */
505 #define	NFS_WAKE_ASYNC_WORKER(work_cv)	{				\
506 	if (CV_HAS_WAITERS(&(work_cv)[NFS_ASYNC_QUEUE]))		\
507 		cv_signal(&(work_cv)[NFS_ASYNC_QUEUE]);			\
508 	else if (CV_HAS_WAITERS(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]))	\
509 		cv_signal(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]);		\
510 }
511 
512 #define	NFS_WAKEALL_ASYNC_WORKERS(work_cv) { /* broadcast on both cvs */ \
513 	cv_broadcast(&(work_cv)[NFS_ASYNC_QUEUE]);			\
514 	cv_broadcast(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]);		\
515 }
516 
517 /*
518  * Mark cached attributes as timed out; takes and drops r_statelock itself, so
519  * the caller must not be holding the rnode r_statelock mutex.  The local has
520  * a distinctive name so that an argument mentioning "rp" is not captured by it.
521  */
522 #define	PURGE_ATTRCACHE(vp)	{				\
523 	rnode_t *purge_attr_rp = VTOR(vp);			\
524 	mutex_enter(&purge_attr_rp->r_statelock);		\
525 	PURGE_ATTRCACHE_LOCKED(purge_attr_rp);			\
526 	mutex_exit(&purge_attr_rp->r_statelock);		\
527 }
528 
529 #define	PURGE_ATTRCACHE_LOCKED(rp)	{ /* r_statelock must be held */ \
530 	ASSERT(MUTEX_HELD(&(rp)->r_statelock));			\
531 	(rp)->r_attrtime = gethrtime();	/* expires the attrs now */ \
532 	(rp)->r_mtime = (rp)->r_attrtime;			\
533 }
534 
535 /*
536  * True while the rnode's r_attrtime is still later than the current hrtime.
537  */
538 #define	ATTRCACHE_VALID(vp)	(VTOR(vp)->r_attrtime > gethrtime())
539 
540 /*
541  * Flags to indicate whether to purge the DNLC for non-directory vnodes
542  * in a call to nfs_purge_caches (passed as its second argument).
543  */
544 #define	NFS_NOPURGE_DNLC	0
545 #define	NFS_PURGE_DNLC		1
546 
547 /*
548  * If returned error is ESTALE flush all caches.
549  */
550 #define	PURGE_STALE_FH(error, vp, cr)				\
551 	if ((error) == ESTALE) {				\
552 		struct rnode *rp = VTOR(vp);			\
553 		if (vp->v_flag & VROOT) {			\
554 			servinfo_t *svp = rp->r_server;		\
555 			mutex_enter(&svp->sv_lock);		\
556 			svp->sv_flags |= SV_ROOT_STALE;		\
557 			mutex_exit(&svp->sv_lock);		\
558 		}						\
559 		mutex_enter(&rp->r_statelock);			\
560 		rp->r_flags |= RSTALE;				\
561 		if (!rp->r_error)				\
562 			rp->r_error = (error);			\
563 		mutex_exit(&rp->r_statelock);			\
564 		if (vn_has_cached_data(vp))			\
565 			nfs_invalidate_pages((vp), (u_offset_t)0, (cr)); \
566 		nfs_purge_caches((vp), NFS_PURGE_DNLC, (cr));	\
567 	}
568 
569 /*
570  * Is cache valid?
571  * Swap is always valid; otherwise the cache is valid only if both the
572  * given mtime and the given file size match the cached attributes.
573  * NOTE: mtime is now a timestruc_t.
574  * Caller should be holding the rnode r_statelock mutex.
575  */
576 #define	CACHE_VALID(rp, mtime, fsize)				\
577 	((RTOV(rp)->v_flag & VISSWAP) == VISSWAP ||		\
578 	(((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&	\
579 	(mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) &&	\
580 	((fsize) == (rp)->r_attr.va_size)))
581 
582 /*
583  * Macro to detect forced unmount (VFS_UNMOUNTED set) or a shutdown of the current process's zone.
584  */
585 #define	FS_OR_ZONE_GONE(vfsp) \
586 	(((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
587 	zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
588 
589 /*
590  * Convert NFS tunables to hrtime_t units, seconds to nanoseconds.
591  */
592 #define	SEC2HR(sec)	((sec) * (long long)NANOSEC)	/* secs -> nsecs */
593 #define	HR2SEC(hr)	((hr) / (long long)NANOSEC)	/* nsecs -> secs */
594 
595 /*
596  * Structure to identify owner of a PC file share reservation.
597  */
598 struct nfs_owner {
599 	int	magic;		/* magic uniquifying number */
600 	char	hname[16];	/* first 16 bytes of hostname; NOTE(review): presumably not NUL-terminated when longer - confirm */
601 	char	lowner[8];	/* local owner from fcntl */
602 };
603 
604 /*
605  * Values for magic.
606  */
607 #define	NFS_OWNER_MAGIC	0x1D81E	/* value for nfs_owner.magic */
608 
609 /*
610  * Support for extended attributes: fixed names used in place of real ones
611  */
612 #define	XATTR_DIR_NAME	"/@/"		/* used for DNLC entries */
613 #define	XATTR_RPATH	"ExTaTtR"	/* used for r_path for failover */
614 
615 /*
616  * Short hand for checking to see whether the file system was mounted
617  * interruptible or not.  Yields MI_INT or 0; takes no lock itself.
618  */
619 #define	INTR(vp)	(VTOMI(vp)->mi_flags & MI_INT)
620 
621 /*
622  * Short hand for checking whether failover is enabled or not: non-NULL when the mi_servers list has a second entry.
623  */
624 #define	FAILOVER_MOUNT(mi)	((mi)->mi_servers->sv_next)
625 
626 /*
627  * How long will async threads wait for additional work (NOTE(review): presumably in clock ticks, hz being ticks per second - confirm).
628  */
629 #define	NFS_ASYNC_TIMEOUT	(60 * 1 * hz)	/* 1 minute */
630 
631 #ifdef _KERNEL
632 extern int	clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
633 		    struct chtab **);	/* NOTE(review): presumably returns a client handle and its chtab through the last two args - confirm */
634 extern void	clfree(CLIENT *, struct chtab *);	/* NOTE(review): presumably the release counterpart of clget() - confirm */
635 extern void	nfs_mi_zonelist_add(mntinfo_t *);
636 extern void	nfs_free_mi(mntinfo_t *);
637 extern void	nfs_mnt_kstat_init(struct vfs *);
638 #endif
639 
640 /*
641  * Per-zone data for managing client handles.  Included here solely for the
642  * benefit of MDB.
643  */
644 /*
645  * client side statistics
646  */
647 struct clstat {
648 	kstat_named_t	calls;			/* client requests */
649 	kstat_named_t	badcalls;		/* rpc failures */
650 	kstat_named_t	clgets;			/* client handle gets */
651 	kstat_named_t	cltoomany;		/* client handle cache misses */
652 #ifdef DEBUG
653 	kstat_named_t	clalloc;		/* number of client handles (DEBUG only, as are the next three) */
654 	kstat_named_t	noresponse;		/* server not responding cnt */
655 	kstat_named_t	failover;		/* server failover count */
656 	kstat_named_t	remap;			/* server remap count */
657 #endif
658 };
659 
660 struct nfs_clnt {
661 	struct chhead	*nfscl_chtable;	/* head of the chhead list */
662 	kmutex_t	nfscl_chtable_lock;	/* NOTE(review): presumably protects nfscl_chtable - confirm */
663 	zoneid_t	nfscl_zoneid;
664 	list_node_t	nfscl_node;
665 	struct clstat	nfscl_stat;	/* client side statistics */
666 };
667 
668 #ifdef	__cplusplus
669 }
670 #endif
671 
672 #endif	/* _NFS_NFS_CLNT_H */
673