xref: /titanic_44/usr/src/uts/common/nfs/rnode4.h (revision b6131d8fb9d81bf7aaa169dfc3f2f24f68825b18)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  *	Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  *	Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #ifndef	_NFS_RNODE4_H
30 #define	_NFS_RNODE4_H
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 #include <nfs/rnode.h>		/* for symlink_cache, nfs_rwlock_t, etc. */
39 #include <nfs/nfs4.h>
40 #include <nfs/nfs4_clnt.h>
41 #include <sys/thread.h>
42 #include <sys/sysmacros.h>	/* for offsetof */
43 
/*
 * Stub classification of an rnode, kept in rnode4_t::r_stub_type.
 * NFS4_STUB_NONE is the "not a stub" value tested by RP_ISSTUB().
 */
typedef enum nfs4_stub_type {
	NFS4_STUB_NONE = 0,
	NFS4_STUB_MIRRORMOUNT = 1
} nfs4_stub_type_t;
48 
/*
 * Answer type of nfs4_access_check(): the access cache either has no
 * answer (UNKNOWN) or reports the access as ALLOWED or DENIED.
 */
typedef enum nfs4_access_type {
	NFS4_ACCESS_UNKNOWN = 0,
	NFS4_ACCESS_ALLOWED = 1,
	NFS4_ACCESS_DENIED = 2
} nfs4_access_type_t;
54 
/*
 * Access cache
 *
 * acache4_hash_t is the head of one hash chain of acache4_t entries
 * (each entry points back at its bucket through acache4_t::hashq).
 * The next/prev members come first, in the same order as the leading
 * next/prev members of acache4_t.
 */
typedef struct acache4_hash {
	struct acache4 *next;	/* forward pointer along the chain */
	struct acache4 *prev;	/* backward pointer along the chain */
	krwlock_t lock;		/* reader/writer lock for this bucket */
} acache4_hash_t;
63 
/*
 * One access cache entry.  An entry records the rnode and credential it
 * refers to, plus the hash bucket (hashq) it is chained on.
 *
 * NOTE(review): known/allowed look like bit masks of the access bits that
 * were checked and of those that were granted, matching the two uint32_t
 * arguments of nfs4_access_cache() -- confirm against the implementation.
 */
typedef struct acache4 {
	struct acache4 *next;	/* next and prev must be first */
	struct acache4 *prev;
	uint32_t known;
	uint32_t allowed;
	struct rnode4 *rnode;	/* rnode this entry refers to */
	cred_t *cred;		/* credential this entry refers to */
	struct acache4 *list;	/* presumably links rnode's r_acache list */
	struct acache4_hash *hashq;	/* hash bucket for this entry */
} acache4_t;
74 
/*
 * rddir4_cache describes one cached readdir response.  rnode4_t::r_dir is
 * the per-rnode cache of these responses and rnode4_t::r_direof points at
 * the entry that reached EOF.
 *
 * Note on the different buffer sizes in rddir4_cache:
 * There seems to be some discrepancy between the intended and actual
 * use of entlen and buflen, which does not correspond to the comment below.
 *	entlen - nfsv2/3 used as both alloc'd size of entries buffer and
 *		as the actual size of the entries (XXX is this correct?).
 *		nfsv4 will use it only as the alloc'd size.
 *	buflen - used for calculations of readahead.
 *	actlen - added for nfsv4 to serve as the size of the useful
 *		portion of the entries buffer. That is because in
 *		nfsv4, the otw entries are converted to system entries,
 *		and may not be the same size - thus buffer may not be full.
 *
 * The cookie members are normally reached through the nfs4_cookie and
 * nfs4_ncookie shorthands defined after this structure.
 */
typedef struct rddir4_cache {
	lloff_t _cookie;	/* cookie used to find this cache entry */
	lloff_t _ncookie;	/* cookie used to find the next cache entry */
	char *entries;		/* buffer containing dirent entries */
	int eof;		/* EOF reached after this request */
	int entlen;		/* size of dirent entries in buf */
	int buflen;		/* size of the buffer used to store entries */
	int actlen;		/* size of the actual entries (nfsv4 only) */
	int flags;		/* control flags, see below */
	kcondvar_t cv;		/* cv for blocking */
	int error;		/* error from RPC operation */
	void *data;		/* private data */
} rddir4_cache;
101 
/*
 * Shorthand member names for rddir4_cache: each selects the _f member of
 * the corresponding lloff_t cookie field.
 */
#define	nfs4_cookie	_cookie._f
#define	nfs4_ncookie	_ncookie._f
104 
105 /*
106  * Shadow vnode, v4 only.
107  *
108  * A file's shadow vnode list is protected by its hash bucket lock,
109  * r_hashq->r_lock.
110  *
111  * sv_r_vnode is protected by the appropriate vnode locks.
112  *
113  * sv_dfh, sv_name, sv_dfileid, and sv_dfileid_valid are protected
114  * by rp->r_svlock.
115  */
116 
/*
 * Forward/backward link pair.  svnode_t embeds one as its first member
 * so that a shadow vnode can be handed to insque.
 */
typedef struct insq_link {
	void	*forw;		/* forward link */
	void	*back;		/* backward link */
} insq_link_t;
121 
/*
 * A shadow vnode.  Every rnode4_t embeds one "master" svnode (r_svnode);
 * sv_link chains the svnodes of a file together.
 *
 * NOTE(review): the locking notes earlier in this file also mention
 * sv_dfileid and sv_dfileid_valid, but no such members are declared
 * here -- that text may be stale.
 */
typedef struct svnode {
	insq_link_t	sv_link;	/* must be first for insque */
	vnode_t		*sv_r_vnode;	/* vnode for this shadow */
	nfs4_fname_t	*sv_name;	/* component name */
	nfs4_sharedfh_t	*sv_dfh;	/* directory file handle */
} svnode_t;
128 
/*
 * Shadow vnode accessors.
 *	sv_forw/sv_back	- shorthand for the two links inside sv_link
 *	VTOSV(vp)	- calls vtosv() to get the svnode for a vnode
 *	SVTOV(svp)	- the vnode recorded in a svnode
 *	IS_SHADOW(vp, rp) - true when vp is not the rnode's own r_vnode,
 *			  i.e. not the vnode of the master svnode
 */
#define	sv_forw			sv_link.forw
#define	sv_back			sv_link.back
extern svnode_t			*vtosv(vnode_t *);
#define	VTOSV(vp)		vtosv(vp)
#define	SVTOV(svp)		(((svp)->sv_r_vnode))
#define	IS_SHADOW(vp, rp)	((vp) != (rp)->r_vnode)
135 
/*
 * The format of the hash bucket used to lookup rnodes from a file handle.
 *
 * r_hashf and r_hashb have the same names, types and leading position as
 * the first two members of rnode4_t, which is required to keep its hash
 * fields first to match this structure.
 */
typedef struct r4hashq {
	struct rnode4 *r_hashf;	/* forward pointer on the hash chain */
	struct rnode4 *r_hashb;	/* backward pointer on the hash chain */
	krwlock_t r_lock;	/* bucket lock; also covers svnode lists */
} r4hashq_t;
144 
145 /*
146  * Remote file information structure.
147  *
148  * The rnode is the "inode" for remote files.  It contains all the
149  * information necessary to handle remote file on the client side.
150  *
151  * Note on file sizes:  we keep two file sizes in the rnode: the size
152  * according to the client (r_size) and the size according to the server
153  * (r_attr.va_size).  They can differ because we modify r_size during a
154  * write system call (nfs_rdwr), before the write request goes over the
155  * wire (before the file is actually modified on the server).  If an OTW
156  * request occurs before the cached data is written to the server the file
157  * size returned from the server (r_attr.va_size) may not match r_size.
158  * r_size is the one we use, in general.  r_attr.va_size is only used to
159  * determine whether or not our cached data is valid.
160  *
161  * Each rnode has 5 locks associated with it (not including the rnode
162  * hash table and free list locks):
163  *
164  *	r_rwlock:	Serializes nfs_write and nfs_setattr requests
165  *			and allows nfs_read requests to proceed in parallel.
166  *			Serializes reads/updates to directories.
167  *
168  *	r_lkserlock:	Serializes lock requests with map, write, and
169  *			readahead operations.
170  *
 *	r_statelock:	Protects all fields in the rnode except for
 *			those listed below.  This lock is intended
 *			to be held for relatively short periods of
 *			time (not across entire putpage operations,
 *			for example).
176  *
 *	r_statev4_lock:	Protects the created_v4 flag, the lock_owners list,
 *			and all the delegation fields except r_deleg_link.
179  *
180  *	r_os_lock:	Protects r_open_streams.
181  *
182  *
183  * The following members are protected by the mutex rp4freelist_lock:
184  *	r_freef
185  *	r_freeb
186  *
187  * The following members are protected by the hash bucket rwlock:
188  *	r_hashf
189  *	r_hashb
190  *
191  * r_fh is read-only except when an rnode is created (or recycled from the
192  * free list).
193  *
 * The following members are protected by nfs4_server_t::s_lock:
 *	r_deleg_link (the rnode's linkage into the server's delegation list)
196  *
 * Note: r_modaddr is only accessed when the r_statelock mutex is held.
 *	Its value is also controlled via r_rwlock.  It is assumed that
 *	there will be only 1 writer active at a time, so it is safe to
 *	set r_modaddr and release r_statelock as long as the r_rwlock
 *	writer lock is held.
202  *
203  * 64-bit offsets: the code formerly assumed that atomic reads of
204  * r_size were safe and reliable; on 32-bit architectures, this is
205  * not true since an intervening bus cycle from another processor
206  * could update half of the size field.  The r_statelock must now
207  * be held whenever any kind of access of r_size is made.
208  *
209  * Lock ordering:
210  * 	r_rwlock > r_lkserlock > r_os_lock > r_statelock > r_statev4_lock
211  *	vnode_t::v_lock > r_os_lock
212  */
/* Forward declarations of structures that are defined in other headers. */
struct exportinfo;	/* defined in nfs/export.h */
struct servinfo4;	/* defined in nfs/nfs4_clnt.h */
struct failinfo;	/* defined in nfs/nfs_clnt.h */
struct mntinfo4;	/* defined in nfs/nfs4_clnt.h */
217 
/*
 * The NFSv4 client rnode.  Member order matters: the hash pointers must
 * stay first, and the locking rules for the individual members are given
 * in the block comment that precedes this structure.
 */
typedef struct rnode4 {
	/* the hash fields must be first to match the r4hashq_t */
	struct rnode4	*r_hashf;	/* hash queue forward pointer */
	struct rnode4	*r_hashb;	/* hash queue back pointer */
	struct rnode4	*r_freef;	/* free list forward pointer */
	struct rnode4	*r_freeb;	/* free list back pointer */
	r4hashq_t	*r_hashq;	/* pointer to the hash bucket */

	svnode_t	r_svnode;	/* "master" shadow vnode for file */
	kmutex_t	r_svlock;	/* serializes access to svnode list */
	nfs_rwlock_t	r_rwlock;	/* serializes write/setattr requests */
	nfs_rwlock_t	r_lkserlock;	/* serialize lock with other ops */
	kmutex_t	r_statelock;	/* protects (most of) rnode contents */
	nfs4_sharedfh_t	*r_fh;		/* file handle */
	struct servinfo4
			*r_server;	/* current server */
	u_offset_t	r_nextr;	/* next byte read offset (read-ahead) */
	uint_t		r_flags;	/* flags, see below */
	short		r_error;	/* async write error */
	cred_t		*r_unlcred;	/* unlinked credentials */
	char		*r_unlname;	/* unlinked file name */
	vnode_t		*r_unldvp;	/* parent dir of unlinked file */
	vnode_t		*r_xattr_dir;	/* cached xattr dir vnode */
	len_t		r_size;		/* client's view of file size */
	vattr_t		r_attr;		/* cached vnode attributes */
	hrtime_t	r_time_attr_saved; /* time attributes were cached */
	hrtime_t	r_time_attr_inval; /* time attributes become invalid */
	hrtime_t	r_time_cache_inval; /* time caches become invalid */
	time_t		r_delay_wait;	/* future time for DELAY handling */
	int		r_delay_interval; /* Number of Secs of last DELAY */
	time_t		r_last_recov;	/* time of last recovery operation */
	nfs4_recov_t	r_recov_act;	/* action from last recovery op */
	long		r_mapcnt;	/* count of mmapped pages */
	uint_t		r_count;	/* # of refs not reflected in v_count */
	uint_t		r_awcount;	/* # of outstanding async write */
	uint_t		r_gcount;	/* getattrs waiting to flush pages */
	kcondvar_t	r_cv;		/* condvar for blocked threads */
	int		(*r_putapage)	/* address of putapage routine */
		(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *);
	void		*r_dir;		/* cache of readdir responses */
	rddir4_cache	*r_direof;	/* pointer to the EOF entry */
	symlink_cache	r_symlink;	/* cached readlink response */
	verifier4	r_writeverf;	/* file data write verifier */
	u_offset_t	r_modaddr;	/* address for page in writerp */
	commit_t	r_commit;	/* commit information */
	u_offset_t	r_truncaddr;	/* base for truncate operation */
	vsecattr_t	*r_secattr;	/* cached security attributes (acls) */
	verifier4	r_cookieverf4;	/* version 4 readdir cookie verifier */
	nfs4_pathconf_info_t r_pathconf; /* cached pathconf info */
	acache4_t	*r_acache;	/* list of access cache entries */
	list_t		r_open_streams;	/* open streams list */
	kmutex_t	r_os_lock;	/* protects r_open_streams */
	nfs4_lock_owner_t
			r_lo_head;	/* lock owners list head */
	int		created_v4;	/* 1 if file has been created in v4 */
	kmutex_t	r_statev4_lock;	/* protects created_v4, the lock */
					/* owners list and most of the */
					/* delegation fields (see the */
					/* locking notes above) */

	list_node_t	r_deleg_link;	/* linkage into list of */
					/* delegated rnodes for this server */
	open_delegation_type4
			r_deleg_type;	/* type of delegation granted */
	stateid4	r_deleg_stateid;
					/* delegation state id */
	nfs_space_limit4
			r_deleg_limit;	/* file limits returned from */
					/* server on delegated open */
	nfsace4		r_deleg_perms;	/* file permissions returned from */
					/* server on delegated open */
	fattr4_change	r_deleg_change;	/* current deleg change attr */
	fattr4_change	r_deleg_change_grant;
					/* change @ write deleg grant */
	cred_t		*r_deleg_cred;	/* credential in force when the */
					/* delegation was granted */
	open_delegation_type4
			r_deleg_needs_recovery;
					/* delegation needs recovery */
					/* This contains the delegation type */
					/* for use with CLAIM_PREVIOUS. */
					/* OPEN_DELEGATE_NONE means recovery */
					/* is not needed. */
	unsigned	r_deleg_needs_recall:1;
					/* delegation has been recalled by */
					/* the server during open with */
					/* CLAIM_PREVIOUS */
	unsigned 	r_deleg_return_pending:1;
					/* delegreturn is pending, don't use */
					/* the delegation stateid, set in */
					/* nfs4_dlistadd */
	unsigned 	r_deleg_return_inprog:1;
					/* delegreturn is in progress, may */
					/* only be set by nfs4delegreturn. */
	nfs_rwlock_t    r_deleg_recall_lock;
					/* lock for synchronizing delegreturn */
					/* with other operations, acquired */
					/* in read mode by nfs4_start_fop, */
					/* acquired in write mode in */
					/* nfs4delegreturn */
	fattr4_change	r_change;	/* GETATTR4 change attr;  client  */
					/* should always request change   */
					/* when c/mtime requested to keep */
					/* change and c/mtime in sync	  */
	fattr4_fileid	r_mntd_fid;	/* mounted on fileid attr	  */
	kthread_t	*r_serial;	/* attrcache validation thread */
	kthread_t	*r_pgflush;	/* thread flushing page cache */
	list_t		r_indelmap;	/* list of delmap callers */
	fattr4_fsid	r_srv_fsid;	/* fsid of srv fs containing object */
					/* when rnode created; compare with */
					/* sv_fsid (servinfo4_t) to see why */
					/* stub type was set		    */
	nfs4_stub_type_t	r_stub_type;
					/* e.g. mirror-mount */
} rnode4_t;
330 
/* An rnode's own vnode is the one recorded in its embedded master svnode. */
#define	r_vnode	r_svnode.sv_r_vnode
332 
/*
 * Bit values for rnode4_t::r_flags.
 *
 * Bits 0x00010000 and 0x00100000 are not assigned in this list.
 */
#define	R4READDIRWATTR	0x00000001 /* Use READDIR with attributes */
#define	R4DIRTY		0x00000002 /* dirty pages from write operation */
#define	R4STALE		0x00000004 /* stale, don't even attempt to write */
#define	R4MODINPROGRESS	0x00000008 /* page modification happening */
#define	R4TRUNCATE	0x00000010 /* truncating, don't commit */
#define	R4HAVEVERF	0x00000020 /* have a write verifier to compare with */
#define	R4COMMIT	0x00000040 /* commit in progress */
#define	R4COMMITWAIT	0x00000080 /* someone is waiting to do a commit */
#define	R4HASHED	0x00000100 /* rnode is in hash queues */
#define	R4OUTOFSPACE	0x00000200 /* an out of space error has happened */
#define	R4LODANGLERS	0x00000400 /* dangling lock_owners need cleanup */
#define	R4WRITEMODIFIED	0x00000800 /* file data has been modified by write */
#define	R4DIRECTIO	0x00001000 /* bypass the buffer cache */
#define	R4RECOVERR	0x00002000 /* couldn't recover */
#define	R4RECEXPFH	0x00004000 /* recovering expired filehandle */
#define	R4RECOVERRP	0x00008000 /* R4RECOVERR pending, but not set (yet) */
#define	R4ISXATTR	0x00020000 /* rnode is a named attribute */
#define	R4DELMAPLIST	0x00040000 /* delmap callers tracked for as callback */
#define	R4PGFLUSH	0x00080000 /* page flush thread active */
#define	R4LOOKUP	0x00200000 /* a lookup was done in the directory */
/*
 * vnode <-> rnode conversion:
 *	RTOV4(rp) - the rnode's own vnode (its r_vnode)
 *	VTOR4(vp) - the rnode stored in the vnode's v_data
 */
#define	RTOV4(rp)	((rp)->r_vnode)
#define	VTOR4(vp)	((rnode4_t *)(vp)->v_data)

/*
 * Stub tests on r_stub_type: any stub at all, or a mirror-mount stub.
 */
#define	RP_ISSTUB(rp)	((rp)->r_stub_type != NFS4_STUB_NONE)
#define	RP_ISSTUB_MIRRORMOUNT(rp) ((rp)->r_stub_type == NFS4_STUB_MIRRORMOUNT)
364 
/*
 * Open file instances.
 *
 * A singly linked list (through re_next) of files together with an array
 * of their open streams.  r4mkopenlist() returns such a list and
 * r4releopenlist() takes one; both are declared later in this header.
 */

typedef struct nfs4_opinst {
	struct nfs4_opinst	*re_next; /* next in list */
	vnode_t			*re_vp;	/* held reference */
	uint32_t		re_numosp; /* number of valid open streams */
	nfs4_open_stream_t	**re_osp; /* held reference */
} nfs4_opinst_t;
375 
376 #ifdef _KERNEL
377 
extern long nrnode;	/* NOTE(review): presumably the rnode limit; confirm */

/*
 * Used for r_delay_interval
 *
 * rnode4_t::r_delay_interval is kept in seconds; going by their names,
 * these are the first interval used and the largest one allowed.
 */
#define	NFS4_INITIAL_DELAY_INTERVAL	 1
#define	NFS4_MAX_DELAY_INTERVAL		20
383 
/*
 * rnode lookup, hash queue and free list routines.  The grouping is
 * inferred from the names; the definitions are not part of this header.
 */
extern rnode4_t	*r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *);
extern rnode4_t	*r4find_unlocked(nfs4_sharedfh_t *, struct vfs *);
extern void	r4flush(struct vfs *, cred_t *);
extern void	destroy_rtable4(struct vfs *, cred_t *);
extern int	check_rtable4(struct vfs *);
extern void	rp4_addfree(rnode4_t *, cred_t *);
extern void	rp4_addhash(rnode4_t *);
extern void	rp4_rmhash(rnode4_t *);
extern void	rp4_rmhash_locked(rnode4_t *);
extern int	rtable4hash(nfs4_sharedfh_t *);

/* Both makenfs4node variants hand back a vnode_t pointer. */
extern vnode_t *makenfs4node(nfs4_sharedfh_t *, nfs4_ga_res_t *, struct vfs *,
				hrtime_t, cred_t *, vnode_t *, nfs4_fname_t *);
extern vnode_t *makenfs4node_by_fh(nfs4_sharedfh_t *, nfs4_sharedfh_t *,
    nfs4_fname_t **, nfs4_ga_res_t *, mntinfo4_t *, cred_t *, hrtime_t);

/* Producer and consumer of nfs4_opinst_t lists for a mount. */
extern nfs4_opinst_t *r4mkopenlist(struct mntinfo4 *);
extern void	r4releopenlist(nfs4_opinst_t *);
402 
/*
 * Access cache calls
 *
 * nfs4_access_check() answers with an nfs4_access_type_t (unknown,
 * allowed or denied).  The two uint32_t arguments of nfs4_access_cache()
 * presumably populate acache4_t::known and acache4_t::allowed -- confirm.
 */
extern nfs4_access_type_t nfs4_access_check(rnode4_t *, uint32_t, cred_t *);
extern void	nfs4_access_cache(rnode4_t *rp, uint32_t, uint32_t, cred_t *);
extern int	nfs4_access_purge_rp(rnode4_t *);

/* Per-rnode and per-vfs cache reclaim/invalidation entry points. */
extern int	nfs4_free_data_reclaim(rnode4_t *);
extern void	nfs4_rnode_invalidate(struct vfs *);

extern time_t	r2lease_time(rnode4_t *);
extern int	nfs4_directio(vnode_t *, int, cred_t *);
413 
/*
 * shadow vnode functions
 *
 * These operate on vnodes/svnodes (see svnode_t).  nfs4_clear_open_streams
 * takes an rnode instead and is simply declared alongside them.
 */
extern void	sv_activate(vnode_t **, vnode_t *, nfs4_fname_t **, int);
extern vnode_t	*sv_find(vnode_t *, vnode_t *, nfs4_fname_t **);
extern void	sv_update_path(vnode_t *, char *, char *);
extern void	sv_inactive(vnode_t *);
extern void	sv_exchange(vnode_t **);
extern void	sv_uninit(svnode_t *);
extern void	nfs4_clear_open_streams(rnode4_t *);
422 
/*
 * Mark cached attributes as timed out
 *
 * PURGE_ATTRCACHE4_LOCKED() updates the rnode directly and takes no lock
 * itself: the attribute times are set to "now" and the cached pathconf
 * information is flagged invalid.  PURGE_ATTRCACHE4() invokes it with
 * r_statelock held, so callers of the _LOCKED form are expected to hold
 * rp->r_statelock already.  (The rule that the caller must not be holding
 * r_statelock applies to PURGE_ATTRCACHE4(), not to this macro.)
 *
 * rp may be any pointer expression but is evaluated more than once, so it
 * must be free of side effects.  The expansion is a single statement:
 * follow it with a semicolon, and it is safe in an unbraced if/else.
 */
#define	PURGE_ATTRCACHE4_LOCKED(rp)	do {				\
	(rp)->r_time_attr_inval = gethrtime();				\
	(rp)->r_time_attr_saved = (rp)->r_time_attr_inval;		\
	(rp)->r_pathconf.pc4_xattr_valid = 0;				\
	(rp)->r_pathconf.pc4_cache_valid = 0;				\
} while (0)
433 
/*
 * Mark the cached attributes of vp's rnode as timed out: look up the
 * rnode with VTOR4(), take r_statelock, run PURGE_ATTRCACHE4_LOCKED()
 * and drop the lock again.
 *
 * The caller must not be holding the rnode r_statelock mutex.
 *
 * The internal pointer is deliberately not called "rp": with that name an
 * argument that itself mentions rp, e.g. PURGE_ATTRCACHE4(RTOV4(rp)),
 * would be captured by the macro's own declaration and initialise the
 * local from itself.  The expansion is a single statement, so follow it
 * with a semicolon; it is safe in an unbraced if/else.
 */
#define	PURGE_ATTRCACHE4(vp)	do {					\
	rnode4_t *purge_attr_rp4 = VTOR4(vp);				\
	mutex_enter(&purge_attr_rp4->r_statelock);			\
	PURGE_ATTRCACHE4_LOCKED(purge_attr_rp4);			\
	mutex_exit(&purge_attr_rp4->r_statelock);			\
} while (0)
440 
441 
/*
 * nfs4_async_readdir() is given a readdir cache entry and a callback of
 * type int (*)(vnode_t *, rddir4_cache *, cred_t *).
 */
extern void	nfs4_async_readdir(vnode_t *, rddir4_cache *,
			cred_t *, int (*)(vnode_t *, rddir4_cache *, cred_t *));
extern char	*rnode4info(rnode4_t *rp);

extern int	writerp4(rnode4_t *, caddr_t, int, struct uio *, int);
extern void	nfs4_set_nonvattrs(rnode4_t *, struct nfs4attr_to_vattr *);
extern void	nfs4delegabandon(rnode4_t *);
/*
 * The two stateid lookups return a stateid4 by value; nfs4_get_stateid()
 * takes one extra bool_t argument compared with nfs4_get_w_stateid().
 */
extern stateid4 nfs4_get_w_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
			nfs_opnum4, nfs4_stateid_types_t *);
extern stateid4 nfs4_get_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
			nfs_opnum4, nfs4_stateid_types_t *, bool_t);
/* Results are handed back through the three trailing out-pointers. */
extern nfsstat4 nfs4_find_or_create_lock_owner(pid_t, rnode4_t *, cred_t *,
			nfs4_open_owner_t **, nfs4_open_stream_t **,
			nfs4_lock_owner_t **);
extern cred_t   *nfs4_get_otw_cred_by_osp(rnode4_t *, cred_t *,
			nfs4_open_stream_t **, bool_t *, bool_t *);
458 
459 
/*
 * Bit flags accepted in the int flag argument of nfs4delegreturn().
 */
#define	NFS4_DR_FORCE	0x01	/* discard even if start_op fails */
#define	NFS4_DR_PUSH	0x02	/* push modified data back to the server */
#define	NFS4_DR_DISCARD	0x04	/* discard the delegation w/o delegreturn */
#define	NFS4_DR_DID_OP	0x08	/* calling function did nfs4_start_op */
#define	NFS4_DR_RECALL	0x10	/* delegreturn done in response to CB_RECALL */
#define	NFS4_DR_REOPEN	0x20	/* perform file reopens, if applicable */
469 
/*
 * Delegation handling.  The int argument of nfs4delegreturn() is a mask
 * of the NFS4_DR_* flags defined in this header.
 */
extern int nfs4delegreturn(rnode4_t *, int);
extern void	nfs4_delegreturn_all(nfs4_server_t *);
extern void	nfs4delegreturn_cleanup(rnode4_t *, nfs4_server_t *);
extern void nfs4_delegation_accept(rnode4_t *, open_claim_type4, OPEN4res *,
		nfs4_ga_res_t *, cred_t *);

extern void	nfs4_dlistclean(void);
extern void	nfs4_deleg_discard(mntinfo4_t *, nfs4_server_t *);

/*
 * Per-rnode readdir cache.  rddir4_cache_lookup() hands back an entry
 * pointer and rddir4_cache_rele() takes one (presumably to drop the
 * reference the lookup took -- confirm).
 */
extern void	rddir4_cache_create(rnode4_t *);
extern void	rddir4_cache_purge(rnode4_t *);
extern void	rddir4_cache_destroy(rnode4_t *);
extern rddir4_cache *rddir4_cache_lookup(rnode4_t *, offset_t, int);
extern void	rddir4_cache_rele(rnode4_t *, rddir4_cache *);

/* NOTE(review): by name these set rnode4_t::r_stub_type; confirm. */
extern void	r4_stub_mirrormount(rnode4_t *);
extern void	r4_stub_none(rnode4_t *);
487 
#ifdef DEBUG
/*
 * Buffer allocate/free routines for the readdir cache that are only
 * declared in DEBUG builds.
 */
extern char	*rddir4_cache_buf_alloc(size_t, int);
extern void	rddir4_cache_buf_free(void *, size_t);
#endif
492 
493 
494 
495 #endif /* _KERNEL */
496 
497 #ifdef	__cplusplus
498 }
499 #endif
500 
501 #endif	/* _NFS_RNODE4_H */
502