1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #ifndef _NFS_RNODE4_H 30 #define _NFS_RNODE4_H 31 32 #pragma ident "%Z%%M% %I% %E% SMI" 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 #include <nfs/rnode.h> /* for symlink_cache, nfs_rwlock_t, etc. */ 39 #include <nfs/nfs4.h> 40 #include <nfs/nfs4_clnt.h> 41 #include <sys/thread.h> 42 #include <sys/sysmacros.h> /* for offsetof */ 43 44 typedef enum nfs4_stub_type { 45 NFS4_STUB_NONE, 46 NFS4_STUB_MIRRORMOUNT 47 } nfs4_stub_type_t; 48 49 typedef enum nfs4_access_type { 50 NFS4_ACCESS_UNKNOWN, 51 NFS4_ACCESS_ALLOWED, 52 NFS4_ACCESS_DENIED 53 } nfs4_access_type_t; 54 55 /* 56 * Access cache 57 */ 58 typedef struct acache4_hash { 59 struct acache4 *next; 60 struct acache4 *prev; 61 krwlock_t lock; 62 } acache4_hash_t; 63 64 typedef struct acache4 { 65 struct acache4 *next; /* next and prev must be first */ 66 struct acache4 *prev; 67 uint32_t known; 68 uint32_t allowed; 69 struct rnode4 *rnode; 70 cred_t *cred; 71 struct acache4 *list; 72 struct acache4_hash *hashq; 73 } acache4_t; 74 75 /* 76 * Note on the different buffer sizes in rddir4_cache: 77 * There seems to be some discrepancy between the intended and actual 78 * use of entlen and buflen, which does not correspond to the comment below. 79 * entlen - nfsv2/3 used as both alloc'd size of entries buffer and 80 * as the actual size of the entries (XXX is this correct?). 81 * nfsv4 will use it only as the alloc'd size. 82 * buflen - used for calculations of readahead. 83 * actlen - added for nfsv4 to serve as the size of the useful 84 * portion of the entries buffer. That is because in 85 * nfsv4, the otw entries are converted to system entries, 86 * and may not be the same size - thus buffer may not be full. 87 */ 88 typedef struct rddir4_cache { 89 lloff_t _cookie; /* cookie used to find this cache entry */ 90 lloff_t _ncookie; /* cookie used to find the next cache entry */ 91 char *entries; /* buffer containing dirent entries */ 92 int eof; /* EOF reached after this request */ 93 int entlen; /* size of dirent entries in buf */ 94 int buflen; /* size of the buffer used to store entries */ 95 int actlen; /* size of the actual entries (nfsv4 only) */ 96 int flags; /* control flags, see below */ 97 kcondvar_t cv; /* cv for blocking */ 98 int error; /* error from RPC operation */ 99 void *data; /* private data */ 100 } rddir4_cache; 101 102 #define nfs4_cookie _cookie._f 103 #define nfs4_ncookie _ncookie._f 104 105 /* 106 * Shadow vnode, v4 only. 107 * 108 * A file's shadow vnode list is protected by its hash bucket lock, 109 * r_hashq->r_lock. 110 * 111 * sv_r_vnode is protected by the appropriate vnode locks. 112 * 113 * sv_dfh, sv_name, sv_dfileid, and sv_dfileid_valid are protected 114 * by rp->r_svlock. 115 */ 116 117 typedef struct insq_link { 118 void *forw; 119 void *back; 120 } insq_link_t; 121 122 typedef struct svnode { 123 insq_link_t sv_link; /* must be first for insque */ 124 vnode_t *sv_r_vnode; /* vnode for this shadow */ 125 nfs4_fname_t *sv_name; /* component name */ 126 nfs4_sharedfh_t *sv_dfh; /* directory file handle */ 127 } svnode_t; 128 129 #define sv_forw sv_link.forw 130 #define sv_back sv_link.back 131 extern svnode_t *vtosv(vnode_t *); 132 #define VTOSV(vp) vtosv(vp) 133 #define SVTOV(svp) (((svp)->sv_r_vnode)) 134 #define IS_SHADOW(vp, rp) ((vp) != (rp)->r_vnode) 135 136 /* 137 * The format of the hash bucket used to lookup rnodes from a file handle. 138 */ 139 typedef struct r4hashq { 140 struct rnode4 *r_hashf; 141 struct rnode4 *r_hashb; 142 krwlock_t r_lock; 143 } r4hashq_t; 144 145 /* 146 * Remote file information structure. 147 * 148 * The rnode is the "inode" for remote files. It contains all the 149 * information necessary to handle remote file on the client side. 150 * 151 * Note on file sizes: we keep two file sizes in the rnode: the size 152 * according to the client (r_size) and the size according to the server 153 * (r_attr.va_size). They can differ because we modify r_size during a 154 * write system call (nfs_rdwr), before the write request goes over the 155 * wire (before the file is actually modified on the server). If an OTW 156 * request occurs before the cached data is written to the server the file 157 * size returned from the server (r_attr.va_size) may not match r_size. 158 * r_size is the one we use, in general. r_attr.va_size is only used to 159 * determine whether or not our cached data is valid. 160 * 161 * Each rnode has 5 locks associated with it (not including the rnode 162 * hash table and free list locks): 163 * 164 * r_rwlock: Serializes nfs_write and nfs_setattr requests 165 * and allows nfs_read requests to proceed in parallel. 166 * Serializes reads/updates to directories. 167 * 168 * r_lkserlock: Serializes lock requests with map, write, and 169 * readahead operations. 170 * 171 * r_statelock: Protects all fields in the rnode except for 172 * those listed below. This lock is intented 173 * to be held for relatively short periods of 174 * time (not accross entire putpage operations, 175 * for example). 176 * 177 * r_statev4_lock: Protects the created_v4 flag, the lock_owners list, 178 * and all the delegation fields except r_deleg_list. 179 * 180 * r_os_lock: Protects r_open_streams. 181 * 182 * 183 * The following members are protected by the mutex rp4freelist_lock: 184 * r_freef 185 * r_freeb 186 * 187 * The following members are protected by the hash bucket rwlock: 188 * r_hashf 189 * r_hashb 190 * 191 * r_fh is read-only except when an rnode is created (or recycled from the 192 * free list). 193 * 194 * The following members are protected by nfs4_server_t::s_lock: 195 * r_deleg_list 196 * 197 * Note: r_modaddr is only accessed when the r_statelock mutex is held. 198 * Its value is also controlled via r_rwlock. It is assumed that 199 * there will be only 1 writer active at a time, so it safe to 200 * set r_modaddr and release r_statelock as long as the r_rwlock 201 * writer lock is held. 202 * 203 * 64-bit offsets: the code formerly assumed that atomic reads of 204 * r_size were safe and reliable; on 32-bit architectures, this is 205 * not true since an intervening bus cycle from another processor 206 * could update half of the size field. The r_statelock must now 207 * be held whenever any kind of access of r_size is made. 208 * 209 * Lock ordering: 210 * r_rwlock > r_lkserlock > r_os_lock > r_statelock > r_statev4_lock 211 * vnode_t::v_lock > r_os_lock 212 */ 213 struct exportinfo; /* defined in nfs/export.h */ 214 struct servinfo4; /* defined in nfs/nfs4_clnt.h */ 215 struct failinfo; /* defined in nfs/nfs_clnt.h */ 216 struct mntinfo4; /* defined in nfs/nfs4_clnt.h */ 217 218 typedef struct rnode4 { 219 /* the hash fields must be first to match the rhashq_t */ 220 struct rnode4 *r_hashf; /* hash queue forward pointer */ 221 struct rnode4 *r_hashb; /* hash queue back pointer */ 222 struct rnode4 *r_freef; /* free list forward pointer */ 223 struct rnode4 *r_freeb; /* free list back pointer */ 224 r4hashq_t *r_hashq; /* pointer to the hash bucket */ 225 226 svnode_t r_svnode; /* "master" shadow vnode for file */ 227 kmutex_t r_svlock; /* serializes access to svnode list */ 228 nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */ 229 nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */ 230 kmutex_t r_statelock; /* protects (most of) rnode contents */ 231 nfs4_sharedfh_t *r_fh; /* file handle */ 232 struct servinfo4 233 *r_server; /* current server */ 234 u_offset_t r_nextr; /* next byte read offset (read-ahead) */ 235 uint_t r_flags; /* flags, see below */ 236 short r_error; /* async write error */ 237 cred_t *r_unlcred; /* unlinked credentials */ 238 char *r_unlname; /* unlinked file name */ 239 vnode_t *r_unldvp; /* parent dir of unlinked file */ 240 vnode_t *r_xattr_dir; /* cached xattr dir vnode */ 241 len_t r_size; /* client's view of file size */ 242 vattr_t r_attr; /* cached vnode attributes */ 243 hrtime_t r_time_attr_saved; /* time attributes were cached */ 244 hrtime_t r_time_attr_inval; /* time attributes become invalid */ 245 hrtime_t r_time_cache_inval; /* time caches become invalid */ 246 time_t r_delay_wait; /* future time for DELAY handling */ 247 int r_delay_interval; /* Number of Secs of last DELAY */ 248 time_t r_last_recov; /* time of last recovery operation */ 249 nfs4_recov_t r_recov_act; /* action from last recovery op */ 250 long r_mapcnt; /* count of mmapped pages */ 251 uint_t r_count; /* # of refs not reflect in v_count */ 252 uint_t r_awcount; /* # of outstanding async write */ 253 uint_t r_gcount; /* getattrs waiting to flush pages */ 254 kcondvar_t r_cv; /* condvar for blocked threads */ 255 int (*r_putapage) /* address of putapage routine */ 256 (vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *); 257 void *r_dir; /* cache of readdir responses */ 258 rddir4_cache *r_direof; /* pointer to the EOF entry */ 259 symlink_cache r_symlink; /* cached readlink response */ 260 verifier4 r_writeverf; /* file data write verifier */ 261 u_offset_t r_modaddr; /* address for page in writerp */ 262 commit_t r_commit; /* commit information */ 263 u_offset_t r_truncaddr; /* base for truncate operation */ 264 vsecattr_t *r_secattr; /* cached security attributes (acls) */ 265 verifier4 r_cookieverf4; /* version 4 readdir cookie verifier */ 266 nfs4_pathconf_info_t r_pathconf; /* cached pathconf info */ 267 acache4_t *r_acache; /* list of access cache entries */ 268 list_t r_open_streams; /* open streams list */ 269 kmutex_t r_os_lock; /* protects r_open_streams */ 270 nfs4_lock_owner_t 271 r_lo_head; /* lock owners list head */ 272 int created_v4; /* 1 if file has been created in v4 */ 273 kmutex_t r_statev4_lock; /* protects created_v4, state4ptr */ 274 275 list_node_t r_deleg_link; /* linkage into list of */ 276 /* delegated rnodes for this server */ 277 open_delegation_type4 278 r_deleg_type; /* type of delegation granted */ 279 stateid4 r_deleg_stateid; 280 /* delegation state id */ 281 nfs_space_limit4 282 r_deleg_limit; /* file limits returned from */ 283 /* server on delegated open */ 284 nfsace4 r_deleg_perms; /* file permissions returned from */ 285 /* server on delegated open */ 286 fattr4_change r_deleg_change; /* current deleg change attr */ 287 fattr4_change r_deleg_change_grant; 288 /* change @ write deleg grant */ 289 cred_t *r_deleg_cred; /* credential in force when the */ 290 /* delegation was granted */ 291 open_delegation_type4 292 r_deleg_needs_recovery; 293 /* delegation needs recovery */ 294 /* This contains the delegation type */ 295 /* for use with CLAIM_PREVIOUS. */ 296 /* OPEN_DELEGATE_NONE means recovery */ 297 /* is not needed. */ 298 unsigned r_deleg_needs_recall:1; 299 /* delegation has been recalled by */ 300 /* the server during open with */ 301 /* CLAIM_PREVIOUS */ 302 unsigned r_deleg_return_pending:1; 303 /* delegreturn is pending, don't use */ 304 /* the delegation stateid, set in */ 305 /* nfs4_dlistadd */ 306 unsigned r_deleg_return_inprog:1; 307 /* delegreturn is in progress, may */ 308 /* only be set by nfs4delegreturn. */ 309 nfs_rwlock_t r_deleg_recall_lock; 310 /* lock for synchronizing delegreturn */ 311 /* with in other operations, acquired */ 312 /* in read mode by nfs4_start_fop, */ 313 /* acquired in write mode in */ 314 /* nfs4delegreturn */ 315 fattr4_change r_change; /* GETATTR4 change attr; client */ 316 /* should always request change */ 317 /* when c/mtime requested to keep */ 318 /* change and c/mtime in sync */ 319 fattr4_fileid r_mntd_fid; /* mounted on fileid attr */ 320 kthread_t *r_serial; /* attrcache validation thread */ 321 kthread_t *r_pgflush; /* thread flushing page cache */ 322 list_t r_indelmap; /* list of delmap callers */ 323 fattr4_fsid r_srv_fsid; /* fsid of srv fs containing object */ 324 /* when rnode created; compare with */ 325 /* sv_fsid (servinfo4_t) to see why */ 326 /* stub type was set */ 327 nfs4_stub_type_t r_stub_type; 328 /* e.g. mirror-mount */ 329 } rnode4_t; 330 331 #define r_vnode r_svnode.sv_r_vnode 332 333 /* 334 * Flags 335 */ 336 #define R4READDIRWATTR 0x1 /* Use READDIR with attributes */ 337 #define R4DIRTY 0x2 /* dirty pages from write operation */ 338 #define R4STALE 0x4 /* stale, don't even attempt to write */ 339 #define R4MODINPROGRESS 0x8 /* page modification happening */ 340 #define R4TRUNCATE 0x10 /* truncating, don't commit */ 341 #define R4HAVEVERF 0x20 /* have a write verifier to compare against */ 342 #define R4COMMIT 0x40 /* commit in progress */ 343 #define R4COMMITWAIT 0x80 /* someone is waiting to do a commit */ 344 #define R4HASHED 0x100 /* rnode is in hash queues */ 345 #define R4OUTOFSPACE 0x200 /* an out of space error has happened */ 346 #define R4LODANGLERS 0x400 /* rnode has dangling lock_owners to cleanup */ 347 #define R4WRITEMODIFIED 0x800 /* file data has been modified by write */ 348 #define R4DIRECTIO 0x1000 /* bypass the buffer cache */ 349 #define R4RECOVERR 0x2000 /* couldn't recover */ 350 #define R4RECEXPFH 0x4000 /* recovering expired filehandle */ 351 #define R4RECOVERRP 0x8000 /* R4RECOVERR pending, but not set (yet) */ 352 #define R4ISXATTR 0x20000 /* rnode is a named attribute */ 353 #define R4DELMAPLIST 0x40000 /* delmap callers tracked for as callback */ 354 #define R4PGFLUSH 0x80000 /* page flush thread active */ 355 #define R4LOOKUP 0x200000 /* a lookup has been done in the directory */ 356 /* 357 * Convert between vnode and rnode 358 */ 359 #define RTOV4(rp) ((rp)->r_vnode) 360 #define VTOR4(vp) ((rnode4_t *)((vp)->v_data)) 361 362 #define RP_ISSTUB(rp) (((rp)->r_stub_type != NFS4_STUB_NONE)) 363 #define RP_ISSTUB_MIRRORMOUNT(rp) ((rp)->r_stub_type == NFS4_STUB_MIRRORMOUNT) 364 365 /* 366 * Open file instances. 367 */ 368 369 typedef struct nfs4_opinst { 370 struct nfs4_opinst *re_next; /* next in list */ 371 vnode_t *re_vp; /* held reference */ 372 uint32_t re_numosp; /* number of valid open streams */ 373 nfs4_open_stream_t **re_osp; /* held reference */ 374 } nfs4_opinst_t; 375 376 #ifdef _KERNEL 377 378 extern long nrnode; 379 380 /* Used for r_delay_interval */ 381 #define NFS4_INITIAL_DELAY_INTERVAL 1 382 #define NFS4_MAX_DELAY_INTERVAL 20 383 384 extern rnode4_t *r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *); 385 extern rnode4_t *r4find_unlocked(nfs4_sharedfh_t *, struct vfs *); 386 extern void r4flush(struct vfs *, cred_t *); 387 extern void destroy_rtable4(struct vfs *, cred_t *); 388 extern int check_rtable4(struct vfs *); 389 extern void rp4_addfree(rnode4_t *, cred_t *); 390 extern void rp4_addhash(rnode4_t *); 391 extern void rp4_rmhash(rnode4_t *); 392 extern void rp4_rmhash_locked(rnode4_t *); 393 extern int rtable4hash(nfs4_sharedfh_t *); 394 395 extern vnode_t *makenfs4node(nfs4_sharedfh_t *, nfs4_ga_res_t *, struct vfs *, 396 hrtime_t, cred_t *, vnode_t *, nfs4_fname_t *); 397 extern vnode_t *makenfs4node_by_fh(nfs4_sharedfh_t *, nfs4_sharedfh_t *, 398 nfs4_fname_t **, nfs4_ga_res_t *, mntinfo4_t *, cred_t *, hrtime_t); 399 400 extern nfs4_opinst_t *r4mkopenlist(struct mntinfo4 *); 401 extern void r4releopenlist(nfs4_opinst_t *); 402 403 /* Access cache calls */ 404 extern nfs4_access_type_t nfs4_access_check(rnode4_t *, uint32_t, cred_t *); 405 extern void nfs4_access_cache(rnode4_t *rp, uint32_t, uint32_t, cred_t *); 406 extern int nfs4_access_purge_rp(rnode4_t *); 407 408 extern int nfs4_free_data_reclaim(rnode4_t *); 409 extern void nfs4_rnode_invalidate(struct vfs *); 410 411 extern time_t r2lease_time(rnode4_t *); 412 extern int nfs4_directio(vnode_t *, int, cred_t *); 413 414 /* shadow vnode functions */ 415 extern void sv_activate(vnode_t **, vnode_t *, nfs4_fname_t **, int); 416 extern vnode_t *sv_find(vnode_t *, vnode_t *, nfs4_fname_t **); 417 extern void sv_update_path(vnode_t *, char *, char *); 418 extern void sv_inactive(vnode_t *); 419 extern void sv_exchange(vnode_t **); 420 extern void sv_uninit(svnode_t *); 421 extern void nfs4_clear_open_streams(rnode4_t *); 422 423 /* 424 * Mark cached attributes as timed out 425 * 426 * The caller must not be holding the rnode r_statelock mutex. 427 */ 428 #define PURGE_ATTRCACHE4_LOCKED(rp) \ 429 rp->r_time_attr_inval = gethrtime(); \ 430 rp->r_time_attr_saved = rp->r_time_attr_inval; \ 431 rp->r_pathconf.pc4_xattr_valid = 0; \ 432 rp->r_pathconf.pc4_cache_valid = 0; 433 434 #define PURGE_ATTRCACHE4(vp) { \ 435 rnode4_t *rp = VTOR4(vp); \ 436 mutex_enter(&rp->r_statelock); \ 437 PURGE_ATTRCACHE4_LOCKED(rp); \ 438 mutex_exit(&rp->r_statelock); \ 439 } 440 441 442 extern void nfs4_async_readdir(vnode_t *, rddir4_cache *, 443 cred_t *, int (*)(vnode_t *, rddir4_cache *, cred_t *)); 444 extern char *rnode4info(rnode4_t *rp); 445 446 extern int writerp4(rnode4_t *, caddr_t, int, struct uio *, int); 447 extern void nfs4_set_nonvattrs(rnode4_t *, struct nfs4attr_to_vattr *); 448 extern void nfs4delegabandon(rnode4_t *); 449 extern stateid4 nfs4_get_w_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *, 450 nfs_opnum4, nfs4_stateid_types_t *); 451 extern stateid4 nfs4_get_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *, 452 nfs_opnum4, nfs4_stateid_types_t *, bool_t); 453 extern nfsstat4 nfs4_find_or_create_lock_owner(pid_t, rnode4_t *, cred_t *, 454 nfs4_open_owner_t **, nfs4_open_stream_t **, 455 nfs4_lock_owner_t **); 456 extern cred_t *nfs4_get_otw_cred_by_osp(rnode4_t *, cred_t *, 457 nfs4_open_stream_t **, bool_t *, bool_t *); 458 459 460 /* 461 * Defines for the flag argument of nfs4delegreturn 462 */ 463 #define NFS4_DR_FORCE 0x1 /* discard even if start_op fails */ 464 #define NFS4_DR_PUSH 0x2 /* push modified data back to the server */ 465 #define NFS4_DR_DISCARD 0x4 /* discard the delegation w/o delegreturn */ 466 #define NFS4_DR_DID_OP 0x8 /* calling function did nfs4_start_op */ 467 #define NFS4_DR_RECALL 0x10 /* delegreturn done in response to CB_RECALL */ 468 #define NFS4_DR_REOPEN 0x20 /* perform file reopens, if applicable */ 469 470 extern int nfs4delegreturn(rnode4_t *, int); 471 extern void nfs4_delegreturn_all(nfs4_server_t *); 472 extern void nfs4delegreturn_cleanup(rnode4_t *, nfs4_server_t *); 473 extern void nfs4_delegation_accept(rnode4_t *, open_claim_type4, OPEN4res *, 474 nfs4_ga_res_t *, cred_t *); 475 476 extern void nfs4_dlistclean(void); 477 extern void nfs4_deleg_discard(mntinfo4_t *, nfs4_server_t *); 478 479 extern void rddir4_cache_create(rnode4_t *); 480 extern void rddir4_cache_purge(rnode4_t *); 481 extern void rddir4_cache_destroy(rnode4_t *); 482 extern rddir4_cache *rddir4_cache_lookup(rnode4_t *, offset_t, int); 483 extern void rddir4_cache_rele(rnode4_t *, rddir4_cache *); 484 485 extern void r4_stub_mirrormount(rnode4_t *); 486 extern void r4_stub_none(rnode4_t *); 487 488 #ifdef DEBUG 489 extern char *rddir4_cache_buf_alloc(size_t, int); 490 extern void rddir4_cache_buf_free(void *, size_t); 491 #endif 492 493 494 495 #endif /* _KERNEL */ 496 497 #ifdef __cplusplus 498 } 499 #endif 500 501 #endif /* _NFS_RNODE4_H */ 502