/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
31 */ 32 33 #ifndef _NFS4_CLNT_H 34 #define _NFS4_CLNT_H 35 36 #include <sys/errno.h> 37 #include <sys/types.h> 38 #include <sys/kstat.h> 39 #include <sys/time.h> 40 #include <sys/flock.h> 41 #include <vm/page.h> 42 #include <nfs/nfs4_kprot.h> 43 #include <nfs/nfs4.h> 44 #include <nfs/rnode.h> 45 #include <sys/avl.h> 46 #include <sys/list.h> 47 #include <rpc/auth.h> 48 #include <sys/door.h> 49 #include <sys/condvar_impl.h> 50 #include <sys/zone.h> 51 52 #ifdef __cplusplus 53 extern "C" { 54 #endif 55 56 #define NFS4_SIZE_OK(size) ((size) <= MAXOFFSET_T) 57 58 /* Four states of nfs4_server's lease_valid */ 59 #define NFS4_LEASE_INVALID 0 60 #define NFS4_LEASE_VALID 1 61 #define NFS4_LEASE_UNINITIALIZED 2 62 #define NFS4_LEASE_NOT_STARTED 3 63 64 /* flag to tell the renew thread it should exit */ 65 #define NFS4_THREAD_EXIT 1 66 67 /* Default number of seconds to wait on GRACE and DELAY errors */ 68 #define NFS4ERR_DELAY_TIME 10 69 70 /* Number of hash buckets for open owners for each nfs4_server */ 71 #define NFS4_NUM_OO_BUCKETS 53 72 73 /* Number of freed open owners (per mntinfo4_t) to keep around */ 74 #define NFS4_NUM_FREED_OPEN_OWNERS 8 75 76 /* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */ 77 #define NFS4_RETRY_SCLID_DELAY 10 78 79 /* Number of times we should retry a SETCLIENTID(_CONFIRM) op */ 80 #define NFS4_NUM_SCLID_RETRIES 3 81 82 /* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */ 83 #define NFS4_NUM_RETRY_BAD_SEQID 3 84 85 /* 86 * Macro to wakeup sleeping async worker threads. 
87 */ 88 #define NFS4_WAKE_ASYNC_WORKER(work_cv) { \ 89 if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_QUEUE])) \ 90 cv_signal(&work_cv[NFS4_ASYNC_QUEUE]); \ 91 else if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_PGOPS_QUEUE])) \ 92 cv_signal(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]); \ 93 } 94 95 #define NFS4_WAKEALL_ASYNC_WORKERS(work_cv) { \ 96 cv_broadcast(&work_cv[NFS4_ASYNC_QUEUE]); \ 97 cv_broadcast(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]); \ 98 } 99 100 /* 101 * Is the attribute cache valid? If client holds a delegation, then attrs 102 * are by definition valid. If not, then check to see if attrs have timed out. 103 */ 104 #define ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \ 105 gethrtime() < VTOR4(vp)->r_time_attr_inval) 106 107 /* 108 * Flags to indicate whether to purge the DNLC for non-directory vnodes 109 * in a call to nfs_purge_caches. 110 */ 111 #define NFS4_NOPURGE_DNLC 0 112 #define NFS4_PURGE_DNLC 1 113 114 /* 115 * Is cache valid? 116 * Swap is always valid, if no attributes (attrtime == 0) or 117 * if mtime matches cached mtime it is valid 118 * NOTE: mtime is now a timestruc_t. 119 * Caller should be holding the rnode r_statelock mutex. 120 */ 121 #define CACHE4_VALID(rp, mtime, fsize) \ 122 ((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP || \ 123 (((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec && \ 124 (mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) && \ 125 ((fsize) == (rp)->r_attr.va_size))) 126 127 /* 128 * Macro to detect forced unmount or a zone shutdown. 129 */ 130 #define FS_OR_ZONE_GONE4(vfsp) \ 131 (((vfsp)->vfs_flag & VFS_UNMOUNTED) || \ 132 zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) 133 134 /* 135 * Macro to help determine whether a request failed because the underlying 136 * filesystem has been forcibly unmounted or because of zone shutdown. 
137 */ 138 #define NFS4_FRC_UNMT_ERR(err, vfsp) \ 139 ((err) == EIO && FS_OR_ZONE_GONE4((vfsp))) 140 141 /* 142 * Due to the way the address space callbacks are used to execute a delmap, 143 * we must keep track of how many times the same thread has called 144 * VOP_DELMAP()->nfs4_delmap(). This is done by having a list of 145 * nfs4_delmapcall_t's associated with each rnode4_t. This list is protected 146 * by the rnode4_t's r_statelock. The individual elements do not need to be 147 * protected as they will only ever be created, modified and destroyed by 148 * one thread (the call_id). 149 * See nfs4_delmap() for further explanation. 150 */ 151 typedef struct nfs4_delmapcall { 152 kthread_t *call_id; 153 int error; /* error from delmap */ 154 list_node_t call_node; 155 } nfs4_delmapcall_t; 156 157 /* 158 * delmap address space callback args 159 */ 160 typedef struct nfs4_delmap_args { 161 vnode_t *vp; 162 offset_t off; 163 caddr_t addr; 164 size_t len; 165 uint_t prot; 166 uint_t maxprot; 167 uint_t flags; 168 cred_t *cr; 169 nfs4_delmapcall_t *caller; /* to retrieve errors from the cb */ 170 } nfs4_delmap_args_t; 171 172 /* 173 * client side statistics 174 */ 175 /* 176 * Per-zone counters 177 */ 178 struct clstat4 { 179 kstat_named_t calls; /* client requests */ 180 kstat_named_t badcalls; /* rpc failures */ 181 kstat_named_t referrals; /* referrals */ 182 kstat_named_t referlinks; /* referrals as symlinks */ 183 kstat_named_t clgets; /* client handle gets */ 184 kstat_named_t cltoomany; /* client handle cache misses */ 185 #ifdef DEBUG 186 kstat_named_t clalloc; /* number of client handles */ 187 kstat_named_t noresponse; /* server not responding cnt */ 188 kstat_named_t failover; /* server failover count */ 189 kstat_named_t remap; /* server remap count */ 190 #endif 191 }; 192 193 #ifdef DEBUG 194 /* 195 * The following are statistics that describe the behavior of the system as a 196 * whole and don't correspond to any particular zone. 
197 */ 198 struct clstat4_debug { 199 kstat_named_t nrnode; /* number of allocated rnodes */ 200 kstat_named_t access; /* size of access cache */ 201 kstat_named_t dirent; /* size of readdir cache */ 202 kstat_named_t dirents; /* size of readdir buf cache */ 203 kstat_named_t reclaim; /* number of reclaims */ 204 kstat_named_t clreclaim; /* number of cl reclaims */ 205 kstat_named_t f_reclaim; /* number of free reclaims */ 206 kstat_named_t a_reclaim; /* number of active reclaims */ 207 kstat_named_t r_reclaim; /* number of rnode reclaims */ 208 kstat_named_t rpath; /* bytes used to store rpaths */ 209 }; 210 extern struct clstat4_debug clstat4_debug; 211 212 #endif 213 214 /* 215 * The NFS specific async_reqs structure. iotype4 is grouped to support two 216 * types of async thread pools, please read comments section of mntinfo4_t 217 * definition for more information. Care should be taken while adding new 218 * members to this group. 219 */ 220 221 enum iotype4 { 222 NFS4_PUTAPAGE, 223 NFS4_PAGEIO, 224 NFS4_COMMIT, 225 NFS4_READ_AHEAD, 226 NFS4_READDIR, 227 NFS4_INACTIVE, 228 NFS4_ASYNC_TYPES 229 }; 230 #define NFS4_ASYNC_PGOPS_TYPES (NFS4_COMMIT + 1) 231 232 /* 233 * NFS async requests queue type. 234 */ 235 enum ioqtype4 { 236 NFS4_ASYNC_QUEUE, 237 NFS4_ASYNC_PGOPS_QUEUE, 238 NFS4_MAX_ASYNC_QUEUES 239 }; 240 241 /* 242 * Number of NFS async threads operating exclusively on page op requests. 
243 */ 244 #define NUM_ASYNC_PGOPS_THREADS 0x2 245 246 struct nfs4_async_read_req { 247 void (*readahead)(); /* pointer to readahead function */ 248 u_offset_t blkoff; /* offset in file */ 249 struct seg *seg; /* segment to do i/o to */ 250 caddr_t addr; /* address to do i/o to */ 251 }; 252 253 struct nfs4_pageio_req { 254 int (*pageio)(); /* pointer to pageio function */ 255 page_t *pp; /* page list */ 256 u_offset_t io_off; /* offset in file */ 257 uint_t io_len; /* size of request */ 258 int flags; 259 }; 260 261 struct nfs4_readdir_req { 262 int (*readdir)(); /* pointer to readdir function */ 263 struct rddir4_cache *rdc; /* pointer to cache entry to fill */ 264 }; 265 266 struct nfs4_commit_req { 267 void (*commit)(); /* pointer to commit function */ 268 page_t *plist; /* page list */ 269 offset4 offset; /* starting offset */ 270 count4 count; /* size of range to be commited */ 271 }; 272 273 struct nfs4_async_reqs { 274 struct nfs4_async_reqs *a_next; /* pointer to next arg struct */ 275 #ifdef DEBUG 276 kthread_t *a_queuer; /* thread id of queueing thread */ 277 #endif 278 struct vnode *a_vp; /* vnode pointer */ 279 struct cred *a_cred; /* cred pointer */ 280 enum iotype4 a_io; /* i/o type */ 281 union { 282 struct nfs4_async_read_req a_read_args; 283 struct nfs4_pageio_req a_pageio_args; 284 struct nfs4_readdir_req a_readdir_args; 285 struct nfs4_commit_req a_commit_args; 286 } a_args; 287 }; 288 289 #define a_nfs4_readahead a_args.a_read_args.readahead 290 #define a_nfs4_blkoff a_args.a_read_args.blkoff 291 #define a_nfs4_seg a_args.a_read_args.seg 292 #define a_nfs4_addr a_args.a_read_args.addr 293 294 #define a_nfs4_putapage a_args.a_pageio_args.pageio 295 #define a_nfs4_pageio a_args.a_pageio_args.pageio 296 #define a_nfs4_pp a_args.a_pageio_args.pp 297 #define a_nfs4_off a_args.a_pageio_args.io_off 298 #define a_nfs4_len a_args.a_pageio_args.io_len 299 #define a_nfs4_flags a_args.a_pageio_args.flags 300 301 #define a_nfs4_readdir 
a_args.a_readdir_args.readdir 302 #define a_nfs4_rdc a_args.a_readdir_args.rdc 303 304 #define a_nfs4_commit a_args.a_commit_args.commit 305 #define a_nfs4_plist a_args.a_commit_args.plist 306 #define a_nfs4_offset a_args.a_commit_args.offset 307 #define a_nfs4_count a_args.a_commit_args.count 308 309 /* 310 * Security information 311 */ 312 typedef struct sv_secinfo { 313 uint_t count; /* how many sdata there are */ 314 uint_t index; /* which sdata[index] */ 315 struct sec_data *sdata; 316 } sv_secinfo_t; 317 318 /* 319 * Hash bucket for the mi's open owner list (mi_oo_list). 320 */ 321 typedef struct nfs4_oo_hash_bucket { 322 list_t b_oo_hash_list; 323 kmutex_t b_lock; 324 } nfs4_oo_hash_bucket_t; 325 326 /* 327 * Global array of ctags. 328 */ 329 extern ctag_t nfs4_ctags[]; 330 331 typedef enum nfs4_tag_type { 332 TAG_NONE, 333 TAG_ACCESS, 334 TAG_CLOSE, 335 TAG_CLOSE_LOST, 336 TAG_CLOSE_UNDO, 337 TAG_COMMIT, 338 TAG_DELEGRETURN, 339 TAG_FSINFO, 340 TAG_GET_SYMLINK, 341 TAG_GETATTR, 342 TAG_GETATTR_FSLOCATION, 343 TAG_INACTIVE, 344 TAG_LINK, 345 TAG_LOCK, 346 TAG_LOCK_RECLAIM, 347 TAG_LOCK_RESEND, 348 TAG_LOCK_REINSTATE, 349 TAG_LOCK_UNKNOWN, 350 TAG_LOCKT, 351 TAG_LOCKU, 352 TAG_LOCKU_RESEND, 353 TAG_LOCKU_REINSTATE, 354 TAG_LOOKUP, 355 TAG_LOOKUP_PARENT, 356 TAG_LOOKUP_VALID, 357 TAG_LOOKUP_VPARENT, 358 TAG_MKDIR, 359 TAG_MKNOD, 360 TAG_MOUNT, 361 TAG_OPEN, 362 TAG_OPEN_CONFIRM, 363 TAG_OPEN_CONFIRM_LOST, 364 TAG_OPEN_DG, 365 TAG_OPEN_DG_LOST, 366 TAG_OPEN_LOST, 367 TAG_OPENATTR, 368 TAG_PATHCONF, 369 TAG_PUTROOTFH, 370 TAG_READ, 371 TAG_READAHEAD, 372 TAG_READDIR, 373 TAG_READLINK, 374 TAG_RELOCK, 375 TAG_REMAP_LOOKUP, 376 TAG_REMAP_LOOKUP_AD, 377 TAG_REMAP_LOOKUP_NA, 378 TAG_REMAP_MOUNT, 379 TAG_RMDIR, 380 TAG_REMOVE, 381 TAG_RENAME, 382 TAG_RENAME_VFH, 383 TAG_RENEW, 384 TAG_REOPEN, 385 TAG_REOPEN_LOST, 386 TAG_SECINFO, 387 TAG_SETATTR, 388 TAG_SETCLIENTID, 389 TAG_SETCLIENTID_CF, 390 TAG_SYMLINK, 391 TAG_WRITE 392 } nfs4_tag_type_t; 393 394 #define 
NFS4_TAG_INITIALIZER { \ 395 {TAG_NONE, "", \ 396 {0x20202020, 0x20202020, 0x20202020}}, \ 397 {TAG_ACCESS, "access", \ 398 {0x61636365, 0x73732020, 0x20202020}}, \ 399 {TAG_CLOSE, "close", \ 400 {0x636c6f73, 0x65202020, 0x20202020}}, \ 401 {TAG_CLOSE_LOST, "lost close", \ 402 {0x6c6f7374, 0x20636c6f, 0x73652020}}, \ 403 {TAG_CLOSE_UNDO, "undo close", \ 404 {0x756e646f, 0x20636c6f, 0x73652020}}, \ 405 {TAG_COMMIT, "commit", \ 406 {0x636f6d6d, 0x69742020, 0x20202020}}, \ 407 {TAG_DELEGRETURN, "delegreturn", \ 408 {0x64656c65, 0x67726574, 0x75726e20}}, \ 409 {TAG_FSINFO, "fsinfo", \ 410 {0x6673696e, 0x666f2020, 0x20202020}}, \ 411 {TAG_GET_SYMLINK, "get symlink text", \ 412 {0x67657420, 0x736c6e6b, 0x20747874}}, \ 413 {TAG_GETATTR, "getattr", \ 414 {0x67657461, 0x74747220, 0x20202020}}, \ 415 {TAG_GETATTR_FSLOCATION, "getattr fslocation", \ 416 {0x67657461, 0x74747220, 0x66736c6f}}, \ 417 {TAG_INACTIVE, "inactive", \ 418 {0x696e6163, 0x74697665, 0x20202020}}, \ 419 {TAG_LINK, "link", \ 420 {0x6c696e6b, 0x20202020, 0x20202020}}, \ 421 {TAG_LOCK, "lock", \ 422 {0x6c6f636b, 0x20202020, 0x20202020}}, \ 423 {TAG_LOCK_RECLAIM, "reclaim lock", \ 424 {0x7265636c, 0x61696d20, 0x6c6f636b}}, \ 425 {TAG_LOCK_RESEND, "resend lock", \ 426 {0x72657365, 0x6e64206c, 0x6f636b20}}, \ 427 {TAG_LOCK_REINSTATE, "reinstate lock", \ 428 {0x7265696e, 0x7374206c, 0x6f636b20}}, \ 429 {TAG_LOCK_UNKNOWN, "unknown lock", \ 430 {0x756e6b6e, 0x6f776e20, 0x6c6f636b}}, \ 431 {TAG_LOCKT, "lock test", \ 432 {0x6c6f636b, 0x5f746573, 0x74202020}}, \ 433 {TAG_LOCKU, "unlock", \ 434 {0x756e6c6f, 0x636b2020, 0x20202020}}, \ 435 {TAG_LOCKU_RESEND, "resend locku", \ 436 {0x72657365, 0x6e64206c, 0x6f636b75}}, \ 437 {TAG_LOCKU_REINSTATE, "reinstate unlock", \ 438 {0x7265696e, 0x73742075, 0x6e6c636b}}, \ 439 {TAG_LOOKUP, "lookup", \ 440 {0x6c6f6f6b, 0x75702020, 0x20202020}}, \ 441 {TAG_LOOKUP_PARENT, "lookup parent", \ 442 {0x6c6f6f6b, 0x75702070, 0x6172656e}}, \ 443 {TAG_LOOKUP_VALID, "lookup valid", \ 444 
{0x6c6f6f6b, 0x75702076, 0x616c6964}}, \ 445 {TAG_LOOKUP_VPARENT, "lookup valid parent", \ 446 {0x6c6f6f6b, 0x766c6420, 0x7061726e}}, \ 447 {TAG_MKDIR, "mkdir", \ 448 {0x6d6b6469, 0x72202020, 0x20202020}}, \ 449 {TAG_MKNOD, "mknod", \ 450 {0x6d6b6e6f, 0x64202020, 0x20202020}}, \ 451 {TAG_MOUNT, "mount", \ 452 {0x6d6f756e, 0x74202020, 0x20202020}}, \ 453 {TAG_OPEN, "open", \ 454 {0x6f70656e, 0x20202020, 0x20202020}}, \ 455 {TAG_OPEN_CONFIRM, "open confirm", \ 456 {0x6f70656e, 0x5f636f6e, 0x6669726d}}, \ 457 {TAG_OPEN_CONFIRM_LOST, "lost open confirm", \ 458 {0x6c6f7374, 0x206f7065, 0x6e5f636f}}, \ 459 {TAG_OPEN_DG, "open downgrade", \ 460 {0x6f70656e, 0x20646772, 0x61646520}}, \ 461 {TAG_OPEN_DG_LOST, "lost open downgrade", \ 462 {0x6c737420, 0x6f70656e, 0x20646772}}, \ 463 {TAG_OPEN_LOST, "lost open", \ 464 {0x6c6f7374, 0x206f7065, 0x6e202020}}, \ 465 {TAG_OPENATTR, "openattr", \ 466 {0x6f70656e, 0x61747472, 0x20202020}}, \ 467 {TAG_PATHCONF, "pathconf", \ 468 {0x70617468, 0x636f6e66, 0x20202020}}, \ 469 {TAG_PUTROOTFH, "putrootfh", \ 470 {0x70757472, 0x6f6f7466, 0x68202020}}, \ 471 {TAG_READ, "read", \ 472 {0x72656164, 0x20202020, 0x20202020}}, \ 473 {TAG_READAHEAD, "readahead", \ 474 {0x72656164, 0x61686561, 0x64202020}}, \ 475 {TAG_READDIR, "readdir", \ 476 {0x72656164, 0x64697220, 0x20202020}}, \ 477 {TAG_READLINK, "readlink", \ 478 {0x72656164, 0x6c696e6b, 0x20202020}}, \ 479 {TAG_RELOCK, "relock", \ 480 {0x72656c6f, 0x636b2020, 0x20202020}}, \ 481 {TAG_REMAP_LOOKUP, "remap lookup", \ 482 {0x72656d61, 0x70206c6f, 0x6f6b7570}}, \ 483 {TAG_REMAP_LOOKUP_AD, "remap lookup attr dir", \ 484 {0x72656d70, 0x206c6b75, 0x70206164}}, \ 485 {TAG_REMAP_LOOKUP_NA, "remap lookup named attrs", \ 486 {0x72656d70, 0x206c6b75, 0x70206e61}}, \ 487 {TAG_REMAP_MOUNT, "remap mount", \ 488 {0x72656d61, 0x70206d6f, 0x756e7420}}, \ 489 {TAG_RMDIR, "rmdir", \ 490 {0x726d6469, 0x72202020, 0x20202020}}, \ 491 {TAG_REMOVE, "remove", \ 492 {0x72656d6f, 0x76652020, 0x20202020}}, \ 493 
{TAG_RENAME, "rename", \ 494 {0x72656e61, 0x6d652020, 0x20202020}}, \ 495 {TAG_RENAME_VFH, "rename volatile fh", \ 496 {0x72656e61, 0x6d652028, 0x76666829}}, \ 497 {TAG_RENEW, "renew", \ 498 {0x72656e65, 0x77202020, 0x20202020}}, \ 499 {TAG_REOPEN, "reopen", \ 500 {0x72656f70, 0x656e2020, 0x20202020}}, \ 501 {TAG_REOPEN_LOST, "lost reopen", \ 502 {0x6c6f7374, 0x2072656f, 0x70656e20}}, \ 503 {TAG_SECINFO, "secinfo", \ 504 {0x73656369, 0x6e666f20, 0x20202020}}, \ 505 {TAG_SETATTR, "setattr", \ 506 {0x73657461, 0x74747220, 0x20202020}}, \ 507 {TAG_SETCLIENTID, "setclientid", \ 508 {0x73657463, 0x6c69656e, 0x74696420}}, \ 509 {TAG_SETCLIENTID_CF, "setclientid_confirm", \ 510 {0x73636c6e, 0x7469645f, 0x636f6e66}}, \ 511 {TAG_SYMLINK, "symlink", \ 512 {0x73796d6c, 0x696e6b20, 0x20202020}}, \ 513 {TAG_WRITE, "write", \ 514 {0x77726974, 0x65202020, 0x20202020}} \ 515 } 516 517 /* 518 * These flags are for differentiating the search criterian for 519 * find_open_owner(). The comparison is done with the open_owners's 520 * 'oo_just_created' flag. 521 */ 522 #define NFS4_PERM_CREATED 0x0 523 #define NFS4_JUST_CREATED 0x1 524 525 /* 526 * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw' 527 * is stored upon a successful OPEN. This is needed when the user's effective 528 * and real uid's don't match. The 'oo_cred_otw' overrides the credential 529 * passed down by VFS for async read/write, commit, lock, and close operations. 530 * 531 * The oo_ref_count keeps track the number of active references on this 532 * data structure + number of nfs4_open_streams point to this structure. 533 * 534 * 'oo_valid' tells whether this stuct is about to be freed or not. 535 * 536 * 'oo_just_created' tells us whether this struct has just been created but 537 * not been fully finalized (that is created upon an OPEN request and 538 * finalized upon the OPEN success). 539 * 540 * The 'oo_seqid_inuse' is for the open seqid synchronization. 
If a thread 541 * is currently using the open owner and it's open_seqid, then it sets the 542 * oo_seqid_inuse to true if it currently is not set. If it is set then it 543 * does a cv_wait on the oo_cv_seqid_sync condition variable. When the thread 544 * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process 545 * waiting on the condition variable. 546 * 547 * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW, 548 * and 'oo_last_good_op' is the operation that issued the last valid seqid. 549 * 550 * Lock ordering: 551 * mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list) 552 * 553 * oo_seqid_inuse > mntinfo4_t::mi_lock 554 * oo_seqid_inuse > rnode4_t::r_statelock 555 * oo_seqid_inuse > rnode4_t::r_statev4_lock 556 * oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock 557 * 558 * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects: 559 * oo_last_good_op 560 * oo_last_good_seqid 561 * oo_name 562 * oo_seqid 563 * 564 * The 'oo_lock' protects: 565 * oo_cred 566 * oo_cred_otw 567 * oo_foo_node 568 * oo_hash_node 569 * oo_just_created 570 * oo_ref_count 571 * oo_valid 572 */ 573 574 typedef struct nfs4_open_owner { 575 cred_t *oo_cred; 576 int oo_ref_count; 577 int oo_valid; 578 int oo_just_created; 579 seqid4 oo_seqid; 580 seqid4 oo_last_good_seqid; 581 nfs4_tag_type_t oo_last_good_op; 582 unsigned oo_seqid_inuse:1; 583 cred_t *oo_cred_otw; 584 kcondvar_t oo_cv_seqid_sync; 585 /* 586 * Fix this to always be 8 bytes 587 */ 588 uint64_t oo_name; 589 list_node_t oo_hash_node; 590 list_node_t oo_foo_node; 591 kmutex_t oo_lock; 592 } nfs4_open_owner_t; 593 594 /* 595 * Static server information. 
596 * These fields are read-only once they are initialized; sv_lock 597 * should be held as writer if they are changed during mount: 598 * sv_addr 599 * sv_dhsec 600 * sv_hostname 601 * sv_hostnamelen 602 * sv_knconf 603 * sv_next 604 * sv_origknconf 605 * 606 * These fields are protected by sv_lock: 607 * sv_currsec 608 * sv_fhandle 609 * sv_flags 610 * sv_fsid 611 * sv_path 612 * sv_pathlen 613 * sv_pfhandle 614 * sv_save_secinfo 615 * sv_savesec 616 * sv_secdata 617 * sv_secinfo 618 * sv_supp_attrs 619 * 620 * Lock ordering: 621 * nfs_rtable4_lock > sv_lock 622 * rnode4_t::r_statelock > sv_lock 623 */ 624 typedef struct servinfo4 { 625 struct knetconfig *sv_knconf; /* bound TLI fd */ 626 struct knetconfig *sv_origknconf; /* For RDMA save orig knconf */ 627 struct netbuf sv_addr; /* server's address */ 628 nfs4_fhandle_t sv_fhandle; /* this server's filehandle */ 629 nfs4_fhandle_t sv_pfhandle; /* parent dir filehandle */ 630 int sv_pathlen; /* Length of server path */ 631 char *sv_path; /* Path name on server */ 632 uint32_t sv_flags; /* flags for this server */ 633 sec_data_t *sv_secdata; /* client initiated security data */ 634 sv_secinfo_t *sv_secinfo; /* server security information */ 635 sec_data_t *sv_currsec; /* security data currently used; */ 636 /* points to one of the sec_data */ 637 /* entries in sv_secinfo */ 638 sv_secinfo_t *sv_save_secinfo; /* saved secinfo */ 639 sec_data_t *sv_savesec; /* saved security data */ 640 sec_data_t *sv_dhsec; /* AUTH_DH data from the user land */ 641 char *sv_hostname; /* server's hostname */ 642 int sv_hostnamelen; /* server's hostname length */ 643 fattr4_fsid sv_fsid; /* fsid of shared obj */ 644 fattr4_supported_attrs sv_supp_attrs; 645 struct servinfo4 *sv_next; /* next in list */ 646 nfs_rwlock_t sv_lock; 647 } servinfo4_t; 648 649 /* sv_flags fields */ 650 #define SV4_TRYSECINFO 0x001 /* try secinfo data from the server */ 651 #define SV4_TRYSECDEFAULT 0x002 /* try a default flavor */ 652 #define SV4_NOTINUSE 
0x004 /* servinfo4_t had fatal errors */ 653 #define SV4_ROOT_STALE 0x008 /* root vnode got ESTALE */ 654 655 /* 656 * Lock call types. See nfs4frlock(). 657 */ 658 typedef enum nfs4_lock_call_type { 659 NFS4_LCK_CTYPE_NORM, 660 NFS4_LCK_CTYPE_RECLAIM, 661 NFS4_LCK_CTYPE_RESEND, 662 NFS4_LCK_CTYPE_REINSTATE 663 } nfs4_lock_call_type_t; 664 665 /* 666 * This structure holds the information for a lost open/close/open downgrade/ 667 * lock/locku request. It is also used for requests that are queued up so 668 * that the recovery thread can release server state after a forced 669 * unmount. 670 * "lr_op" is 0 if the struct is uninitialized. Otherwise, it is set to 671 * the proper OP_* nfs_opnum4 number. The other fields contain information 672 * to reconstruct the call. 673 * 674 * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the 675 * parent directroy without relying on vtodv (since we may not have a vp 676 * for the file we wish to create). 677 * 678 * lr_putfirst means that the request should go to the front of the resend 679 * queue, rather than the end. 
680 */ 681 typedef struct nfs4_lost_rqst { 682 list_node_t lr_node; 683 nfs_opnum4 lr_op; 684 vnode_t *lr_vp; 685 vnode_t *lr_dvp; 686 nfs4_open_owner_t *lr_oop; 687 struct nfs4_open_stream *lr_osp; 688 struct nfs4_lock_owner *lr_lop; 689 cred_t *lr_cr; 690 flock64_t *lr_flk; 691 bool_t lr_putfirst; 692 union { 693 struct { 694 nfs4_lock_call_type_t lru_ctype; 695 nfs_lock_type4 lru_locktype; 696 } lru_lockargs; /* LOCK, LOCKU */ 697 struct { 698 uint32_t lru_oaccess; 699 uint32_t lru_odeny; 700 enum open_claim_type4 lru_oclaim; 701 stateid4 lru_ostateid; /* reopen only */ 702 component4 lru_ofile; 703 } lru_open_args; 704 struct { 705 uint32_t lru_dg_access; 706 uint32_t lru_dg_deny; 707 } lru_open_dg_args; 708 } nfs4_lr_u; 709 } nfs4_lost_rqst_t; 710 711 #define lr_oacc nfs4_lr_u.lru_open_args.lru_oaccess 712 #define lr_odeny nfs4_lr_u.lru_open_args.lru_odeny 713 #define lr_oclaim nfs4_lr_u.lru_open_args.lru_oclaim 714 #define lr_ostateid nfs4_lr_u.lru_open_args.lru_ostateid 715 #define lr_ofile nfs4_lr_u.lru_open_args.lru_ofile 716 #define lr_dg_acc nfs4_lr_u.lru_open_dg_args.lru_dg_access 717 #define lr_dg_deny nfs4_lr_u.lru_open_dg_args.lru_dg_deny 718 #define lr_ctype nfs4_lr_u.lru_lockargs.lru_ctype 719 #define lr_locktype nfs4_lr_u.lru_lockargs.lru_locktype 720 721 /* 722 * Recovery actions. Some actions can imply further recovery using a 723 * different recovery action (e.g., recovering the clientid leads to 724 * recovering open files and locks). 725 */ 726 727 typedef enum { 728 NR_UNUSED, 729 NR_CLIENTID, 730 NR_OPENFILES, 731 NR_FHEXPIRED, 732 NR_FAILOVER, 733 NR_WRONGSEC, 734 NR_EXPIRED, 735 NR_BAD_STATEID, 736 NR_BADHANDLE, 737 NR_BAD_SEQID, 738 NR_OLDSTATEID, 739 NR_GRACE, 740 NR_DELAY, 741 NR_LOST_LOCK, 742 NR_LOST_STATE_RQST, 743 NR_STALE, 744 NR_MOVED 745 } nfs4_recov_t; 746 747 /* 748 * Administrative and debug message framework. 
749 */ 750 751 #define NFS4_MSG_MAX 100 752 extern int nfs4_msg_max; 753 754 #define NFS4_REFERRAL_LOOP_MAX 20 755 756 typedef enum { 757 RE_BAD_SEQID, 758 RE_BADHANDLE, 759 RE_CLIENTID, 760 RE_DEAD_FILE, 761 RE_END, 762 RE_FAIL_RELOCK, 763 RE_FAIL_REMAP_LEN, 764 RE_FAIL_REMAP_OP, 765 RE_FAILOVER, 766 RE_FILE_DIFF, 767 RE_LOST_STATE, 768 RE_OPENS_CHANGED, 769 RE_SIGLOST, 770 RE_SIGLOST_NO_DUMP, 771 RE_START, 772 RE_UNEXPECTED_ACTION, 773 RE_UNEXPECTED_ERRNO, 774 RE_UNEXPECTED_STATUS, 775 RE_WRONGSEC, 776 RE_LOST_STATE_BAD_OP, 777 RE_REFERRAL 778 } nfs4_event_type_t; 779 780 typedef enum { 781 RFS_NO_INSPECT, 782 RFS_INSPECT 783 } nfs4_fact_status_t; 784 785 typedef enum { 786 RF_BADOWNER, 787 RF_ERR, 788 RF_RENEW_EXPIRED, 789 RF_SRV_NOT_RESPOND, 790 RF_SRV_OK, 791 RF_SRVS_NOT_RESPOND, 792 RF_SRVS_OK, 793 RF_DELMAP_CB_ERR, 794 RF_SENDQ_FULL 795 } nfs4_fact_type_t; 796 797 typedef enum { 798 NFS4_MS_DUMP, 799 NFS4_MS_NO_DUMP 800 } nfs4_msg_status_t; 801 802 typedef struct nfs4_rfact { 803 nfs4_fact_type_t rf_type; 804 nfs4_fact_status_t rf_status; 805 bool_t rf_reboot; 806 nfs4_recov_t rf_action; 807 nfs_opnum4 rf_op; 808 nfsstat4 rf_stat4; 809 timespec_t rf_time; 810 int rf_error; 811 struct rnode4 *rf_rp1; 812 char *rf_char1; 813 } nfs4_rfact_t; 814 815 typedef struct nfs4_revent { 816 nfs4_event_type_t re_type; 817 nfsstat4 re_stat4; 818 uint_t re_uint; 819 pid_t re_pid; 820 struct mntinfo4 *re_mi; 821 struct rnode4 *re_rp1; 822 struct rnode4 *re_rp2; 823 char *re_char1; 824 char *re_char2; 825 nfs4_tag_type_t re_tag1; 826 nfs4_tag_type_t re_tag2; 827 seqid4 re_seqid1; 828 seqid4 re_seqid2; 829 } nfs4_revent_t; 830 831 typedef enum { 832 RM_EVENT, 833 RM_FACT 834 } nfs4_msg_type_t; 835 836 typedef struct nfs4_debug_msg { 837 timespec_t msg_time; 838 nfs4_msg_type_t msg_type; 839 char *msg_srv; 840 char *msg_mntpt; 841 union { 842 nfs4_rfact_t msg_fact; 843 nfs4_revent_t msg_event; 844 } rmsg_u; 845 nfs4_msg_status_t msg_status; 846 list_node_t msg_node; 847 } 
nfs4_debug_msg_t; 848 849 /* 850 * NFS private data per mounted file system 851 * The mi_lock mutex protects the following fields: 852 * mi_flags 853 * mi_in_recovery 854 * mi_recovflags 855 * mi_recovthread 856 * mi_error 857 * mi_printed 858 * mi_down 859 * mi_stsize 860 * mi_curread 861 * mi_curwrite 862 * mi_timers 863 * mi_curr_serv 864 * mi_klmconfig 865 * mi_oo_list 866 * mi_foo_list 867 * mi_foo_num 868 * mi_foo_max 869 * mi_lost_state 870 * mi_bseqid_list 871 * mi_ephemeral 872 * mi_ephemeral_tree 873 * 874 * Normally the netconfig information for the mount comes from 875 * mi_curr_serv and mi_klmconfig is NULL. If NLM calls need to use a 876 * different transport, mi_klmconfig contains the necessary netconfig 877 * information. 878 * 879 * The mi_async_lock mutex protects the following fields: 880 * mi_async_reqs 881 * mi_async_req_count 882 * mi_async_tail 883 * mi_async_curr[NFS4_MAX_ASYNC_QUEUES] 884 * mi_async_clusters 885 * mi_async_init_clusters 886 * mi_threads[NFS4_MAX_ASYNC_QUEUES] 887 * mi_inactive_thread 888 * mi_manager_thread 889 * 890 * The nfs4_server_t::s_lock protects the following fields: 891 * mi_clientid 892 * mi_clientid_next 893 * mi_clientid_prev 894 * mi_open_files 895 * 896 * The mntinfo4_t::mi_recovlock protects the following fields: 897 * mi_srvsettime 898 * mi_srvset_cnt 899 * mi_srv 900 * 901 * Changing mi_srv from one nfs4_server_t to a different one requires 902 * holding the mi_recovlock as RW_WRITER. 903 * Exception: setting mi_srv the first time in mount/mountroot is done 904 * holding the mi_recovlock as RW_READER. 
 *
 * Locking order:
 *	mi4_globals::mig_lock > mi_async_lock
 *	mi_async_lock > nfs4_server_t::s_lock > mi_lock
 *	mi_recovlock > mi_rename_lock > nfs_rtable4_lock
 *	nfs4_server_t::s_recovlock > mi_recovlock
 *	rnode4_t::r_rwlock > mi_rename_lock
 *	nfs_rtable4_lock > mi_lock
 *	nfs4_server_t::s_lock > mi_msg_list_lock
 *	mi_recovlock > nfs4_server_t::s_lock
 *	mi_recovlock > nfs4_server_lst_lock
 *
 * The 'mi_oo_list' represents the hash buckets that contain the
 * nfs4_open_owners for this particular mntinfo4.
 *
 * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
 * 'mi_foo_num' is the current number of freed open owners on the list,
 * 'mi_foo_max' is the maximum number of freed open owners that are allowable
 * on the list.
 *
 * mi_rootfh and mi_srvparentfh are read-only once created, but that just
 * refers to the pointer.  The contents must be updated to keep in sync
 * with mi_curr_serv.
 *
 * The mi_msg_list_lock protects against adding/deleting entries to the
 * mi_msg_list, and also the updating/retrieving of mi_lease_period;
 *
 * 'mi_zone' is initialized at structure creation time, and never
 * changes; it may be read without a lock.
 *
 * mi_zone_node is linkage into the mi4_globals.mig_list, and is
 * protected by mi4_globals.mig_list_lock.
 *
 * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
 * ephemeral structure for this ephemeral mount point. It can not be
 * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
 * tree.
 *
 * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
 * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
 * is the enclosing mntinfo4 for the ephemeral tree.
 */
struct zone;
struct nfs4_ephemeral;
struct nfs4_ephemeral_tree;
struct nfs4_server;

/*
 * NFSv4 mount information: one per mounted file system, hung off vfs_data.
 */
typedef struct mntinfo4 {
	kmutex_t	mi_lock;	/* protects mntinfo4 fields */
	struct servinfo4 *mi_servers;	/* server list */
	struct servinfo4 *mi_curr_serv;	/* current server */
	struct nfs4_sharedfh *mi_rootfh;	/* root filehandle */
	struct nfs4_sharedfh *mi_srvparentfh;	/* root's parent on server */
	kcondvar_t	mi_failover_cv;	/* failover synchronization */
	struct vfs	*mi_vfsp;	/* back pointer to vfs */
	enum vtype	mi_type;	/* file type of the root vnode */
	uint_t		mi_flags;	/* see below */
	uint_t		mi_recovflags;	/* if recovery active; see below */
	kthread_t	*mi_recovthread; /* active recov thread or NULL */
	uint_t		mi_error;	/* only set/valid when MI4_RECOV_FAIL */
					/* is set in mi_flags */
	int		mi_tsize;	/* transfer size (bytes) */
					/* really read size */
	int		mi_stsize;	/* server's max transfer size (bytes) */
					/* really write size */
	int		mi_timeo;	/* initial timeout in 10th sec */
	int		mi_retrans;	/* times to retry request */
	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
	len_t		mi_maxfilesize;	/* for pathconf _PC_FILESIZEBITS */
	int		mi_curread;	/* current read size */
	int		mi_curwrite;	/* current write size */
	uint_t		mi_count;	/* ref count */
	/*
	 * Async I/O management
	 * We have 2 pools of threads working on async I/O:
	 * (1) Threads which work on all async queues. Default number of
	 * threads in this queue is 8. Threads in this pool work on async
	 * queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
	 * active threads in this pool is tracked by
	 * mi_threads[NFS4_ASYNC_QUEUE].
	 * (2) Threads which work only on page op async queues.
	 * The page ops queue comprises NFS4_PUTAPAGE, NFS4_PAGEIO &
	 * NFS4_COMMIT. Default number of threads in this queue is 2
	 * (NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
	 * queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
	 * of active threads in this pool is tracked by
	 * mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
	 *
	 * In addition to above two pools, there is always one thread that
	 * handles over-the-wire requests for VOP_INACTIVE.
	 */
	struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
	struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
	struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
					/* current async queue */
	uint_t		mi_async_clusters[NFS4_ASYNC_TYPES];
	uint_t		mi_async_init_clusters;
	uint_t		mi_async_req_count; /* # outstanding work requests */
	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
	ushort_t	mi_threads[NFS4_MAX_ASYNC_QUEUES];
					/* number of active async threads */
	ushort_t	mi_max_threads;	/* max number of async threads */
	kthread_t	*mi_manager_thread; /* async manager thread id */
	kthread_t	*mi_inactive_thread; /* inactive thread id */
	kcondvar_t	mi_inact_req_cv; /* notify VOP_INACTIVE thread */
	kcondvar_t	mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
					/* tell workers to work */
	kcondvar_t	mi_async_cv;	/* all pool threads exited */
	kmutex_t	mi_async_lock;
	/*
	 * Other stuff
	 */
	struct pathcnf	*mi_pathconf;	/* static pathconf kludge */
	rpcprog_t	mi_prog;	/* RPC program number */
	rpcvers_t	mi_vers;	/* RPC program version number */
	char		**mi_rfsnames;	/* mapping to proc names */
	kstat_named_t	*mi_reqs;	/* count of requests */
	clock_t		mi_printftime;	/* last error printf time */
	nfs_rwlock_t	mi_recovlock;	/* separate ops from recovery (v4) */
	time_t		mi_grace_wait;	/* non-zero represents time to wait */
	/* when we switched nfs4_server_t - only for observability purposes */
	time_t		mi_srvsettime;
	nfs_rwlock_t	mi_rename_lock;	/* atomic volfh rename */
	struct nfs4_fname *mi_fname;	/* root fname */
	list_t		mi_lost_state;	/* resend list */
	list_t		mi_bseqid_list;	/* bad seqid list */
	/*
	 * Client Side Failover stats
	 */
	uint_t		mi_noresponse;	/* server not responding count */
	uint_t		mi_failover;	/* failover to new server count */
	uint_t		mi_remap;	/* remap to new server count */
	/*
	 * Kstat statistics
	 */
	struct kstat	*mi_io_kstats;
	struct kstat	*mi_ro_kstats;
	kstat_t		*mi_recov_ksp;	/* ptr to the recovery kstat */

	/*
	 * Volatile fh flags (nfsv4)
	 */
	uint32_t	mi_fh_expire_type;
	/*
	 * Lease Management
	 */
	struct mntinfo4	*mi_clientid_next;
	struct mntinfo4	*mi_clientid_prev;
	clientid4	mi_clientid;	/* redundant info found in nfs4_server */
	int		mi_open_files;	/* count of open files */
	int		mi_in_recovery;	/* count of recovery instances */
	kcondvar_t	mi_cv_in_recov;	/* cv for recovery threads */
	/*
	 * Open owner stuff.
	 */
	struct nfs4_oo_hash_bucket	mi_oo_list[NFS4_NUM_OO_BUCKETS];
	list_t		mi_foo_list;	/* freed open owners kept around */
	int		mi_foo_num;
	int		mi_foo_max;
	/*
	 * Shared filehandle pool.
	 */
	nfs_rwlock_t	mi_fh_lock;
	avl_tree_t	mi_filehandles;

	/*
	 * Debug message queue.
	 */
	list_t		mi_msg_list;
	int		mi_msg_count;
	time_t		mi_lease_period;
	/*
	 * not guaranteed to be accurate.
	 * only should be used by debug queue.
	 */
	kmutex_t	mi_msg_list_lock;
	/*
	 * Zones support.
	 */
	struct zone	*mi_zone;	/* Zone in which FS is mounted */
	zone_ref_t	mi_zone_ref;	/* Reference to aforementioned zone */
	list_node_t	mi_zone_node;	/* linkage into per-zone mi list */

	/*
	 * Links for unmounting ephemeral mounts.
	 */
	struct nfs4_ephemeral		*mi_ephemeral;
	struct nfs4_ephemeral_tree	*mi_ephemeral_tree;

	uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */
	struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */
	/*
	 * Referral related info.
	 */
	int mi_vfs_referral_loop_cnt;
	/*
	 * List of rnode4_t structures that belong to this mntinfo4
	 */
	kmutex_t	mi_rnodes_lock;	/* protects the mi_rnodes list */
	list_t		mi_rnodes;	/* the list */
} mntinfo4_t;

/*
 * The values for mi_flags.
 *
 *	MI4_HARD		hard or soft mount
 *	MI4_PRINTED		responding message printed
 *	MI4_INT			allow INTR on hard mount
 *	MI4_DOWN		server is down
 *	MI4_NOAC		don't cache attributes
 *	MI4_NOCTO		no close-to-open consistency
 *	MI4_LLOCK		local locking only (no lockmgr)
 *	MI4_GRPID		System V group id inheritance
 *	MI4_SHUTDOWN		System is rebooting or shutting down
 *	MI4_LINK		server supports link
 *	MI4_SYMLINK		server supports symlink
 *	MI4_EPHEMERAL_RECURSED	an ephemeral mount being unmounted
 *				due to a recursive call - no need
 *				for additional recursion
 *	MI4_ACL			server supports NFSv4 ACLs
 *	MI4_MIRRORMOUNT		is a mirrormount
 *	MI4_REFERRAL		is a referral mount
 *	MI4_NOPRINT		don't print messages
 *	MI4_DIRECTIO		do direct I/O
 *	MI4_RECOV_ACTIV		filesystem has a recovery thread
 *	MI4_REMOVE_ON_LAST_CLOSE remove from server's list
 *	MI4_RECOV_FAIL		client recovery failed
 *	MI4_PUBLIC		public/url option used
 *	MI4_MOUNTING		mount in progress, don't failover
 *	MI4_POSIX_LOCK		if server is using POSIX locking
 *	MI4_LOCK_DEBUG		cmn_err'd posix lock err msg
 *	MI4_DEAD		zone has released it
 *	MI4_INACTIVE_IDLE	inactive thread idle
 *	MI4_BADOWNER_DEBUG	badowner error msg per mount
 *	MI4_ASYNC_MGR_STOP	tell async manager to die
 *	MI4_TIMEDOUT		saw a timeout during zone shutdown
 *	MI4_EPHEMERAL		is an ephemeral mount
 */
#define	MI4_HARD		0x1
#define	MI4_PRINTED		0x2
#define	MI4_INT			0x4
#define	MI4_DOWN		0x8
#define	MI4_NOAC		0x10
#define	MI4_NOCTO		0x20
/* 0x40 is available */
#define	MI4_LLOCK		0x80
#define	MI4_GRPID		0x100
#define	MI4_SHUTDOWN		0x200
#define	MI4_LINK		0x400
#define	MI4_SYMLINK		0x800
#define	MI4_EPHEMERAL_RECURSED	0x1000
#define	MI4_ACL			0x2000
/* MI4_MIRRORMOUNT is also defined in nfsstat.c */
#define	MI4_MIRRORMOUNT		0x4000
#define	MI4_REFERRAL		0x8000
/* 0x10000 is available */
#define	MI4_NOPRINT		0x20000
#define	MI4_DIRECTIO		0x40000
/* 0x80000 is available */
#define	MI4_RECOV_ACTIV		0x100000
#define	MI4_REMOVE_ON_LAST_CLOSE	0x200000
#define	MI4_RECOV_FAIL		0x400000
#define	MI4_PUBLIC		0x800000
#define	MI4_MOUNTING		0x1000000
#define	MI4_POSIX_LOCK		0x2000000
#define	MI4_LOCK_DEBUG		0x4000000
#define	MI4_DEAD		0x8000000
#define	MI4_INACTIVE_IDLE	0x10000000
#define	MI4_BADOWNER_DEBUG	0x20000000
#define	MI4_ASYNC_MGR_STOP	0x40000000
#define	MI4_TIMEDOUT		0x80000000

#define	MI4_EPHEMERAL		(MI4_MIRRORMOUNT | MI4_REFERRAL)

/* Non-zero if interrupts are allowed on this (hard) mount. */
#define	INTR4(vp)	(VTOMI4(vp)->mi_flags & MI4_INT)

/* Non-zero if more than one server is available to fail over to. */
#define	FAILOVER_MOUNT4(mi)	(mi->mi_servers->sv_next)

/*
 * Recovery flags.
 *
 * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
 * that's important), but some flag is needed to indicate that recovery is
 * going on for the filesystem.
 */
#define	MI4R_NEED_CLIENTID	0x1
#define	MI4R_REOPEN_FILES	0x2
#define	MI4R_NEED_SECINFO	0x4
#define	MI4R_NEED_NEW_SERVER	0x8
#define	MI4R_REMAP_FILES	0x10
#define	MI4R_SRV_REBOOT		0x20	/* server has rebooted */
#define	MI4R_LOST_STATE		0x40
#define	MI4R_BAD_SEQID		0x80
#define	MI4R_MOVED		0x100

/* Reference-count a mntinfo4 (thin wrappers around mi_hold/mi_rele). */
#define	MI4_HOLD(mi) { \
	mi_hold(mi); \
}

#define	MI4_RELE(mi) { \
	mi_rele(mi); \
}

/*
 * vfs pointer to mount info
 */
#define	VFTOMI4(vfsp)	((mntinfo4_t *)((vfsp)->vfs_data))

/*
 * vnode pointer to mount info
 */
#define	VTOMI4(vp)	((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))

/*
 * Lease Management
 *
 * lease_valid is initially set to NFS4_LEASE_NOT_STARTED. This is when the
 * nfs4_server is first created. lease_valid is then set to
 * NFS4_LEASE_UNINITIALIZED when the renew thread is started. The extra state
 * of NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a
 * thread already exists when we do SETCLIENTID). lease_valid is then set to
 * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state
 * creating operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID
 * as long as the lease is renewed. It is set to NFS4_LEASE_INVALID when the
 * lease expires. Client recovery is needed to set the lease back to
 * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
 *
 * The s_cred is the credential used to mount the first file system for this
 * server. It is used as the credential for the renew thread's calls to the
 * server.
 *
 * The renew thread waits on the condition variable cv_thread_exit. If the cv
 * is signalled, then the thread knows it must check s_thread_exit to see if
 * it should exit. The cv is signaled when the last file system is unmounted
 * from a particular server. s_thread_exit is set to 0 upon thread startup,
 * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
 * telling the thread to exit. s_thread_exit is needed to avoid spurious
 * wakeups.
 *
 * state_ref_count is incremented every time a new file is opened and
 * decremented every time a file is closed otw. This keeps track of whether
 * the nfs4_server has state associated with it or not.
 *
 * s_refcnt is the reference count for storage management of the struct
 * itself.
 *
 * mntinfo4_list points to the doubly linked list of mntinfo4s that share
 * this nfs4_server (ie: <clientid, saddr> pair) in the current zone. This is
 * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
 *
 * s_recovlock is used to synchronize recovery operations. The thread
 * that is recovering the client must acquire it as a writer. If the
 * thread is using the clientid (including recovery operations on other
 * state), acquire it as a reader.
 *
 * The 's_otw_call_count' keeps track of the number of outstanding over the
 * wire requests for this structure. The struct will not go away as long
 * as this is non-zero (or s_refcnt is non-zero).
 *
 * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
 * variable to let the renew thread know when an outstanding otw request has
 * finished.
 *
 * 'zoneid' and 'zone_globals' are set at creation of this structure
 * and are read-only after that; no lock is required to read them.
 *
 * s_lock protects: everything except cv_thread_exit and s_recovlock.
 *
 * s_program is used as the index into the nfs4_callback_globals's
 * nfs4prog2server table. When a callback request comes in, we can
 * use that request's program number (minus NFS4_CALLBACK) as an index
 * into the nfs4prog2server. That entry will hold the nfs4_server_t ptr.
 * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
 * delegated rnode4_ts).
 *
 * Lock order:
 * nfs4_server::s_lock > mntinfo4::mi_lock
 * nfs_rtable4_lock > s_lock
 * nfs4_server_lst_lock > s_lock
 * s_recovlock > s_lock
 */
struct nfs4_callback_globals;

#define	RS_SERVER_GONE	1
/* Tracks a pending open against a server for resend synchronization. */
typedef struct nfs4_rcsync {
	list_node_t	rs_link;
	uint64_t	rs_seq;
	int		rs_flags;	/* RS_SERVER_GONE */
	mntinfo4_t	*rs_mi;
} nfs4_rcsync_t;

typedef struct nfs4_server {
	struct nfs4_server	*forw;
	struct nfs4_server	*back;
	struct netbuf		saddr;
	uint_t			s_flags;	/* see below */
	uint_t			s_refcnt;
	clientid4		clientid;	/* what we get from server */
	nfs_client_id4		clidtosend;	/* what we send to server */
	mntinfo4_t		*mntinfo4_list;
	int			lease_valid;
	time_t			s_lease_time;
	time_t			last_renewal_time;
	timespec_t		propagation_delay;
	cred_t			*s_cred;
	kcondvar_t		cv_thread_exit;
	int			s_thread_exit;
	int			state_ref_count;
	int			s_otw_call_count;
	kcondvar_t		s_cv_otw_count;
	kcondvar_t		s_clientid_pend;
	kmutex_t		s_lock;
	list_t			s_deleg_list;
	rpcprog_t		s_program;
	nfs_rwlock_t		s_recovlock;
	kcondvar_t		wait_cb_null;	/* used to wait for CB_NULL */
	zoneid_t		zoneid;	/* zone using this nfs4_server_t */
	struct nfs4_callback_globals *zone_globals;	/* globals */
	kmutex_t		s_rcsync_lock;
	kcondvar_t		s_rcsync_cv;
	list_t			s_rcsync_list;	/* list of pending opens */
	uint64_t		s_rcsync_seq;
} nfs4_server_t;

/* nfs4_server flags */
#define	N4S_CLIENTID_SET	1	/* server has our clientid */
#define	N4S_CLIENTID_PEND	0x2	/* server doesn't have clientid */
#define	N4S_CB_PINGED		0x4	/* server has sent us a CB_NULL */
#define	N4S_CB_WAITER		0x8	/* is/has wait{ing/ed} for cb_null */
#define	N4S_INSERTED		0x10	/* list has reference for server */
#define	N4S_BADOWNER_DEBUG	0x20	/* bad owner err msg per client */

#define	N4S_CB_PAUSE_TIME	10000	/* Amount of time to pause (10ms) */

struct lease_time_arg {
	time_t	lease_time;
};

/* When a delegation should be returned to the server. */
enum nfs4_delegreturn_policy {
	IMMEDIATE,
	FIRSTCLOSE,
	LASTCLOSE,
	INACTIVE
};

/*
 * Operation hints for the recovery framework (mostly).
 *
 * EXCEPTIONS:
 * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
 *	These hints exist to allow user visit/readdir a R4SRVSTUB dir.
 *	(dir represents the root of a server fs that has not yet been
 *	mounted at client)
 */
typedef enum {
	OH_OTHER,
	OH_READ,
	OH_WRITE,
	OH_COMMIT,
	OH_VFH_RENAME,
	OH_MOUNT,
	OH_CLOSE,
	OH_LOCKU,
	OH_DELEGRETURN,
	OH_ACCESS,
	OH_GETACL,
	OH_GETATTR,
	OH_LOOKUP,
	OH_READDIR
} nfs4_op_hint_t;

/*
 * This data structure is used to track ephemeral mounts for both
 * mirror mounts and referrals.
 *
 * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
 * pointing at it. So we don't need two backpointers to walk
 * back up the tree.
 *
 * An ephemeral tree is pointed to by an enclosing non-ephemeral
 * mntinfo4. The root is also pointed to by its ephemeral
 * mntinfo4. ne_child will get us back to it, while ne_prior
 * will get us back to the non-ephemeral mntinfo4. This is an
 * edge case we will need to be wary of when walking back up the
 * tree.
 *
 * The way we handle this edge case is to have ne_prior be NULL
 * for the root nfs4_ephemeral node.
 */
typedef struct nfs4_ephemeral {
	mntinfo4_t		*ne_mount;	/* who encloses us */
	struct nfs4_ephemeral	*ne_child;	/* first child node */
	struct nfs4_ephemeral	*ne_peer;	/* next sibling */
	struct nfs4_ephemeral	*ne_prior;	/* who points at us */
	time_t			ne_ref_time;	/* time last referenced */
	uint_t			ne_mount_to;	/* timeout at */
	int			ne_state;	/* used to traverse */
} nfs4_ephemeral_t;

/*
 * State for the node (set in ne_state):
 */
#define	NFS4_EPHEMERAL_OK		0x0
#define	NFS4_EPHEMERAL_VISIT_CHILD	0x1
#define	NFS4_EPHEMERAL_VISIT_SIBLING	0x2
#define	NFS4_EPHEMERAL_PROCESS_ME	0x4
#define	NFS4_EPHEMERAL_CHILD_ERROR	0x8
#define	NFS4_EPHEMERAL_PEER_ERROR	0x10

/*
 * These are the locks used in processing ephemeral data:
 *
 * mi->mi_lock
 *
 * net->net_tree_lock
 *	This lock is used to gate all tree operations.
 *	If it is held, then no other process may
 *	traverse the tree. This allows us to not
 *	throw a hold on each vfs_t in the tree.
 *	Can be held for a "long" time.
 *
 * net->net_cnt_lock
 *	Used to protect refcnt and status.
 *	Must be held for a really short time.
 *
 * nfs4_ephemeral_thread_lock
 *	Is only held to create the harvester for the zone.
 *	There is no ordering imposed on it.
 *	Held for a really short time.
 *
 * Some further detail on the interactions:
 *
 * net_tree_lock controls access to net_root. Access needs to first be
 * attempted in a non-blocking check.
 *
 * net_cnt_lock controls access to net_refcnt and net_status. It must only be
 * held for very short periods of time, unless the refcnt is 0 and the status
 * is INVALID.
 *
 * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
 * to bump the net_refcnt. It then releases it and does the action specific
 * algorithm to get the net_tree_lock. Once it has that, then it is okay to
 * grab the net_cnt_lock and change the status. The status can only be
 * changed if the caller has the net_tree_lock held as well.
 *
 * Note that the initial grab of net_cnt_lock must occur whilst
 * mi_lock is being held. This prevents stale data in that if the
 * ephemeral tree is non-NULL, then the harvester can not remove
 * the tree from the mntinfo node until it grabs that lock. I.e.,
 * we get the pointer to the tree and hold the lock atomically
 * with respect to being in mi_lock.
 *
 * When a caller is done with net_tree_lock, it can decrement the net_refcnt
 * either before it releases net_tree_lock or after.
 *
 * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
 *
 * Note that the overall locking scheme for the nodes is to control access
 * via the tree. The current scheme could easily be extended such that
 * the enclosing root referenced a "forest" of trees. The underlying trees
 * would be autonomous with respect to locks.
 *
 * Note that net_next is controlled by external locks
 * particular to the data structure that the tree is being added to.
 */
typedef struct nfs4_ephemeral_tree {
	mntinfo4_t			*net_mount;
	nfs4_ephemeral_t		*net_root;
	struct nfs4_ephemeral_tree	*net_next;
	kmutex_t			net_tree_lock;
	kmutex_t			net_cnt_lock;
	uint_t				net_status;
	uint_t				net_refcnt;
} nfs4_ephemeral_tree_t;

/*
 * State for the tree (set in net_status):
 */
#define	NFS4_EPHEMERAL_TREE_OK		0x0
#define	NFS4_EPHEMERAL_TREE_BUILDING	0x1
#define	NFS4_EPHEMERAL_TREE_DEROOTING	0x2
#define	NFS4_EPHEMERAL_TREE_INVALID	0x4
#define	NFS4_EPHEMERAL_TREE_MOUNTING	0x8
#define	NFS4_EPHEMERAL_TREE_UMOUNTING	0x10
#define	NFS4_EPHEMERAL_TREE_LOCKED	0x20

#define	NFS4_EPHEMERAL_TREE_PROCESSING	(NFS4_EPHEMERAL_TREE_DEROOTING | \
	NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
	NFS4_EPHEMERAL_TREE_LOCKED)

/*
 * This macro evaluates to non-zero if the given op releases state at the
 * server.
 */
#define	OH_IS_STATE_RELE(op)	((op) == OH_CLOSE || (op) == OH_LOCKU || \
				(op) == OH_DELEGRETURN)

#ifdef _KERNEL

extern void	nfs4_async_manager(struct vfs *);
extern void	nfs4_async_manager_stop(struct vfs *);
extern void	nfs4_async_stop(struct vfs *);
extern int	nfs4_async_stop_sig(struct vfs *);
extern int	nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
			struct seg *, cred_t *,
			void (*)(vnode_t *, u_offset_t,
			caddr_t, struct seg *, cred_t *));
extern int	nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
			int, cred_t *, int (*)(vnode_t *, page_t *,
			u_offset_t, size_t, int, cred_t *));
extern int	nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
			int, cred_t *, int (*)(vnode_t *, page_t *,
			u_offset_t, size_t, int, cred_t *));
extern void	nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
			cred_t *, void (*)(vnode_t *, page_t *,
			offset3, count3, cred_t *));
extern void	nfs4_async_inactive(vnode_t *, cred_t *);
extern void	nfs4_inactive_thread(mntinfo4_t *mi);
extern void	nfs4_inactive_otw(vnode_t *, cred_t *);
extern int	nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);

extern int	nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
extern void	nfs4_mnt_kstat_init(struct vfs *);

extern void	rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
			struct COMPOUND4res_clnt *, cred_t *, int *, int,
			nfs4_error_t *);
extern void	nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
extern int	nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
			nfs4_ga_res_t *, bitmap4, cred_t *);

extern void	nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
extern void	nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
			hrtime_t, cred_t *, int,
			change_info4 *);
extern void	nfs4_purge_rddir_cache(vnode_t *);
extern void	nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
extern void	nfs4_purge_caches(vnode_t *, int, cred_t *, int);
extern void	nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
extern void	nfs4_flush_pages(vnode_t *vp, cred_t *cr);

extern void	nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
extern void	nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
			vnode_t *);

extern void	nfs4args_lookup_free(nfs_argop4 *, int);
extern void	nfs4args_copen_free(OPEN4cargs *);

extern void	nfs4_printfhandle(nfs4_fhandle_t *);

extern void	nfs_free_mi4(mntinfo4_t *);
extern void	sv4_free(servinfo4_t *);
extern void	nfs4_mi_zonelist_add(mntinfo4_t *);
extern int	nfs4_mi_zonelist_remove(mntinfo4_t *);
extern int	nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
extern void	nfs4_secinfo_init(void);
extern void	nfs4_secinfo_fini(void);
extern int	nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
extern int	nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
extern void	secinfo_free(sv_secinfo_t *);
extern void	save_mnt_secinfo(servinfo4_t *);
extern void	check_mnt_secinfo(servinfo4_t *, vnode_t *);
extern int	vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
			enum nfs_opnum4, bitmap4 supp_mask);
extern int	nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
			int, cred_t *);
extern void	nfs4_write_error(vnode_t *, int, cred_t *);
extern void	nfs4_lockcompletion(vnode_t *, int);
extern bool_t	nfs4_map_lost_lock_conflict(vnode_t *);
extern int	vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
extern int	vtoname(vnode_t *, char *, ssize_t);
extern void	nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
			bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
			nfs4_error_t *, int *);
extern void	nfs4_error_zinit(nfs4_error_t *);
extern void	nfs4_error_init(nfs4_error_t *, int);
extern void	nfs4_free_args(struct nfs_args *);

extern void	mi_hold(mntinfo4_t *);
extern void	mi_rele(mntinfo4_t *);

extern vnode_t	*find_referral_stubvp(vnode_t *, char *, cred_t *);
extern int	nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);

extern sec_data_t	*copy_sec_data(sec_data_t *);
extern gss_clntdata_t	*copy_sec_data_gss(gss_clntdata_t *);

#ifdef DEBUG
extern int	nfs4_consistent_type(vnode_t *);
#endif

extern void	nfs4_init_dot_entries(void);
extern void	nfs4_destroy_dot_entries(void);
extern struct nfs4_callback_globals	*nfs4_get_callback_globals(void);

extern struct nfs4_server nfs4_server_lst;

extern clock_t nfs_write_error_interval;

#endif /* _KERNEL */

/*
 * Flags for nfs4getfh_otw.
 */

#define	NFS4_GETFH_PUBLIC	0x01
#define	NFS4_GETFH_NEEDSOP	0x02

/*
 * Found through rnodes.
 *
 * The os_open_ref_count keeps track of the number of open file descriptor
 * references on this data structure. It will be bumped for any successful
 * OTW OPEN call and any OPEN call that determines the OTW call is not
 * necessary and the open stream hasn't just been created (see
 * nfs4_is_otw_open_necessary).
 *
 * os_mapcnt is a count of the number of mmapped pages for a particular
 * open stream; this in conjunction w/ os_open_ref_count is used to
 * determine when to do a close to the server. This is necessary because
 * of the semantics of doing open, mmap, close; the OTW close must wait
 * until all open and mmap references have vanished.
 *
 * 'os_valid' tells us whether this structure is about to be freed or not,
 * if it is then don't return it in find_open_stream().
 *
 * 'os_final_close' is set when a CLOSE OTW was attempted. This is needed
 * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
 * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE. It
 * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
 * that tried to close OTW but failed, and left the state cleanup to
 * nfs4_inactive/CLOSE_FORCE.
 *
 * 'os_force_close' is used to let us know if an intervening thread came
 * and reopened the open stream after we decided to issue a CLOSE_FORCE,
 * but before we could actually process the CLOSE_FORCE.
 *
 * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
 * lost state queue.
 *
 * 'open_stateid' is set to the last open stateid returned by the server
 * unless 'os_delegation' is 1, in which case 'open_stateid' refers to the
 * delegation stateid returned by the server. This is used in cases where the
 * client tries to OPEN a file but already has a suitable delegation, so we
 * just stick the delegation stateid in the open stream.
 *
 * os_dc_openacc are open access bits which have been granted to the
 * open stream by virtue of a delegation, but which have not been seen
 * by the server. This applies even if the open stream does not have
 * os_delegation set. These bits are used when setting file locks to
 * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
 * before the lock request can be sent to the server. See
 * nfs4frlock_check_deleg().
 *
 * 'os_mmap_read/write' keep track of the read and write access our memory
 * maps require. We need to keep track of this so we can provide the proper
 * access bits in the open/mmap/close/reboot/reopen case.
 *
 * 'os_failed_reopen' tells us that we failed to successfully reopen this
 * open stream; therefore, we should not use this open stateid as it is
 * not valid anymore. This flag is also used to indicate an unsuccessful
 * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
 *
 * If 'os_orig_oo_name' is different than os_open_owner's oo_name
 * then this tells us that this open stream's open owner used a
 * bad seqid (that is, got NFS4ERR_BAD_SEQID). If different, this open
 * stream will no longer be used for future OTW state releasing calls.
 *
 * Lock ordering:
 * rnode4_t::r_os_lock > os_sync_lock
 * os_sync_lock > rnode4_t::r_statelock
 * os_sync_lock > rnode4_t::r_statev4_lock
 * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
 *
 * The 'os_sync_lock' protects:
 *	open_stateid
 *	os_dc_openacc
 *	os_delegation
 *	os_failed_reopen
 *	os_final_close
 *	os_force_close
 *	os_mapcnt
 *	os_mmap_read
 *	os_mmap_write
 *	os_open_ref_count
 *	os_pending_close
 *	os_share_acc_read
 *	os_share_acc_write
 *	os_share_deny_none
 *	os_share_deny_read
 *	os_share_deny_write
 *	os_ref_count
 *	os_valid
 *
 * The rnode4_t::r_os_lock protects:
 *	os_node
 *
 * These fields are set at creation time and
 * read only after that:
 *	os_open_owner
 *	os_orig_oo_name
 */
typedef struct nfs4_open_stream {
	uint64_t	os_share_acc_read;
	uint64_t	os_share_acc_write;
	uint64_t	os_mmap_read;
	uint64_t	os_mmap_write;
	uint32_t	os_share_deny_none;
	uint32_t	os_share_deny_read;
	uint32_t	os_share_deny_write;
	stateid4	open_stateid;
	int		os_dc_openacc;
	int		os_ref_count;
	unsigned	os_valid:1;
	unsigned	os_delegation:1;
	unsigned	os_final_close:1;
	unsigned	os_pending_close:1;
	unsigned	os_failed_reopen:1;
	unsigned	os_force_close:1;
	int		os_open_ref_count;
	long		os_mapcnt;
	list_node_t	os_node;
	struct nfs4_open_owner	*os_open_owner;
	uint64_t	os_orig_oo_name;
	kmutex_t	os_sync_lock;
} nfs4_open_stream_t;

/*
 * This structure describes the format of the lock_owner_name
 * field of the lock owner.
 */

typedef struct nfs4_lo_name {
	uint64_t	ln_seq_num;
	pid_t		ln_pid;
} nfs4_lo_name_t;

/*
 * Flags for lo_flags.
 */
#define	NFS4_LOCK_SEQID_INUSE	0x1
#define	NFS4_BAD_SEQID_LOCK	0x2

/*
 * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
 * off the rnode. If the links are NULL it means this object is not on the
 * list.
 *
 * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
 * didn't get a response back. This is used to figure out if we have
 * possible remote v4 locks, so that we can clean up at process exit. In
 * theory, the client should be able to figure out if the server received
 * the request (based on what seqid works), so maybe we can get rid of this
 * flag someday.
 *
 * 'lo_ref_count' tells us how many processes/threads are using this data
 * structure. The rnode's list accounts for one reference.
 *
 * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
 * data structure. It is then set to NFS4_PERM_CREATED when a lock request
 * is successful using this lock owner structure. We need to keep 'temporary'
 * lock owners around so we can properly keep the lock seqid synchronization
 * when multiple processes/threads are trying to create the lock owner for the
 * first time (especially with the DENIED error case). Once
 * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
 *
 * 'lo_valid' tells us whether this structure is about to be freed or not,
 * if it is then don't return it from find_lock_owner().
 *
 * Retrieving and setting of 'lock_seqid' is protected by the
 * NFS4_LOCK_SEQID_INUSE flag. Waiters for NFS4_LOCK_SEQID_INUSE should
 * use 'lo_cv_seqid_sync'.
 *
 * The setting of 'lock_stateid' is protected by the
 * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'. The retrieving of the
 * 'lock_stateid' is protected by 'lo_lock', with the additional
 * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
 * NFS4ERR_BAD_STATEID as appropriate.
 *
 * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
 * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID). With this set,
 * this lock owner will no longer be used for future OTW calls. Once set,
 * it is never unset.
 *
 * Lock ordering:
 * rnode4_t::r_statev4_lock > lo_lock
 */
typedef struct nfs4_lock_owner {
	struct nfs4_lock_owner	*lo_next_rnode;
	struct nfs4_lock_owner	*lo_prev_rnode;
	int			lo_pid;
	stateid4		lock_stateid;
	seqid4			lock_seqid;
	/*
	 * Fix this to always be 12 bytes
	 */
	nfs4_lo_name_t		lock_owner_name;
	int			lo_ref_count;
	int			lo_valid;
	int			lo_pending_rqsts;
	int			lo_just_created;
	int			lo_flags;
	kcondvar_t		lo_cv_seqid_sync;
	kmutex_t		lo_lock;
	kthread_t		*lo_seqid_holder;	/* debugging aid */
} nfs4_lock_owner_t;

/* for nfs4_lock_owner_t lookups */
typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;

/* Number of times to retry a call that fails with state independent error */
#define	NFS4_NUM_RECOV_RETRIES	3

/* Which kind of stateid (if any) is currently selected for use. */
typedef enum {
	NO_SID,
	DEL_SID,
	LOCK_SID,
	OPEN_SID,
	SPEC_SID
} nfs4_stateid_type_t;

typedef struct nfs4_stateid_types {
	stateid4 d_sid;		/* delegation stateid */
	stateid4 l_sid;		/* lock stateid */
	stateid4 o_sid;		/* open stateid */
	nfs4_stateid_type_t cur_sid_type;
} nfs4_stateid_types_t;

/*
 * Per-zone data for dealing with callbacks. Included here solely for the
 * benefit of MDB.
 */
struct nfs4_callback_stats {
	kstat_named_t	delegations;
	kstat_named_t	cb_getattr;
	kstat_named_t	cb_recall;
	kstat_named_t	cb_null;
	kstat_named_t	cb_dispatch;
	kstat_named_t	delegaccept_r;
	kstat_named_t	delegaccept_rw;
	kstat_named_t	delegreturn;
	kstat_named_t	callbacks;
	kstat_named_t	claim_cur;
	kstat_named_t	claim_cur_ok;
	kstat_named_t	recall_trunc;
	kstat_named_t	recall_failed;
	kstat_named_t	return_limit_write;
	kstat_named_t	return_limit_addmap;
	kstat_named_t	deleg_recover;
	kstat_named_t	cb_illegal;
};

struct nfs4_callback_globals {
	kmutex_t nfs4_cb_lock;
	kmutex_t nfs4_dlist_lock;
	int nfs4_program_hint;
	/* this table maps the program number to the nfs4_server structure */
	struct nfs4_server **nfs4prog2server;
	list_t nfs4_dlist;
	list_t nfs4_cb_ports;
	struct nfs4_callback_stats nfs4_callback_stats;
#ifdef DEBUG
	int nfs4_dlistadd_c;
	int nfs4_dlistclean_c;
#endif
};

/* Reason a CLOSE is being performed. */
typedef enum {
	CLOSE_NORM,
	CLOSE_DELMAP,
	CLOSE_FORCE,
	CLOSE_RESEND,
	CLOSE_AFTER_RESEND
} nfs4_close_type_t;

/*
 * Structure to hold the bad seqid information that is passed
 * to the recovery framework.
 */
typedef struct nfs4_bseqid_entry {
	nfs4_open_owner_t *bs_oop;	/* open owner that hit the bad seqid */
	nfs4_lock_owner_t *bs_lop;	/* lock owner that hit the bad seqid */
	vnode_t *bs_vp;
	pid_t bs_pid;
	nfs4_tag_type_t bs_tag;
	seqid4 bs_seqid;		/* the offending seqid value */
	list_node_t bs_node;
} nfs4_bseqid_entry_t;

#ifdef _KERNEL

/* open stream close and open/lock owner management */
extern void nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
    nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
    size_t, uint_t, uint_t);
extern void nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
extern void nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
extern void open_owner_hold(nfs4_open_owner_t *);
extern void open_owner_rele(nfs4_open_owner_t *);
extern nfs4_open_stream_t *find_or_create_open_stream(nfs4_open_owner_t *,
    struct rnode4 *, int *);
extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
    struct rnode4 *);
extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
    struct rnode4 *rp);
extern void open_stream_hold(nfs4_open_stream_t *);
extern void open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
extern int nfs4close_all(vnode_t *, cred_t *);
extern void lock_owner_hold(nfs4_lock_owner_t *);
extern void lock_owner_rele(nfs4_lock_owner_t *);
extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
extern void nfs4_rnode_remove_lock_owner(struct rnode4 *,
    nfs4_lock_owner_t *);
extern void nfs4_flush_lock_owners(struct rnode4 *);
extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
/* open/lock seqid management */
extern void nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
    nfs4_tag_type_t);
extern void nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
extern void nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
    nfs4_tag_type_t);
extern void
nfs4_end_open_seqid_sync(nfs4_open_owner_t *); 1926 extern int nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *); 1927 extern void nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *); 1928 extern int nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *); 1929 extern void nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *, 1930 nfs4_open_stream_t *, clientid4, locker4 *); 1931 extern void nfs4_destroy_open_owner(nfs4_open_owner_t *); 1932 1933 extern void nfs4_renew_lease_thread(nfs4_server_t *); 1934 extern nfs4_server_t *find_nfs4_server(mntinfo4_t *); 1935 extern nfs4_server_t *find_nfs4_server_all(mntinfo4_t *, int all); 1936 extern nfs4_server_t *new_nfs4_server(servinfo4_t *, cred_t *); 1937 extern void nfs4_mark_srv_dead(nfs4_server_t *); 1938 extern nfs4_server_t *servinfo4_to_nfs4_server(servinfo4_t *); 1939 extern void nfs4_inc_state_ref_count(mntinfo4_t *); 1940 extern void nfs4_inc_state_ref_count_nolock(nfs4_server_t *, 1941 mntinfo4_t *); 1942 extern void nfs4_dec_state_ref_count(mntinfo4_t *); 1943 extern void nfs4_dec_state_ref_count_nolock(nfs4_server_t *, 1944 mntinfo4_t *); 1945 extern clientid4 mi2clientid(mntinfo4_t *); 1946 extern int nfs4_server_in_recovery(nfs4_server_t *); 1947 extern bool_t nfs4_server_vlock(nfs4_server_t *, int); 1948 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *); 1949 extern uint64_t nfs4_get_new_oo_name(void); 1950 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *); 1951 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *); 1952 extern void nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *, 1953 cred_t *, nfs4_error_t *, nfs4_lost_rqst_t *, int *); 1954 extern void nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *, 1955 nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *, 1956 vnode_t *, int, int); 1957 extern void nfs4_open_downgrade(int, int, nfs4_open_owner_t *, 1958 nfs4_open_stream_t *, vnode_t *, cred_t *, 1959 
nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *); 1960 extern seqid4 nfs4_get_open_seqid(nfs4_open_owner_t *); 1961 extern cred_t *nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *); 1962 extern void nfs4_init_stateid_types(nfs4_stateid_types_t *); 1963 extern void nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *); 1964 1965 extern kmutex_t nfs4_server_lst_lock; 1966 1967 extern void nfs4callback_destroy(nfs4_server_t *); 1968 extern void nfs4_callback_init(void); 1969 extern void nfs4_callback_fini(void); 1970 extern void nfs4_cb_args(nfs4_server_t *, struct knetconfig *, 1971 SETCLIENTID4args *); 1972 extern void nfs4delegreturn_async(struct rnode4 *, int, bool_t); 1973 1974 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy; 1975 1976 extern void nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *); 1977 extern void nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *); 1978 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *); 1979 extern bool_t nfs4_fs_active(nfs4_server_t *); 1980 extern void nfs4_server_rele(nfs4_server_t *); 1981 extern bool_t inlease(nfs4_server_t *); 1982 extern bool_t nfs4_has_pages(vnode_t *); 1983 extern void nfs4_log_badowner(mntinfo4_t *, nfs_opnum4); 1984 1985 #endif /* _KERNEL */ 1986 1987 /* 1988 * Client State Recovery 1989 */ 1990 1991 /* 1992 * The following defines are used for rs_flags in 1993 * a nfs4_recov_state_t structure. 1994 * 1995 * NFS4_RS_RENAME_HELD Indicates that the mi_rename_lock was held. 1996 * NFS4_RS_GRACE_MSG Set once we have uprintf'ed a grace message. 1997 * NFS4_RS_DELAY_MSG Set once we have uprintf'ed a delay message. 1998 * NFS4_RS_RECALL_HELD1 r_deleg_recall_lock for vp1 was held. 1999 * NFS4_RS_RECALL_HELD2 r_deleg_recall_lock for vp2 was held. 
 */
#define	NFS4_RS_RENAME_HELD	0x000000001
#define	NFS4_RS_GRACE_MSG	0x000000002
#define	NFS4_RS_DELAY_MSG	0x000000004
#define	NFS4_RS_RECALL_HELD1	0x000000008
#define	NFS4_RS_RECALL_HELD2	0x000000010

/*
 * Information that is retrieved from nfs4_start_op() and that is
 * passed into nfs4_end_op().
 *
 * rs_sp is a reference to the nfs4_server that was found, or NULL.
 *
 * rs_num_retry_despite_err is the number of times the client retried an
 * OTW op despite a recovery error.  It is only incremented for hints
 * exempt from normal R4RECOVERR processing
 * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN).  (XXX this special-case code
 * needs review for possible removal.)
 * It is initialized wherever nfs4_recov_state_t is declared -- usually
 * very near initialization of rs_flags.
 */
typedef struct {
	nfs4_server_t *rs_sp;
	int rs_flags;			/* NFS4_RS_* flags above */
	int rs_num_retry_despite_err;
} nfs4_recov_state_t;

/*
 * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
2029 */ 2030 2031 #define NFS4_REMAP_CKATTRS 1 2032 #define NFS4_REMAP_NEEDSOP 2 2033 2034 #ifdef _KERNEL 2035 2036 extern int nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int, 2037 vnode_t *, int, int *, int, nfs4_recov_state_t *); 2038 extern void nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t, 2039 nfs4_error_t *); 2040 extern void nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *, 2041 open_claim_type4, bool_t, bool_t); 2042 extern void nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int); 2043 extern void nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int, 2044 nfs4_error_t *); 2045 extern void nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int, 2046 nfs4_error_t *); 2047 extern int nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t, 2048 vnode_t *, cred_t *, vnode_t **, int); 2049 extern void nfs4_fail_recov(vnode_t *, char *, int, nfsstat4); 2050 2051 extern int nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *); 2052 extern int nfs4_recov_marks_dead(nfsstat4); 2053 extern bool_t nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *, 2054 vnode_t *, vnode_t *, stateid4 *, 2055 nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *, 2056 vnode_t *, char *); 2057 extern int nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *, 2058 nfs4_recov_state_t *); 2059 extern void nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *, 2060 nfs4_recov_state_t *, bool_t); 2061 extern int nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *, 2062 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *); 2063 extern void nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *, 2064 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t); 2065 extern char *nfs4_recov_action_to_str(nfs4_recov_t); 2066 2067 /* 2068 * In sequence, code desiring to unmount an ephemeral tree must 2069 * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate, 2070 * and nfs4_ephemeral_umount_unlock. 
The _unlock must also be
 * called on all error paths that occur before it would naturally
 * be invoked.
 *
 * The caller must also provide a pointer to a boolean to keep track
 * of whether or not the code in _unlock is to be run.
 */
extern void nfs4_ephemeral_umount_activate(mntinfo4_t *,
    bool_t *, nfs4_ephemeral_tree_t **);
extern int nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
    bool_t *, nfs4_ephemeral_tree_t **);
extern void nfs4_ephemeral_umount_unlock(bool_t *,
    nfs4_ephemeral_tree_t **);

extern int nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);

extern int nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
extern int nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
    char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t);

/* delegation recall and recovery wait/delay helpers */
extern int wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
    nfs4_recov_state_t *);
extern void nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
extern void nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
    int, nfsstat4);
extern time_t nfs4err_delay_time;
extern void nfs4_set_grace_wait(mntinfo4_t *);
extern void nfs4_set_delay_wait(vnode_t *);
extern int nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
extern int nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
    seqid4);

extern void nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
    nfs4_error_t *);
extern void nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
    nfs4_server_t *);
extern int nfs4_rpc_retry_error(int);
extern int nfs4_try_failover(nfs4_error_t *);
extern void nfs4_free_msg(nfs4_debug_msg_t *);
extern void nfs4_mnt_recov_kstat_init(vfs_t *);
extern void
nfs4_mi_kstat_inc_delay(mntinfo4_t *);
extern void nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
extern char *nfs4_stat_to_str(nfsstat4);
extern char *nfs4_op_to_str(nfs_opnum4);

extern void nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
extern void nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
#pragma rarely_called(nfs4_queue_event)
#pragma rarely_called(nfs4_queue_fact)

/* Used for preformed "." and ".." dirents */
extern char *nfs4_dot_entries;
extern char *nfs4_dot_dot_entry;

#ifdef DEBUG
extern uint_t nfs4_tsd_key;
#endif

#endif /* _KERNEL */

/*
 * Filehandle management.
 *
 * Filehandles can change in v4, so rather than storing the filehandle
 * directly in the rnode, etc., we manage the filehandle through one of
 * these objects.
 * Locking: sfh_fh and sfh_tree is protected by the filesystem's
 * mi_fh_lock.  The reference count and flags are protected by sfh_lock.
 * sfh_mi is read-only.
 *
 * mntinfo4_t::mi_fh_lock > sfh_lock.
 */

typedef struct nfs4_sharedfh {
	nfs_fh4 sfh_fh;			/* key and current filehandle */
	kmutex_t sfh_lock;		/* protects sfh_refcnt and sfh_flags */
	uint_t sfh_refcnt;		/* reference count */
	uint_t sfh_flags;		/* SFH4_* flags (see below) */
	mntinfo4_t *sfh_mi;		/* backptr to filesystem */
	avl_node_t sfh_tree;		/* used by avl package */
} nfs4_sharedfh_t;

#define	SFH4_SAME(sfh1, sfh2)	((sfh1) == (sfh2))

/*
 * Flags.
 */
#define	SFH4_IN_TREE	0x1	/* currently in an AVL tree */

#ifdef _KERNEL

extern void sfh4_createtab(avl_tree_t *);
extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
    nfs4_sharedfh_t *);
extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
extern void sfh4_hold(nfs4_sharedfh_t *);
extern void sfh4_rele(nfs4_sharedfh_t **);
extern void sfh4_printfhandle(const nfs4_sharedfh_t *);

#endif

/*
 * Path and file name management.
 *
 * This type stores the name of an entry in the filesystem and keeps enough
 * information that it can provide a complete path.  All fields are
 * protected by fn_lock, except for the reference count, which is managed
 * using atomic add/subtract.
 *
 * Additionally, the shared filehandle for this fname is stored.
 * Normally, fn_get(), when it creates this fname, stores the passed-in
 * shared fh in fn_sfh by doing sfh_hold.  Similarly the path which
 * destroys this fname releases the reference on this fh by doing sfh_rele.
 *
 * fn_get uses the fn_sfh to refine the comparison in cases
 * where we have matched the name but have differing file handles;
 * this normally happens due to
 *
 * 1. Server side rename of a file/directory.
 * 2. Another client renaming a file/directory on the server.
 *
 * Differing names but same filehandle is possible as in the case of hardlinks,
 * but differing filehandles with same name component will later confuse
 * the client and can cause various panics.
 *
 * Lock order: child and then parent.
 */

typedef struct nfs4_fname {
	struct nfs4_fname *fn_parent;	/* parent name; null if fs root */
	char *fn_name;			/* the actual name */
	ssize_t fn_len;			/* strlen(fn_name) */
	uint32_t fn_refcnt;		/* reference count */
	kmutex_t fn_lock;
	avl_node_t fn_tree;
	avl_tree_t fn_children;		/* children, if any */
	nfs4_sharedfh_t *fn_sfh;	/* The fh for this fname */
} nfs4_fname_t;

#ifdef _KERNEL

extern vnode_t nfs4_xattr_notsupp_vnode;
#define	NFS4_XATTR_DIR_NOTSUPP	&nfs4_xattr_notsupp_vnode

extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
extern void fn_hold(nfs4_fname_t *);
extern void fn_rele(nfs4_fname_t **);
extern char *fn_name(nfs4_fname_t *);
extern char *fn_path(nfs4_fname_t *);
extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
extern nfs4_fname_t *fn_parent(nfs4_fname_t *);

/* Referral Support */
extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
    cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *);

#endif

/*
 * Per-zone data for managing client handles, included in this file for the
 * benefit of MDB.
 */
struct nfs4_clnt {
	struct chhead *nfscl_chtable4;		/* client handle table */
	kmutex_t nfscl_chtable4_lock;		/* protects nfscl_chtable4 */
	zoneid_t nfscl_zoneid;			/* zone this state belongs to */
	list_node_t nfscl_node;
	struct clstat4 nfscl_stat;
};

#ifdef __cplusplus
}
#endif

#endif /* _NFS4_CLNT_H */