1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
 * All Rights Reserved
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/session.h>
#include <sys/thread.h>
#include <sys/dnlc.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/list.h>
#include <sys/sdt.h>
#include <sys/policy.h>

#include <rpc/types.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>

#include <nfs/nfs_clnt.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

/*
 * client side statistics
 *
 * Read-only template of named kstat entries.  Every entry pairs a
 * statistic name with its data type; all of them are 64-bit unsigned
 * counters.  The last four entries (clalloc, noresponse, failover, remap)
 * exist only in DEBUG builds.
 * NOTE(review): the "_tmpl" suffix suggests this is copied into a
 * per-zone structure elsewhere in the file -- confirm against the zone
 * init code, which is not visible here.
 */
static const struct clstat4 clstat4_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "referrals",	KSTAT_DATA_UINT64 },
	{ "referlinks",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};

#ifdef DEBUG
/*
 * DEBUG-only global counters (a single writable instance, unlike the
 * const template above).  Within this file, "clreclaim" is incremented
 * each time clreclaim4_zone() runs.
 */
struct clstat4_debug clstat4_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif

/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
92 */ 93 static list_t nfs4_clnt_list; 94 static kmutex_t nfs4_clnt_list_lock; 95 zone_key_t nfs4clnt_zone_key; 96 97 static struct kmem_cache *chtab4_cache; 98 99 #ifdef DEBUG 100 static int nfs4_rfscall_debug; 101 static int nfs4_try_failover_any; 102 int nfs4_utf8_debug = 0; 103 #endif 104 105 /* 106 * NFSv4 readdir cache implementation 107 */ 108 typedef struct rddir4_cache_impl { 109 rddir4_cache rc; /* readdir cache element */ 110 kmutex_t lock; /* lock protects count */ 111 uint_t count; /* reference count */ 112 avl_node_t tree; /* AVL tree link */ 113 } rddir4_cache_impl; 114 115 static int rddir4_cache_compar(const void *, const void *); 116 static void rddir4_cache_free(rddir4_cache_impl *); 117 static rddir4_cache *rddir4_cache_alloc(int); 118 static void rddir4_cache_hold(rddir4_cache *); 119 static int try_failover(enum clnt_stat); 120 121 static int nfs4_readdir_cache_hits = 0; 122 static int nfs4_readdir_cache_waits = 0; 123 static int nfs4_readdir_cache_misses = 0; 124 125 /* 126 * Shared nfs4 functions 127 */ 128 129 /* 130 * Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already 131 * be allocated. 132 */ 133 134 void 135 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to) 136 { 137 to->nfs_fh4_len = from->nfs_fh4_len; 138 bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len); 139 } 140 141 /* 142 * nfs4cmpfh - compare 2 filehandles. 143 * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is 144 * "less" than the second, +1 if the first is "greater" than the second. 
145 */ 146 147 int 148 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2) 149 { 150 const char *c1, *c2; 151 152 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len) 153 return (-1); 154 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len) 155 return (1); 156 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val; 157 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len; 158 c1++, c2++) { 159 if (*c1 < *c2) 160 return (-1); 161 if (*c1 > *c2) 162 return (1); 163 } 164 165 return (0); 166 } 167 168 /* 169 * Compare two v4 filehandles. Return zero if they're the same, non-zero 170 * if they're not. Like nfs4cmpfh(), but different filehandle 171 * representation, and doesn't provide information about greater than or 172 * less than. 173 */ 174 175 int 176 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2) 177 { 178 if (fh1->fh_len == fh2->fh_len) 179 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len)); 180 181 return (1); 182 } 183 184 int 185 stateid4_cmp(stateid4 *s1, stateid4 *s2) 186 { 187 if (bcmp(s1, s2, sizeof (stateid4)) == 0) 188 return (1); 189 else 190 return (0); 191 } 192 193 nfsstat4 194 puterrno4(int error) 195 { 196 switch (error) { 197 case 0: 198 return (NFS4_OK); 199 case EPERM: 200 return (NFS4ERR_PERM); 201 case ENOENT: 202 return (NFS4ERR_NOENT); 203 case EINTR: 204 return (NFS4ERR_IO); 205 case EIO: 206 return (NFS4ERR_IO); 207 case ENXIO: 208 return (NFS4ERR_NXIO); 209 case ENOMEM: 210 return (NFS4ERR_RESOURCE); 211 case EACCES: 212 return (NFS4ERR_ACCESS); 213 case EBUSY: 214 return (NFS4ERR_IO); 215 case EEXIST: 216 return (NFS4ERR_EXIST); 217 case EXDEV: 218 return (NFS4ERR_XDEV); 219 case ENODEV: 220 return (NFS4ERR_IO); 221 case ENOTDIR: 222 return (NFS4ERR_NOTDIR); 223 case EISDIR: 224 return (NFS4ERR_ISDIR); 225 case EINVAL: 226 return (NFS4ERR_INVAL); 227 case EMFILE: 228 return (NFS4ERR_RESOURCE); 229 case EFBIG: 230 return (NFS4ERR_FBIG); 231 case ENOSPC: 232 return (NFS4ERR_NOSPC); 233 case EROFS: 234 return (NFS4ERR_ROFS); 235 case EMLINK: 236 
return (NFS4ERR_MLINK); 237 case EDEADLK: 238 return (NFS4ERR_DEADLOCK); 239 case ENOLCK: 240 return (NFS4ERR_DENIED); 241 case EREMOTE: 242 return (NFS4ERR_SERVERFAULT); 243 case ENOTSUP: 244 return (NFS4ERR_NOTSUPP); 245 case EDQUOT: 246 return (NFS4ERR_DQUOT); 247 case ENAMETOOLONG: 248 return (NFS4ERR_NAMETOOLONG); 249 case EOVERFLOW: 250 return (NFS4ERR_INVAL); 251 case ENOSYS: 252 return (NFS4ERR_NOTSUPP); 253 case ENOTEMPTY: 254 return (NFS4ERR_NOTEMPTY); 255 case EOPNOTSUPP: 256 return (NFS4ERR_NOTSUPP); 257 case ESTALE: 258 return (NFS4ERR_STALE); 259 case EAGAIN: 260 if (curthread->t_flag & T_WOULDBLOCK) { 261 curthread->t_flag &= ~T_WOULDBLOCK; 262 return (NFS4ERR_DELAY); 263 } 264 return (NFS4ERR_LOCKED); 265 default: 266 return ((enum nfsstat4)error); 267 } 268 } 269 270 int 271 geterrno4(enum nfsstat4 status) 272 { 273 switch (status) { 274 case NFS4_OK: 275 return (0); 276 case NFS4ERR_PERM: 277 return (EPERM); 278 case NFS4ERR_NOENT: 279 return (ENOENT); 280 case NFS4ERR_IO: 281 return (EIO); 282 case NFS4ERR_NXIO: 283 return (ENXIO); 284 case NFS4ERR_ACCESS: 285 return (EACCES); 286 case NFS4ERR_EXIST: 287 return (EEXIST); 288 case NFS4ERR_XDEV: 289 return (EXDEV); 290 case NFS4ERR_NOTDIR: 291 return (ENOTDIR); 292 case NFS4ERR_ISDIR: 293 return (EISDIR); 294 case NFS4ERR_INVAL: 295 return (EINVAL); 296 case NFS4ERR_FBIG: 297 return (EFBIG); 298 case NFS4ERR_NOSPC: 299 return (ENOSPC); 300 case NFS4ERR_ROFS: 301 return (EROFS); 302 case NFS4ERR_MLINK: 303 return (EMLINK); 304 case NFS4ERR_NAMETOOLONG: 305 return (ENAMETOOLONG); 306 case NFS4ERR_NOTEMPTY: 307 return (ENOTEMPTY); 308 case NFS4ERR_DQUOT: 309 return (EDQUOT); 310 case NFS4ERR_STALE: 311 return (ESTALE); 312 case NFS4ERR_BADHANDLE: 313 return (ESTALE); 314 case NFS4ERR_BAD_COOKIE: 315 return (EINVAL); 316 case NFS4ERR_NOTSUPP: 317 return (EOPNOTSUPP); 318 case NFS4ERR_TOOSMALL: 319 return (EINVAL); 320 case NFS4ERR_SERVERFAULT: 321 return (EIO); 322 case NFS4ERR_BADTYPE: 323 return 
(EINVAL); 324 case NFS4ERR_DELAY: 325 return (ENXIO); 326 case NFS4ERR_SAME: 327 return (EPROTO); 328 case NFS4ERR_DENIED: 329 return (ENOLCK); 330 case NFS4ERR_EXPIRED: 331 return (EPROTO); 332 case NFS4ERR_LOCKED: 333 return (EACCES); 334 case NFS4ERR_GRACE: 335 return (EAGAIN); 336 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */ 337 return (ESTALE); 338 case NFS4ERR_SHARE_DENIED: 339 return (EACCES); 340 case NFS4ERR_WRONGSEC: 341 return (EPERM); 342 case NFS4ERR_CLID_INUSE: 343 return (EAGAIN); 344 case NFS4ERR_RESOURCE: 345 return (EAGAIN); 346 case NFS4ERR_MOVED: 347 return (EPROTO); 348 case NFS4ERR_NOFILEHANDLE: 349 return (EIO); 350 case NFS4ERR_MINOR_VERS_MISMATCH: 351 return (ENOTSUP); 352 case NFS4ERR_STALE_CLIENTID: 353 return (EIO); 354 case NFS4ERR_STALE_STATEID: 355 return (EIO); 356 case NFS4ERR_OLD_STATEID: 357 return (EIO); 358 case NFS4ERR_BAD_STATEID: 359 return (EIO); 360 case NFS4ERR_BAD_SEQID: 361 return (EIO); 362 case NFS4ERR_NOT_SAME: 363 return (EPROTO); 364 case NFS4ERR_LOCK_RANGE: 365 return (EPROTO); 366 case NFS4ERR_SYMLINK: 367 return (EPROTO); 368 case NFS4ERR_RESTOREFH: 369 return (EPROTO); 370 case NFS4ERR_LEASE_MOVED: 371 return (EPROTO); 372 case NFS4ERR_ATTRNOTSUPP: 373 return (ENOTSUP); 374 case NFS4ERR_NO_GRACE: 375 return (EPROTO); 376 case NFS4ERR_RECLAIM_BAD: 377 return (EPROTO); 378 case NFS4ERR_RECLAIM_CONFLICT: 379 return (EPROTO); 380 case NFS4ERR_BADXDR: 381 return (EINVAL); 382 case NFS4ERR_LOCKS_HELD: 383 return (EIO); 384 case NFS4ERR_OPENMODE: 385 return (EACCES); 386 case NFS4ERR_BADOWNER: 387 /* 388 * Client and server are in different DNS domains 389 * and the NFSMAPID_DOMAIN in /etc/default/nfs 390 * doesn't match. No good answer here. Return 391 * EACCESS, which translates to "permission denied". 
392 */ 393 return (EACCES); 394 case NFS4ERR_BADCHAR: 395 return (EINVAL); 396 case NFS4ERR_BADNAME: 397 return (EINVAL); 398 case NFS4ERR_BAD_RANGE: 399 return (EIO); 400 case NFS4ERR_LOCK_NOTSUPP: 401 return (ENOTSUP); 402 case NFS4ERR_OP_ILLEGAL: 403 return (EINVAL); 404 case NFS4ERR_DEADLOCK: 405 return (EDEADLK); 406 case NFS4ERR_FILE_OPEN: 407 return (EACCES); 408 case NFS4ERR_ADMIN_REVOKED: 409 return (EPROTO); 410 case NFS4ERR_CB_PATH_DOWN: 411 return (EPROTO); 412 default: 413 #ifdef DEBUG 414 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d", 415 status); 416 #endif 417 return ((int)status); 418 } 419 } 420 421 void 422 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op) 423 { 424 nfs4_server_t *server; 425 426 /* 427 * Return if already printed/queued a msg 428 * for this mount point. 429 */ 430 if (mi->mi_flags & MI4_BADOWNER_DEBUG) 431 return; 432 /* 433 * Happens once per client <-> server pair. 434 */ 435 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 436 mi->mi_flags & MI4_INT)) 437 return; 438 439 server = find_nfs4_server(mi); 440 if (server == NULL) { 441 nfs_rw_exit(&mi->mi_recovlock); 442 return; 443 } 444 445 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) { 446 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 447 "!NFSMAPID_DOMAIN does not match" 448 " the server: %s domain.\n" 449 "Please check configuration", 450 mi->mi_curr_serv->sv_hostname); 451 server->s_flags |= N4S_BADOWNER_DEBUG; 452 } 453 mutex_exit(&server->s_lock); 454 nfs4_server_rele(server); 455 nfs_rw_exit(&mi->mi_recovlock); 456 457 /* 458 * Happens once per mntinfo4_t. 459 * This error is deemed as one of the recovery facts "RF_BADOWNER", 460 * queue this in the mesg queue for this mount_info. This message 461 * is not printed, meaning its absent from id_to_dump_solo_fact() 462 * but its there for inspection if the queue is ever dumped/inspected. 
463 */ 464 mutex_enter(&mi->mi_lock); 465 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) { 466 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op, 467 FALSE, NULL, 0, NULL); 468 mi->mi_flags |= MI4_BADOWNER_DEBUG; 469 } 470 mutex_exit(&mi->mi_lock); 471 } 472 473 int 474 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime) 475 { 476 int64_t sec; 477 int32_t nsec; 478 479 /* 480 * Here check that the nfsv4 time is valid for the system. 481 * nfsv4 time value is a signed 64-bit, and the system time 482 * may be either int64_t or int32_t (depends on the kernel), 483 * so if the kernel is 32-bit, the nfsv4 time value may not fit. 484 */ 485 #ifndef _LP64 486 if (! NFS4_TIME_OK(ntime->seconds)) { 487 return (EOVERFLOW); 488 } 489 #endif 490 491 /* Invalid to specify 1 billion (or more) nsecs */ 492 if (ntime->nseconds >= 1000000000) 493 return (EINVAL); 494 495 if (ntime->seconds < 0) { 496 sec = ntime->seconds + 1; 497 nsec = -1000000000 + ntime->nseconds; 498 } else { 499 sec = ntime->seconds; 500 nsec = ntime->nseconds; 501 } 502 503 vatime->tv_sec = sec; 504 vatime->tv_nsec = nsec; 505 506 return (0); 507 } 508 509 int 510 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime) 511 { 512 int64_t sec; 513 uint32_t nsec; 514 515 /* 516 * nfsv4 time value is a signed 64-bit, and the system time 517 * may be either int64_t or int32_t (depends on the kernel), 518 * so all system time values will fit. 519 */ 520 if (vatime->tv_nsec >= 0) { 521 sec = vatime->tv_sec; 522 nsec = vatime->tv_nsec; 523 } else { 524 sec = vatime->tv_sec - 1; 525 nsec = 1000000000 + vatime->tv_nsec; 526 } 527 ntime->seconds = sec; 528 ntime->nseconds = nsec; 529 530 return (0); 531 } 532 533 /* 534 * Converts a utf8 string to a valid null terminated filename string. 535 * 536 * XXX - Not actually translating the UTF-8 string as per RFC 2279. 537 * For now, just validate that the UTF-8 string off the wire 538 * does not have characters that will freak out UFS, and leave 539 * it at that. 
540 */ 541 char * 542 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s) 543 { 544 ASSERT(lenp != NULL); 545 546 if (u8s == NULL || u8s->utf8string_len <= 0 || 547 u8s->utf8string_val == NULL) 548 return (NULL); 549 550 /* 551 * Check for obvious illegal filename chars 552 */ 553 if (utf8_strchr(u8s, '/') != NULL) { 554 #ifdef DEBUG 555 if (nfs4_utf8_debug) { 556 char *path; 557 int len = u8s->utf8string_len; 558 559 path = kmem_alloc(len + 1, KM_SLEEP); 560 bcopy(u8s->utf8string_val, path, len); 561 path[len] = '\0'; 562 563 zcmn_err(getzoneid(), CE_WARN, 564 "Invalid UTF-8 filename: %s", path); 565 566 kmem_free(path, len + 1); 567 } 568 #endif 569 return (NULL); 570 } 571 572 return (utf8_to_str(u8s, lenp, s)); 573 } 574 575 /* 576 * Converts a utf8 string to a C string. 577 * kmem_allocs a new string if not supplied 578 */ 579 char * 580 utf8_to_str(utf8string *str, uint_t *lenp, char *s) 581 { 582 char *sp; 583 char *u8p; 584 int len; 585 int i; 586 587 ASSERT(lenp != NULL); 588 589 if (str == NULL) 590 return (NULL); 591 592 u8p = str->utf8string_val; 593 len = str->utf8string_len; 594 if (len <= 0 || u8p == NULL) { 595 if (s) 596 *s = '\0'; 597 return (NULL); 598 } 599 600 sp = s; 601 if (sp == NULL) 602 sp = kmem_alloc(len + 1, KM_SLEEP); 603 604 /* 605 * At least check for embedded nulls 606 */ 607 for (i = 0; i < len; i++) { 608 sp[i] = u8p[i]; 609 if (u8p[i] == '\0') { 610 #ifdef DEBUG 611 zcmn_err(getzoneid(), CE_WARN, 612 "Embedded NULL in UTF-8 string"); 613 #endif 614 if (s == NULL) 615 kmem_free(sp, len + 1); 616 return (NULL); 617 } 618 } 619 sp[len] = '\0'; 620 *lenp = len + 1; 621 622 return (sp); 623 } 624 625 /* 626 * str_to_utf8 - converts a null-terminated C string to a utf8 string 627 */ 628 utf8string * 629 str_to_utf8(char *nm, utf8string *str) 630 { 631 int len; 632 633 if (str == NULL) 634 return (NULL); 635 636 if (nm == NULL || *nm == '\0') { 637 str->utf8string_len = 0; 638 str->utf8string_val = NULL; 639 } 640 641 len = strlen(nm); 
642 643 str->utf8string_val = kmem_alloc(len, KM_SLEEP); 644 str->utf8string_len = len; 645 bcopy(nm, str->utf8string_val, len); 646 647 return (str); 648 } 649 650 utf8string * 651 utf8_copy(utf8string *src, utf8string *dest) 652 { 653 if (src == NULL) 654 return (NULL); 655 if (dest == NULL) 656 return (NULL); 657 658 if (src->utf8string_len > 0) { 659 dest->utf8string_val = kmem_alloc(src->utf8string_len, 660 KM_SLEEP); 661 bcopy(src->utf8string_val, dest->utf8string_val, 662 src->utf8string_len); 663 dest->utf8string_len = src->utf8string_len; 664 } else { 665 dest->utf8string_val = NULL; 666 dest->utf8string_len = 0; 667 } 668 669 return (dest); 670 } 671 672 int 673 utf8_compare(const utf8string *a, const utf8string *b) 674 { 675 int mlen, cmp; 676 int alen, blen; 677 char *aval, *bval; 678 679 if ((a == NULL) && (b == NULL)) 680 return (0); 681 else if (a == NULL) 682 return (-1); 683 else if (b == NULL) 684 return (1); 685 686 alen = a->utf8string_len; 687 blen = b->utf8string_len; 688 aval = a->utf8string_val; 689 bval = b->utf8string_val; 690 691 if (((alen == 0) || (aval == NULL)) && 692 ((blen == 0) || (bval == NULL))) 693 return (0); 694 else if ((alen == 0) || (aval == NULL)) 695 return (-1); 696 else if ((blen == 0) || (bval == NULL)) 697 return (1); 698 699 mlen = MIN(alen, blen); 700 cmp = strncmp(aval, bval, mlen); 701 702 if ((cmp == 0) && (alen == blen)) 703 return (0); 704 else if ((cmp == 0) && (alen < blen)) 705 return (-1); 706 else if (cmp == 0) 707 return (1); 708 else if (cmp < 0) 709 return (-1); 710 return (1); 711 } 712 713 /* 714 * utf8_dir_verify - checks that the utf8 string is valid 715 */ 716 int 717 utf8_dir_verify(utf8string *str) 718 { 719 char *nm; 720 int len; 721 722 if (str == NULL) 723 return (0); 724 725 nm = str->utf8string_val; 726 len = str->utf8string_len; 727 if (nm == NULL || len == 0) { 728 return (0); 729 } 730 731 if (len == 1 && nm[0] == '.') 732 return (0); 733 if (len == 2 && nm[0] == '.' 
&& nm[1] == '.') 734 return (0); 735 736 if (utf8_strchr(str, '/') != NULL) 737 return (0); 738 739 if (utf8_strchr(str, '\0') != NULL) 740 return (0); 741 742 return (1); 743 } 744 745 /* 746 * from rpcsec module (common/rpcsec) 747 */ 748 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **); 749 extern void sec_clnt_freeh(AUTH *); 750 extern void sec_clnt_freeinfo(struct sec_data *); 751 752 /* 753 * authget() gets an auth handle based on the security 754 * information from the servinfo in mountinfo. 755 * The auth handle is stored in ch_client->cl_auth. 756 * 757 * First security flavor of choice is to use sv_secdata 758 * which is initiated by the client. If that fails, get 759 * secinfo from the server and then select one from the 760 * server secinfo list . 761 * 762 * For RPCSEC_GSS flavor, upon success, a secure context is 763 * established between client and server. 764 */ 765 int 766 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr) 767 { 768 int error, i; 769 770 /* 771 * SV4_TRYSECINFO indicates to try the secinfo list from 772 * sv_secinfo until a successful one is reached. Point 773 * sv_currsec to the selected security mechanism for 774 * later sessions. 775 */ 776 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 777 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) { 778 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count; 779 i++) { 780 if (!(error = sec_clnt_geth(ch_client, 781 &svp->sv_secinfo->sdata[i], 782 cr, &ch_client->cl_auth))) { 783 784 svp->sv_currsec = &svp->sv_secinfo->sdata[i]; 785 svp->sv_secinfo->index = i; 786 /* done */ 787 svp->sv_flags &= ~SV4_TRYSECINFO; 788 break; 789 } 790 791 /* 792 * Allow the caller retry with the security flavor 793 * pointed by svp->sv_secinfo->index when 794 * ETIMEDOUT/ECONNRESET occurs. 
795 */ 796 if (error == ETIMEDOUT || error == ECONNRESET) { 797 svp->sv_secinfo->index = i; 798 break; 799 } 800 } 801 } else { 802 /* sv_currsec points to one of the entries in sv_secinfo */ 803 if (svp->sv_currsec) { 804 error = sec_clnt_geth(ch_client, svp->sv_currsec, cr, 805 &ch_client->cl_auth); 806 } else { 807 /* If it's null, use sv_secdata. */ 808 error = sec_clnt_geth(ch_client, svp->sv_secdata, cr, 809 &ch_client->cl_auth); 810 } 811 } 812 nfs_rw_exit(&svp->sv_lock); 813 814 return (error); 815 } 816 817 /* 818 * Common handle get program for NFS, NFS ACL, and NFS AUTH client. 819 */ 820 int 821 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 822 struct chtab **chp, struct nfs4_clnt *nfscl) 823 { 824 struct chhead *ch, *newch; 825 struct chhead **plistp; 826 struct chtab *cp; 827 int error; 828 k_sigset_t smask; 829 830 if (newcl == NULL || chp == NULL || ci == NULL) 831 return (EINVAL); 832 833 *newcl = NULL; 834 *chp = NULL; 835 836 /* 837 * Find an unused handle or create one 838 */ 839 newch = NULL; 840 nfscl->nfscl_stat.clgets.value.ui64++; 841 top: 842 /* 843 * Find the correct entry in the cache to check for free 844 * client handles. The search is based on the RPC program 845 * number, program version number, dev_t for the transport 846 * device, and the protocol family. 847 */ 848 mutex_enter(&nfscl->nfscl_chtable4_lock); 849 plistp = &nfscl->nfscl_chtable4; 850 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 851 if (ch->ch_prog == ci->cl_prog && 852 ch->ch_vers == ci->cl_vers && 853 ch->ch_dev == svp->sv_knconf->knc_rdev && 854 (strcmp(ch->ch_protofmly, 855 svp->sv_knconf->knc_protofmly) == 0)) 856 break; 857 plistp = &ch->ch_next; 858 } 859 860 /* 861 * If we didn't find a cache entry for this quadruple, then 862 * create one. If we don't have one already preallocated, 863 * then drop the cache lock, create one, and then start over. 
864 * If we did have a preallocated entry, then just add it to 865 * the front of the list. 866 */ 867 if (ch == NULL) { 868 if (newch == NULL) { 869 mutex_exit(&nfscl->nfscl_chtable4_lock); 870 newch = kmem_alloc(sizeof (*newch), KM_SLEEP); 871 newch->ch_timesused = 0; 872 newch->ch_prog = ci->cl_prog; 873 newch->ch_vers = ci->cl_vers; 874 newch->ch_dev = svp->sv_knconf->knc_rdev; 875 newch->ch_protofmly = kmem_alloc( 876 strlen(svp->sv_knconf->knc_protofmly) + 1, 877 KM_SLEEP); 878 (void) strcpy(newch->ch_protofmly, 879 svp->sv_knconf->knc_protofmly); 880 newch->ch_list = NULL; 881 goto top; 882 } 883 ch = newch; 884 newch = NULL; 885 ch->ch_next = nfscl->nfscl_chtable4; 886 nfscl->nfscl_chtable4 = ch; 887 /* 888 * We found a cache entry, but if it isn't on the front of the 889 * list, then move it to the front of the list to try to take 890 * advantage of locality of operations. 891 */ 892 } else if (ch != nfscl->nfscl_chtable4) { 893 *plistp = ch->ch_next; 894 ch->ch_next = nfscl->nfscl_chtable4; 895 nfscl->nfscl_chtable4 = ch; 896 } 897 898 /* 899 * If there was a free client handle cached, then remove it 900 * from the list, init it, and use it. 901 */ 902 if (ch->ch_list != NULL) { 903 cp = ch->ch_list; 904 ch->ch_list = cp->ch_list; 905 mutex_exit(&nfscl->nfscl_chtable4_lock); 906 if (newch != NULL) { 907 kmem_free(newch->ch_protofmly, 908 strlen(newch->ch_protofmly) + 1); 909 kmem_free(newch, sizeof (*newch)); 910 } 911 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf, 912 &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr); 913 914 /* 915 * Get an auth handle. 916 */ 917 error = authget(svp, cp->ch_client, cr); 918 if (error || cp->ch_client->cl_auth == NULL) { 919 CLNT_DESTROY(cp->ch_client); 920 kmem_cache_free(chtab4_cache, cp); 921 return ((error != 0) ? 
error : EINTR); 922 } 923 ch->ch_timesused++; 924 *newcl = cp->ch_client; 925 *chp = cp; 926 return (0); 927 } 928 929 /* 930 * There weren't any free client handles which fit, so allocate 931 * a new one and use that. 932 */ 933 #ifdef DEBUG 934 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1); 935 #endif 936 mutex_exit(&nfscl->nfscl_chtable4_lock); 937 938 nfscl->nfscl_stat.cltoomany.value.ui64++; 939 if (newch != NULL) { 940 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1); 941 kmem_free(newch, sizeof (*newch)); 942 } 943 944 cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP); 945 cp->ch_head = ch; 946 947 sigintr(&smask, (int)ci->cl_flags & MI4_INT); 948 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog, 949 ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client); 950 sigunintr(&smask); 951 952 if (error != 0) { 953 kmem_cache_free(chtab4_cache, cp); 954 #ifdef DEBUG 955 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 956 #endif 957 /* 958 * Warning is unnecessary if error is EINTR. 959 */ 960 if (error != EINTR) { 961 nfs_cmn_err(error, CE_WARN, 962 "clget: couldn't create handle: %m\n"); 963 } 964 return (error); 965 } 966 (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL); 967 auth_destroy(cp->ch_client->cl_auth); 968 969 /* 970 * Get an auth handle. 971 */ 972 error = authget(svp, cp->ch_client, cr); 973 if (error || cp->ch_client->cl_auth == NULL) { 974 CLNT_DESTROY(cp->ch_client); 975 kmem_cache_free(chtab4_cache, cp); 976 #ifdef DEBUG 977 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 978 #endif 979 return ((error != 0) ? 
error : EINTR); 980 } 981 ch->ch_timesused++; 982 *newcl = cp->ch_client; 983 ASSERT(cp->ch_client->cl_nosignal == FALSE); 984 *chp = cp; 985 return (0); 986 } 987 988 static int 989 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 990 struct chtab **chp, struct nfs4_clnt *nfscl) 991 { 992 clinfo_t ci; 993 bool_t is_recov; 994 int firstcall, error = 0; 995 996 /* 997 * Set read buffer size to rsize 998 * and add room for RPC headers. 999 */ 1000 ci.cl_readsize = mi->mi_tsize; 1001 if (ci.cl_readsize != 0) 1002 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA); 1003 1004 /* 1005 * If soft mount and server is down just try once. 1006 * meaning: do not retransmit. 1007 */ 1008 if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN)) 1009 ci.cl_retrans = 0; 1010 else 1011 ci.cl_retrans = mi->mi_retrans; 1012 1013 ci.cl_prog = mi->mi_prog; 1014 ci.cl_vers = mi->mi_vers; 1015 ci.cl_flags = mi->mi_flags; 1016 1017 /* 1018 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS 1019 * security flavor, the client tries to establish a security context 1020 * by contacting the server. If the connection is timed out or reset, 1021 * e.g. server reboot, we will try again. 1022 */ 1023 is_recov = (curthread == mi->mi_recovthread); 1024 firstcall = 1; 1025 1026 do { 1027 error = clget4(&ci, svp, cr, newcl, chp, nfscl); 1028 1029 if (error == 0) 1030 break; 1031 1032 /* 1033 * For forced unmount and zone shutdown, bail out but 1034 * let the recovery thread do one more transmission. 
1035 */ 1036 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) && 1037 (!is_recov || !firstcall)) { 1038 error = EIO; 1039 break; 1040 } 1041 1042 /* do not retry for soft mount */ 1043 if (!(mi->mi_flags & MI4_HARD)) 1044 break; 1045 1046 /* let the caller deal with the failover case */ 1047 if (FAILOVER_MOUNT4(mi)) 1048 break; 1049 1050 firstcall = 0; 1051 1052 } while (error == ETIMEDOUT || error == ECONNRESET); 1053 1054 return (error); 1055 } 1056 1057 void 1058 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl) 1059 { 1060 if (cl->cl_auth != NULL) { 1061 sec_clnt_freeh(cl->cl_auth); 1062 cl->cl_auth = NULL; 1063 } 1064 1065 /* 1066 * Timestamp this cache entry so that we know when it was last 1067 * used. 1068 */ 1069 cp->ch_freed = gethrestime_sec(); 1070 1071 /* 1072 * Add the free client handle to the front of the list. 1073 * This way, the list will be sorted in youngest to oldest 1074 * order. 1075 */ 1076 mutex_enter(&nfscl->nfscl_chtable4_lock); 1077 cp->ch_list = cp->ch_head->ch_list; 1078 cp->ch_head->ch_list = cp; 1079 mutex_exit(&nfscl->nfscl_chtable4_lock); 1080 } 1081 1082 #define CL_HOLDTIME 60 /* time to hold client handles */ 1083 1084 static void 1085 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime) 1086 { 1087 struct chhead *ch; 1088 struct chtab *cp; /* list of objects that can be reclaimed */ 1089 struct chtab *cpe; 1090 struct chtab *cpl; 1091 struct chtab **cpp; 1092 #ifdef DEBUG 1093 int n = 0; 1094 clstat4_debug.clreclaim.value.ui64++; 1095 #endif 1096 1097 /* 1098 * Need to reclaim some memory, so step through the cache 1099 * looking through the lists for entries which can be freed. 1100 */ 1101 cp = NULL; 1102 1103 mutex_enter(&nfscl->nfscl_chtable4_lock); 1104 1105 /* 1106 * Here we step through each non-NULL quadruple and start to 1107 * construct the reclaim list pointed to by cp. Note that 1108 * cp will contain all eligible chtab entries. 
When this traversal 1109 * completes, chtab entries from the last quadruple will be at the 1110 * front of cp and entries from previously inspected quadruples have 1111 * been appended to the rear of cp. 1112 */ 1113 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 1114 if (ch->ch_list == NULL) 1115 continue; 1116 /* 1117 * Search each list for entries older then 1118 * cl_holdtime seconds. The lists are maintained 1119 * in youngest to oldest order so that when the 1120 * first entry is found which is old enough, then 1121 * all of the rest of the entries on the list will 1122 * be old enough as well. 1123 */ 1124 cpl = ch->ch_list; 1125 cpp = &ch->ch_list; 1126 while (cpl != NULL && 1127 cpl->ch_freed + cl_holdtime > gethrestime_sec()) { 1128 cpp = &cpl->ch_list; 1129 cpl = cpl->ch_list; 1130 } 1131 if (cpl != NULL) { 1132 *cpp = NULL; 1133 if (cp != NULL) { 1134 cpe = cpl; 1135 while (cpe->ch_list != NULL) 1136 cpe = cpe->ch_list; 1137 cpe->ch_list = cp; 1138 } 1139 cp = cpl; 1140 } 1141 } 1142 1143 mutex_exit(&nfscl->nfscl_chtable4_lock); 1144 1145 /* 1146 * If cp is empty, then there is nothing to reclaim here. 1147 */ 1148 if (cp == NULL) 1149 return; 1150 1151 /* 1152 * Step through the list of entries to free, destroying each client 1153 * handle and kmem_free'ing the memory for each entry. 1154 */ 1155 while (cp != NULL) { 1156 #ifdef DEBUG 1157 n++; 1158 #endif 1159 CLNT_DESTROY(cp->ch_client); 1160 cpl = cp->ch_list; 1161 kmem_cache_free(chtab4_cache, cp); 1162 cp = cpl; 1163 } 1164 1165 #ifdef DEBUG 1166 /* 1167 * Update clalloc so that nfsstat shows the current number 1168 * of allocated client handles. 1169 */ 1170 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n); 1171 #endif 1172 } 1173 1174 /* ARGSUSED */ 1175 static void 1176 clreclaim4(void *all) 1177 { 1178 struct nfs4_clnt *nfscl; 1179 1180 /* 1181 * The system is low on memory; go through and try to reclaim some from 1182 * every zone on the system. 
1183 */ 1184 mutex_enter(&nfs4_clnt_list_lock); 1185 nfscl = list_head(&nfs4_clnt_list); 1186 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) 1187 clreclaim4_zone(nfscl, CL_HOLDTIME); 1188 mutex_exit(&nfs4_clnt_list_lock); 1189 } 1190 1191 /* 1192 * Minimum time-out values indexed by call type 1193 * These units are in "eights" of a second to avoid multiplies 1194 */ 1195 static unsigned int minimum_timeo[] = { 1196 6, 7, 10 1197 }; 1198 1199 #define SHORTWAIT (NFS_COTS_TIMEO / 10) 1200 1201 /* 1202 * Back off for retransmission timeout, MAXTIMO is in hz of a sec 1203 */ 1204 #define MAXTIMO (20*hz) 1205 #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim)) 1206 #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1)) 1207 1208 static int 1209 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 1210 xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue, 1211 enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl) 1212 { 1213 CLIENT *client; 1214 struct chtab *ch; 1215 cred_t *cr = icr; 1216 struct rpc_err rpcerr, rpcerr_tmp; 1217 enum clnt_stat status; 1218 int error; 1219 struct timeval wait; 1220 int timeo; /* in units of hz */ 1221 bool_t tryagain, is_recov; 1222 bool_t cred_cloned = FALSE; 1223 k_sigset_t smask; 1224 servinfo4_t *svp; 1225 #ifdef DEBUG 1226 char *bufp; 1227 #endif 1228 int firstcall; 1229 1230 rpcerr.re_status = RPC_SUCCESS; 1231 1232 /* 1233 * If we know that we are rebooting then let's 1234 * not bother with doing any over the wireness. 
1235 */ 1236 mutex_enter(&mi->mi_lock); 1237 if (mi->mi_flags & MI4_SHUTDOWN) { 1238 mutex_exit(&mi->mi_lock); 1239 return (EIO); 1240 } 1241 mutex_exit(&mi->mi_lock); 1242 1243 /* For TSOL, use a new cred which has net_mac_aware flag */ 1244 if (!cred_cloned && is_system_labeled()) { 1245 cred_cloned = TRUE; 1246 cr = crdup(icr); 1247 (void) setpflags(NET_MAC_AWARE, 1, cr); 1248 } 1249 1250 /* 1251 * clget() calls clnt_tli_kinit() which clears the xid, so we 1252 * are guaranteed to reprocess the retry as a new request. 1253 */ 1254 svp = mi->mi_curr_serv; 1255 rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl); 1256 if (rpcerr.re_errno != 0) 1257 return (rpcerr.re_errno); 1258 1259 timeo = (mi->mi_timeo * hz) / 10; 1260 1261 /* 1262 * If hard mounted fs, retry call forever unless hard error 1263 * occurs. 1264 * 1265 * For forced unmount, let the recovery thread through but return 1266 * an error for all others. This is so that user processes can 1267 * exit quickly. The recovery thread bails out after one 1268 * transmission so that it can tell if it needs to continue. 1269 * 1270 * For zone shutdown, behave as above to encourage quick 1271 * process exit, but also fail quickly when servers have 1272 * timed out before and reduce the timeouts. 1273 */ 1274 is_recov = (curthread == mi->mi_recovthread); 1275 firstcall = 1; 1276 do { 1277 tryagain = FALSE; 1278 1279 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE, 1280 "nfs4_rfscall: vfs_flag=0x%x, %s", 1281 mi->mi_vfsp->vfs_flag, 1282 is_recov ? "recov thread" : "not recov thread")); 1283 1284 /* 1285 * It's possible while we're retrying the admin 1286 * decided to reboot. 
1287 */ 1288 mutex_enter(&mi->mi_lock); 1289 if (mi->mi_flags & MI4_SHUTDOWN) { 1290 mutex_exit(&mi->mi_lock); 1291 clfree4(client, ch, nfscl); 1292 if (cred_cloned) 1293 crfree(cr); 1294 return (EIO); 1295 } 1296 mutex_exit(&mi->mi_lock); 1297 1298 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) && 1299 (!is_recov || !firstcall)) { 1300 clfree4(client, ch, nfscl); 1301 if (cred_cloned) 1302 crfree(cr); 1303 return (EIO); 1304 } 1305 1306 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 1307 mutex_enter(&mi->mi_lock); 1308 if ((mi->mi_flags & MI4_TIMEDOUT) || 1309 !is_recov || !firstcall) { 1310 mutex_exit(&mi->mi_lock); 1311 clfree4(client, ch, nfscl); 1312 if (cred_cloned) 1313 crfree(cr); 1314 return (EIO); 1315 } 1316 mutex_exit(&mi->mi_lock); 1317 timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10; 1318 } 1319 1320 firstcall = 0; 1321 TICK_TO_TIMEVAL(timeo, &wait); 1322 1323 /* 1324 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 1325 * and SIGTERM. (Preserving the existing masks). 1326 * Mask out SIGINT if mount option nointr is specified. 1327 */ 1328 sigintr(&smask, (int)mi->mi_flags & MI4_INT); 1329 if (!(mi->mi_flags & MI4_INT)) 1330 client->cl_nosignal = TRUE; 1331 1332 /* 1333 * If there is a current signal, then don't bother 1334 * even trying to send out the request because we 1335 * won't be able to block waiting for the response. 1336 * Simply assume RPC_INTR and get on with it. 1337 */ 1338 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 1339 status = RPC_INTR; 1340 else { 1341 status = CLNT_CALL(client, which, xdrargs, argsp, 1342 xdrres, resp, wait); 1343 } 1344 1345 if (!(mi->mi_flags & MI4_INT)) 1346 client->cl_nosignal = FALSE; 1347 /* 1348 * restore original signal mask 1349 */ 1350 sigunintr(&smask); 1351 1352 switch (status) { 1353 case RPC_SUCCESS: 1354 break; 1355 1356 case RPC_INTR: 1357 /* 1358 * There is no way to recover from this error, 1359 * even if mount option nointr is specified. 
1360 * SIGKILL, for example, cannot be blocked. 1361 */ 1362 rpcerr.re_status = RPC_INTR; 1363 rpcerr.re_errno = EINTR; 1364 break; 1365 1366 case RPC_UDERROR: 1367 /* 1368 * If the NFS server is local (vold) and 1369 * it goes away then we get RPC_UDERROR. 1370 * This is a retryable error, so we would 1371 * loop, so check to see if the specific 1372 * error was ECONNRESET, indicating that 1373 * target did not exist at all. If so, 1374 * return with RPC_PROGUNAVAIL and 1375 * ECONNRESET to indicate why. 1376 */ 1377 CLNT_GETERR(client, &rpcerr); 1378 if (rpcerr.re_errno == ECONNRESET) { 1379 rpcerr.re_status = RPC_PROGUNAVAIL; 1380 rpcerr.re_errno = ECONNRESET; 1381 break; 1382 } 1383 /*FALLTHROUGH*/ 1384 1385 default: /* probably RPC_TIMEDOUT */ 1386 1387 if (IS_UNRECOVERABLE_RPC(status)) 1388 break; 1389 1390 /* 1391 * increment server not responding count 1392 */ 1393 mutex_enter(&mi->mi_lock); 1394 mi->mi_noresponse++; 1395 mutex_exit(&mi->mi_lock); 1396 #ifdef DEBUG 1397 nfscl->nfscl_stat.noresponse.value.ui64++; 1398 #endif 1399 /* 1400 * On zone shutdown, mark server dead and move on. 1401 */ 1402 if (zone_status_get(curproc->p_zone) >= 1403 ZONE_IS_SHUTTING_DOWN) { 1404 mutex_enter(&mi->mi_lock); 1405 mi->mi_flags |= MI4_TIMEDOUT; 1406 mutex_exit(&mi->mi_lock); 1407 clfree4(client, ch, nfscl); 1408 if (cred_cloned) 1409 crfree(cr); 1410 return (EIO); 1411 } 1412 1413 /* 1414 * NFS client failover support: 1415 * return and let the caller take care of 1416 * failover. We only return for failover mounts 1417 * because otherwise we want the "not responding" 1418 * message, the timer updates, etc. 1419 */ 1420 if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) && 1421 (error = try_failover(status)) != 0) { 1422 clfree4(client, ch, nfscl); 1423 if (cred_cloned) 1424 crfree(cr); 1425 *rpc_statusp = status; 1426 return (error); 1427 } 1428 1429 if (flags & RFSCALL_SOFT) 1430 break; 1431 1432 tryagain = TRUE; 1433 1434 /* 1435 * The call is in progress (over COTS). 
1436 * Try the CLNT_CALL again, but don't 1437 * print a noisy error message. 1438 */ 1439 if (status == RPC_INPROGRESS) 1440 break; 1441 1442 timeo = backoff(timeo); 1443 CLNT_GETERR(client, &rpcerr_tmp); 1444 1445 mutex_enter(&mi->mi_lock); 1446 if (!(mi->mi_flags & MI4_PRINTED)) { 1447 mi->mi_flags |= MI4_PRINTED; 1448 mutex_exit(&mi->mi_lock); 1449 if ((status == RPC_CANTSEND) && 1450 (rpcerr_tmp.re_errno == ENOBUFS)) 1451 nfs4_queue_fact(RF_SENDQ_FULL, mi, 0, 1452 0, 0, FALSE, NULL, 0, NULL); 1453 else 1454 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 1455 0, 0, 0, FALSE, NULL, 0, NULL); 1456 } else 1457 mutex_exit(&mi->mi_lock); 1458 1459 if (*doqueue && nfs_has_ctty()) { 1460 *doqueue = 0; 1461 if (!(mi->mi_flags & MI4_NOPRINT)) { 1462 if ((status == RPC_CANTSEND) && 1463 (rpcerr_tmp.re_errno == ENOBUFS)) 1464 nfs4_queue_fact(RF_SENDQ_FULL, 1465 mi, 0, 0, 0, FALSE, NULL, 1466 0, NULL); 1467 else 1468 nfs4_queue_fact( 1469 RF_SRV_NOT_RESPOND, mi, 0, 1470 0, 0, FALSE, NULL, 0, NULL); 1471 } 1472 } 1473 } 1474 } while (tryagain); 1475 1476 DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status, 1477 int, rpcerr.re_errno); 1478 1479 if (status != RPC_SUCCESS) { 1480 zoneid_t zoneid = mi->mi_zone->zone_id; 1481 1482 /* 1483 * Let soft mounts use the timed out message. 
1484 */ 1485 if (status == RPC_INPROGRESS) 1486 status = RPC_TIMEDOUT; 1487 nfscl->nfscl_stat.badcalls.value.ui64++; 1488 if (status != RPC_INTR) { 1489 mutex_enter(&mi->mi_lock); 1490 mi->mi_flags |= MI4_DOWN; 1491 mutex_exit(&mi->mi_lock); 1492 CLNT_GETERR(client, &rpcerr); 1493 #ifdef DEBUG 1494 bufp = clnt_sperror(client, svp->sv_hostname); 1495 zprintf(zoneid, "NFS%d %s failed for %s\n", 1496 mi->mi_vers, mi->mi_rfsnames[which], bufp); 1497 if (nfs_has_ctty()) { 1498 if (!(mi->mi_flags & MI4_NOPRINT)) { 1499 uprintf("NFS%d %s failed for %s\n", 1500 mi->mi_vers, mi->mi_rfsnames[which], 1501 bufp); 1502 } 1503 } 1504 kmem_free(bufp, MAXPATHLEN); 1505 #else 1506 zprintf(zoneid, 1507 "NFS %s failed for server %s: error %d (%s)\n", 1508 mi->mi_rfsnames[which], svp->sv_hostname, 1509 status, clnt_sperrno(status)); 1510 if (nfs_has_ctty()) { 1511 if (!(mi->mi_flags & MI4_NOPRINT)) { 1512 uprintf( 1513 "NFS %s failed for server %s: error %d (%s)\n", 1514 mi->mi_rfsnames[which], 1515 svp->sv_hostname, status, 1516 clnt_sperrno(status)); 1517 } 1518 } 1519 #endif 1520 /* 1521 * when CLNT_CALL() fails with RPC_AUTHERROR, 1522 * re_errno is set appropriately depending on 1523 * the authentication error 1524 */ 1525 if (status == RPC_VERSMISMATCH || 1526 status == RPC_PROGVERSMISMATCH) 1527 rpcerr.re_errno = EIO; 1528 } 1529 } else { 1530 /* 1531 * Test the value of mi_down and mi_printed without 1532 * holding the mi_lock mutex. If they are both zero, 1533 * then it is okay to skip the down and printed 1534 * processing. This saves on a mutex_enter and 1535 * mutex_exit pair for a normal, successful RPC. 1536 * This was just complete overhead. 
1537 */ 1538 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) { 1539 mutex_enter(&mi->mi_lock); 1540 mi->mi_flags &= ~MI4_DOWN; 1541 if (mi->mi_flags & MI4_PRINTED) { 1542 mi->mi_flags &= ~MI4_PRINTED; 1543 mutex_exit(&mi->mi_lock); 1544 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1545 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 1546 0, FALSE, NULL, 0, NULL); 1547 } else 1548 mutex_exit(&mi->mi_lock); 1549 } 1550 1551 if (*doqueue == 0) { 1552 if (!(mi->mi_flags & MI4_NOPRINT) && 1553 !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1554 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0, 1555 FALSE, NULL, 0, NULL); 1556 1557 *doqueue = 1; 1558 } 1559 } 1560 1561 clfree4(client, ch, nfscl); 1562 if (cred_cloned) 1563 crfree(cr); 1564 1565 ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 1566 1567 TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d", 1568 rpcerr.re_errno); 1569 1570 *rpc_statusp = status; 1571 return (rpcerr.re_errno); 1572 } 1573 1574 /* 1575 * rfs4call - general wrapper for RPC calls initiated by the client 1576 */ 1577 void 1578 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp, 1579 cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep) 1580 { 1581 int i, error; 1582 enum clnt_stat rpc_status = NFS4_OK; 1583 int num_resops; 1584 struct nfs4_clnt *nfscl; 1585 1586 ASSERT(nfs_zone() == mi->mi_zone); 1587 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone()); 1588 ASSERT(nfscl != NULL); 1589 1590 nfscl->nfscl_stat.calls.value.ui64++; 1591 mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++; 1592 1593 /* Set up the results struct for XDR usage */ 1594 resp->argsp = argsp; 1595 resp->array = NULL; 1596 resp->status = 0; 1597 resp->decode_len = 0; 1598 1599 error = nfs4_rfscall(mi, NFSPROC4_COMPOUND, 1600 xdr_COMPOUND4args_clnt, (caddr_t)argsp, 1601 xdr_COMPOUND4res_clnt, (caddr_t)resp, cr, 1602 doqueue, &rpc_status, flags, nfscl); 1603 1604 /* Return now if it was an RPC error */ 1605 if (error) { 1606 ep->error = error; 1607 
ep->stat = resp->status; 1608 ep->rpc_status = rpc_status; 1609 return; 1610 } 1611 1612 /* else we'll count the processed operations */ 1613 num_resops = resp->decode_len; 1614 for (i = 0; i < num_resops; i++) { 1615 /* 1616 * Count the individual operations 1617 * processed by the server. 1618 */ 1619 if (resp->array[i].resop >= NFSPROC4_NULL && 1620 resp->array[i].resop <= OP_WRITE) 1621 mi->mi_reqs[resp->array[i].resop].value.ui64++; 1622 } 1623 1624 ep->error = 0; 1625 ep->stat = resp->status; 1626 ep->rpc_status = rpc_status; 1627 } 1628 1629 /* 1630 * nfs4rename_update - updates stored state after a rename. Currently this 1631 * is the path of the object and anything under it, and the filehandle of 1632 * the renamed object. 1633 */ 1634 void 1635 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm) 1636 { 1637 sfh4_update(VTOR4(renvp)->r_fh, nfh4p); 1638 fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm); 1639 } 1640 1641 /* 1642 * Routine to look up the filehandle for the given path and rootvp. 1643 * 1644 * Return values: 1645 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is 1646 * updated. 1647 * - error: return value (errno value) and/or *statp is set appropriately. 
1648 */ 1649 #define RML_ORDINARY 1 1650 #define RML_NAMED_ATTR 2 1651 #define RML_ATTRDIR 3 1652 1653 static void 1654 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp, 1655 int filetype, cred_t *cr, 1656 nfs_fh4 *fhp, nfs4_ga_res_t *garp, /* fh, attrs for object */ 1657 nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp, /* fh, attrs for parent */ 1658 nfs4_error_t *ep) 1659 { 1660 COMPOUND4args_clnt args; 1661 COMPOUND4res_clnt res; 1662 nfs_argop4 *argop; 1663 nfs_resop4 *resop; 1664 int num_argops; 1665 lookup4_param_t lookuparg; 1666 nfs_fh4 *tmpfhp; 1667 int doqueue = 1; 1668 char *path; 1669 mntinfo4_t *mi; 1670 1671 ASSERT(fname != NULL); 1672 ASSERT(rootvp->v_type == VDIR); 1673 1674 mi = VTOMI4(rootvp); 1675 path = fn_path(fname); 1676 switch (filetype) { 1677 case RML_NAMED_ATTR: 1678 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR; 1679 args.ctag = TAG_REMAP_LOOKUP_NA; 1680 break; 1681 case RML_ATTRDIR: 1682 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR; 1683 args.ctag = TAG_REMAP_LOOKUP_AD; 1684 break; 1685 case RML_ORDINARY: 1686 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1687 args.ctag = TAG_REMAP_LOOKUP; 1688 break; 1689 default: 1690 ep->error = EINVAL; 1691 return; 1692 } 1693 lookuparg.argsp = &args; 1694 lookuparg.resp = &res; 1695 lookuparg.header_len = 1; /* Putfh */ 1696 lookuparg.trailer_len = 0; 1697 lookuparg.ga_bits = NFS4_VATTR_MASK; 1698 lookuparg.mi = VTOMI4(rootvp); 1699 1700 (void) nfs4lookup_setup(path, &lookuparg, 1); 1701 1702 /* 0: putfh directory */ 1703 argop = args.array; 1704 argop[0].argop = OP_CPUTFH; 1705 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh; 1706 1707 num_argops = args.array_len; 1708 1709 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1710 1711 if (ep->error || res.status != NFS4_OK) 1712 goto exit; 1713 1714 /* get the object filehandle */ 1715 resop = &res.array[res.array_len - 2]; 1716 if (resop->resop != OP_GETFH) { 1717 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1718 0, NULL, NULL, 0, NULL, 0, 
TAG_NONE, TAG_NONE, 0, 0); 1719 ep->stat = NFS4ERR_SERVERFAULT; 1720 goto exit; 1721 } 1722 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1723 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1724 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1725 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1726 TAG_NONE, 0, 0); 1727 ep->stat = NFS4ERR_SERVERFAULT; 1728 goto exit; 1729 } 1730 fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1731 nfs_fh4_copy(tmpfhp, fhp); 1732 1733 /* get the object attributes */ 1734 resop = &res.array[res.array_len - 1]; 1735 if (garp && resop->resop == OP_GETATTR) 1736 *garp = resop->nfs_resop4_u.opgetattr.ga_res; 1737 1738 /* See if there are enough fields in the response for parent info */ 1739 if ((int)res.array_len - 5 <= 0) 1740 goto exit; 1741 1742 /* get the parent filehandle */ 1743 resop = &res.array[res.array_len - 5]; 1744 if (resop->resop != OP_GETFH) { 1745 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1746 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1747 ep->stat = NFS4ERR_SERVERFAULT; 1748 goto exit; 1749 } 1750 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1751 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1752 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1753 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1754 TAG_NONE, 0, 0); 1755 ep->stat = NFS4ERR_SERVERFAULT; 1756 goto exit; 1757 } 1758 pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1759 nfs_fh4_copy(tmpfhp, pfhp); 1760 1761 /* get the parent attributes */ 1762 resop = &res.array[res.array_len - 4]; 1763 if (pgarp && resop->resop == OP_GETATTR) 1764 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res; 1765 1766 exit: 1767 /* 1768 * It is too hard to remember where all the OP_LOOKUPs are 1769 */ 1770 nfs4args_lookup_free(argop, num_argops); 1771 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1772 1773 if (!ep->error) 1774 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1775 kmem_free(path, strlen(path)+1); 1776 } 1777 
1778 /* 1779 * NFS client failover / volatile filehandle support 1780 * 1781 * Recover the filehandle for the given rnode. 1782 * 1783 * Errors are returned via the nfs4_error_t parameter. 1784 */ 1785 1786 void 1787 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 1788 { 1789 int is_stub; 1790 rnode4_t *rp = VTOR4(vp); 1791 vnode_t *rootvp = NULL; 1792 vnode_t *dvp = NULL; 1793 cred_t *cr, *cred_otw; 1794 nfs4_ga_res_t gar, pgar; 1795 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 1796 int filetype = RML_ORDINARY; 1797 nfs4_recov_state_t recov = {NULL, 0, 0}; 1798 int badfhcount = 0; 1799 nfs4_open_stream_t *osp = NULL; 1800 bool_t first_time = TRUE; /* first time getting OTW cred */ 1801 bool_t last_time = FALSE; /* last time getting OTW cred */ 1802 1803 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1804 "nfs4_remap_file: remapping %s", rnode4info(rp))); 1805 ASSERT(nfs4_consistent_type(vp)); 1806 1807 if (vp->v_flag & VROOT) { 1808 nfs4_remap_root(mi, ep, flags); 1809 return; 1810 } 1811 1812 /* 1813 * Given the root fh, use the path stored in 1814 * the rnode to find the fh for the new server. 1815 */ 1816 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1817 if (ep->error != 0) 1818 return; 1819 1820 cr = curthread->t_cred; 1821 ASSERT(cr != NULL); 1822 get_remap_cred: 1823 /* 1824 * Releases the osp, if it is provided. 1825 * Puts a hold on the cred_otw and the new osp (if found). 
1826 */ 1827 cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp, 1828 &first_time, &last_time); 1829 ASSERT(cred_otw != NULL); 1830 1831 if (rp->r_flags & R4ISXATTR) { 1832 filetype = RML_NAMED_ATTR; 1833 (void) vtodv(vp, &dvp, cred_otw, FALSE); 1834 } 1835 1836 if (vp->v_flag & V_XATTRDIR) { 1837 filetype = RML_ATTRDIR; 1838 } 1839 1840 if (filetype == RML_ORDINARY && rootvp->v_type == VREG) { 1841 /* file mount, doesn't need a remap */ 1842 goto done; 1843 } 1844 1845 again: 1846 remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw, 1847 &newfh, &gar, &newpfh, &pgar, ep); 1848 1849 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1850 "nfs4_remap_file: remap_lookup returned %d/%d", 1851 ep->error, ep->stat)); 1852 1853 if (last_time == FALSE && ep->error == EACCES) { 1854 crfree(cred_otw); 1855 if (dvp != NULL) 1856 VN_RELE(dvp); 1857 goto get_remap_cred; 1858 } 1859 if (ep->error != 0) 1860 goto done; 1861 1862 switch (ep->stat) { 1863 case NFS4_OK: 1864 badfhcount = 0; 1865 if (recov.rs_flags & NFS4_RS_DELAY_MSG) { 1866 mutex_enter(&rp->r_statelock); 1867 rp->r_delay_interval = 0; 1868 mutex_exit(&rp->r_statelock); 1869 uprintf("NFS File Available..\n"); 1870 } 1871 break; 1872 case NFS4ERR_FHEXPIRED: 1873 case NFS4ERR_BADHANDLE: 1874 case NFS4ERR_STALE: 1875 /* 1876 * If we ran into filehandle problems, we should try to 1877 * remap the root vnode first and hope life gets better. 1878 * But we need to avoid loops. 
1879 */ 1880 if (badfhcount++ > 0) 1881 goto done; 1882 if (newfh.nfs_fh4_len != 0) { 1883 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1884 newfh.nfs_fh4_len = 0; 1885 } 1886 if (newpfh.nfs_fh4_len != 0) { 1887 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1888 newpfh.nfs_fh4_len = 0; 1889 } 1890 /* relative path - remap rootvp then retry */ 1891 VN_RELE(rootvp); 1892 rootvp = NULL; 1893 nfs4_remap_root(mi, ep, flags); 1894 if (ep->error != 0 || ep->stat != NFS4_OK) 1895 goto done; 1896 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1897 if (ep->error != 0) 1898 goto done; 1899 goto again; 1900 case NFS4ERR_DELAY: 1901 badfhcount = 0; 1902 nfs4_set_delay_wait(vp); 1903 ep->error = nfs4_wait_for_delay(vp, &recov); 1904 if (ep->error != 0) 1905 goto done; 1906 goto again; 1907 case NFS4ERR_ACCESS: 1908 /* get new cred, try again */ 1909 if (last_time == TRUE) 1910 goto done; 1911 if (dvp != NULL) 1912 VN_RELE(dvp); 1913 crfree(cred_otw); 1914 goto get_remap_cred; 1915 default: 1916 goto done; 1917 } 1918 1919 /* 1920 * Check on the new and old rnodes before updating; 1921 * if the vnode type or size changes, issue a warning 1922 * and mark the file dead. 1923 */ 1924 mutex_enter(&rp->r_statelock); 1925 if (flags & NFS4_REMAP_CKATTRS) { 1926 if (vp->v_type != gar.n4g_va.va_type || 1927 (vp->v_type != VDIR && 1928 rp->r_size != gar.n4g_va.va_size)) { 1929 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1930 "nfs4_remap_file: size %d vs. %d, type %d vs. 
%d", 1931 (int)rp->r_size, (int)gar.n4g_va.va_size, 1932 vp->v_type, gar.n4g_va.va_type)); 1933 mutex_exit(&rp->r_statelock); 1934 nfs4_queue_event(RE_FILE_DIFF, mi, 1935 rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0, 1936 TAG_NONE, TAG_NONE, 0, 0); 1937 nfs4_fail_recov(vp, NULL, 0, NFS4_OK); 1938 goto done; 1939 } 1940 } 1941 ASSERT(gar.n4g_va.va_type != VNON); 1942 rp->r_server = mi->mi_curr_serv; 1943 1944 /* 1945 * Turn this object into a "stub" object if we 1946 * crossed an underlying server fs boundary. 1947 * 1948 * This stub will be for a mirror-mount. 1949 * A referral would look like a boundary crossing 1950 * as well, but would not be the same type of object, 1951 * so we would expect to mark the object dead. 1952 * 1953 * See comment in r4_do_attrcache() for more details. 1954 */ 1955 is_stub = 0; 1956 if (gar.n4g_fsid_valid) { 1957 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0); 1958 rp->r_srv_fsid = gar.n4g_fsid; 1959 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid)) 1960 is_stub = 1; 1961 nfs_rw_exit(&rp->r_server->sv_lock); 1962 #ifdef DEBUG 1963 } else { 1964 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1965 "remap_file: fsid attr not provided by server. 
rp=%p", 1966 (void *)rp)); 1967 #endif 1968 } 1969 if (is_stub) 1970 r4_stub_mirrormount(rp); 1971 else 1972 r4_stub_none(rp); 1973 mutex_exit(&rp->r_statelock); 1974 nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */ 1975 sfh4_update(rp->r_fh, &newfh); 1976 ASSERT(nfs4_consistent_type(vp)); 1977 1978 /* 1979 * If we got parent info, use it to update the parent 1980 */ 1981 if (newpfh.nfs_fh4_len != 0) { 1982 if (rp->r_svnode.sv_dfh != NULL) 1983 sfh4_update(rp->r_svnode.sv_dfh, &newpfh); 1984 if (dvp != NULL) { 1985 /* force update of attrs */ 1986 nfs4_attrcache_noinval(dvp, &pgar, gethrtime()); 1987 } 1988 } 1989 done: 1990 if (newfh.nfs_fh4_len != 0) 1991 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1992 if (newpfh.nfs_fh4_len != 0) 1993 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1994 if (cred_otw != NULL) 1995 crfree(cred_otw); 1996 if (rootvp != NULL) 1997 VN_RELE(rootvp); 1998 if (dvp != NULL) 1999 VN_RELE(dvp); 2000 if (osp != NULL) 2001 open_stream_rele(osp, rp); 2002 } 2003 2004 /* 2005 * Client-side failover support: remap the filehandle for vp if it appears 2006 * necessary. errors are returned via the nfs4_error_t parameter; though, 2007 * if there is a problem, we will just try again later. 2008 */ 2009 2010 void 2011 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 2012 { 2013 if (vp == NULL) 2014 return; 2015 2016 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY)) 2017 return; 2018 2019 if (VTOR4(vp)->r_server == mi->mi_curr_serv) 2020 return; 2021 2022 nfs4_remap_file(mi, vp, flags, ep); 2023 } 2024 2025 /* 2026 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node. 2027 * 2028 * Our caller has a filehandle for ".." relative to a particular 2029 * directory object. We want to find or create a parent vnode 2030 * with that filehandle and return it. We can of course create 2031 * a vnode from this filehandle, but we need to also make sure 2032 * that if ".." is a regular file (i.e. 
dvp is a V_XATTRDIR) 2033 * that we have a parent FH for future reopens as well. If 2034 * we have a remap failure, we won't be able to reopen this 2035 * file, but we won't treat that as fatal because a reopen 2036 * is at least unlikely. Someday nfs4_reopen() should look 2037 * for a missing parent FH and try a remap to recover from it. 2038 * 2039 * need_start_op argument indicates whether this function should 2040 * do a start_op before calling remap_lookup(). This should 2041 * be FALSE, if you are the recovery thread or in an op; otherwise, 2042 * set it to TRUE. 2043 */ 2044 int 2045 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp, 2046 cred_t *cr, vnode_t **vpp, int need_start_op) 2047 { 2048 mntinfo4_t *mi = VTOMI4(dvp); 2049 nfs4_fname_t *np = NULL, *pnp = NULL; 2050 vnode_t *vp = NULL, *rootvp = NULL; 2051 rnode4_t *rp; 2052 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 2053 nfs4_ga_res_t gar, pgar; 2054 vattr_t va, pva; 2055 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2056 nfs4_sharedfh_t *sfh = NULL, *psfh = NULL; 2057 nfs4_recov_state_t recov_state; 2058 2059 #ifdef DEBUG 2060 /* 2061 * ensure need_start_op is correct 2062 */ 2063 { 2064 int no_need_start_op = (tsd_get(nfs4_tsd_key) || 2065 (curthread == mi->mi_recovthread)); 2066 /* C needs a ^^ operator! */ 2067 ASSERT(((need_start_op) && (!no_need_start_op)) || 2068 ((! need_start_op) && (no_need_start_op))); 2069 } 2070 #endif 2071 ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone()); 2072 2073 NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE, 2074 "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp, 2075 rnode4info(VTOR4(dvp)))); 2076 2077 /* 2078 * rootvp might be needed eventually. Holding it now will 2079 * ensure that r4find_unlocked() will find it, if ".." is the root. 
2080 */ 2081 e.error = VFS_ROOT(mi->mi_vfsp, &rootvp); 2082 if (e.error != 0) 2083 goto out; 2084 rp = r4find_unlocked(fhp, mi->mi_vfsp); 2085 if (rp != NULL) { 2086 *vpp = RTOV4(rp); 2087 VN_RELE(rootvp); 2088 return (0); 2089 } 2090 2091 /* 2092 * Since we don't have the rnode, we have to go over the wire. 2093 * remap_lookup() can get all of the filehandles and attributes 2094 * we need in one operation. 2095 */ 2096 np = fn_parent(VTOSV(dvp)->sv_name); 2097 /* if a parent was not found return an error */ 2098 if (np == NULL) { 2099 e.error = ENOENT; 2100 goto out; 2101 } 2102 2103 recov_state.rs_flags = 0; 2104 recov_state.rs_num_retry_despite_err = 0; 2105 recov_retry: 2106 if (need_start_op) { 2107 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP, 2108 &recov_state, NULL); 2109 if (e.error != 0) { 2110 goto out; 2111 } 2112 } 2113 va.va_type = VNON; 2114 pva.va_type = VNON; 2115 remap_lookup(np, rootvp, RML_ORDINARY, cr, 2116 &newfh, &gar, &newpfh, &pgar, &e); 2117 if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) { 2118 if (need_start_op) { 2119 bool_t abort; 2120 2121 abort = nfs4_start_recovery(&e, mi, 2122 rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, 2123 NULL); 2124 if (abort) { 2125 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2126 &recov_state, FALSE); 2127 if (e.error == 0) 2128 e.error = EIO; 2129 goto out; 2130 } 2131 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2132 &recov_state, TRUE); 2133 goto recov_retry; 2134 } 2135 if (e.error == 0) 2136 e.error = EIO; 2137 goto out; 2138 } 2139 2140 if (!e.error) { 2141 va = gar.n4g_va; 2142 pva = pgar.n4g_va; 2143 } 2144 2145 if ((e.error != 0) || 2146 (va.va_type != VDIR)) { 2147 if (need_start_op) 2148 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2149 &recov_state, FALSE); 2150 if (e.error == 0) 2151 e.error = EIO; 2152 goto out; 2153 } 2154 2155 if (e.stat != NFS4_OK) { 2156 if (need_start_op) 2157 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2158 &recov_state, FALSE); 2159 e.error = EIO; 2160 goto out; 2161 
} 2162 2163 /* 2164 * It is possible for remap_lookup() to return with no error, 2165 * but without providing the parent filehandle and attrs. 2166 */ 2167 if (pva.va_type != VDIR) { 2168 /* 2169 * Call remap_lookup() again, this time with the 2170 * newpfh and pgar args in the first position. 2171 */ 2172 pnp = fn_parent(np); 2173 if (pnp != NULL) { 2174 remap_lookup(pnp, rootvp, RML_ORDINARY, cr, 2175 &newpfh, &pgar, NULL, NULL, &e); 2176 if (nfs4_needs_recovery(&e, FALSE, 2177 mi->mi_vfsp)) { 2178 if (need_start_op) { 2179 bool_t abort; 2180 2181 abort = nfs4_start_recovery(&e, mi, 2182 rootvp, NULL, NULL, NULL, 2183 OP_LOOKUP, NULL, NULL, NULL); 2184 if (abort) { 2185 nfs4_end_fop(mi, rootvp, NULL, 2186 OH_LOOKUP, &recov_state, 2187 FALSE); 2188 if (e.error == 0) 2189 e.error = EIO; 2190 goto out; 2191 } 2192 nfs4_end_fop(mi, rootvp, NULL, 2193 OH_LOOKUP, &recov_state, TRUE); 2194 goto recov_retry; 2195 } 2196 if (e.error == 0) 2197 e.error = EIO; 2198 goto out; 2199 } 2200 2201 if (e.stat != NFS4_OK) { 2202 if (need_start_op) 2203 nfs4_end_fop(mi, rootvp, NULL, 2204 OH_LOOKUP, &recov_state, FALSE); 2205 e.error = EIO; 2206 goto out; 2207 } 2208 } 2209 if ((pnp == NULL) || 2210 (e.error != 0) || 2211 (pva.va_type == VNON)) { 2212 if (need_start_op) 2213 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2214 &recov_state, FALSE); 2215 if (e.error == 0) 2216 e.error = EIO; 2217 goto out; 2218 } 2219 } 2220 ASSERT(newpfh.nfs_fh4_len != 0); 2221 if (need_start_op) 2222 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE); 2223 psfh = sfh4_get(&newpfh, mi); 2224 2225 sfh = sfh4_get(&newfh, mi); 2226 vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t); 2227 2228 out: 2229 if (np != NULL) 2230 fn_rele(&np); 2231 if (pnp != NULL) 2232 fn_rele(&pnp); 2233 if (newfh.nfs_fh4_len != 0) 2234 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 2235 if (newpfh.nfs_fh4_len != 0) 2236 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 2237 if (sfh != NULL) 2238 
sfh4_rele(&sfh); 2239 if (psfh != NULL) 2240 sfh4_rele(&psfh); 2241 if (rootvp != NULL) 2242 VN_RELE(rootvp); 2243 *vpp = vp; 2244 return (e.error); 2245 } 2246 2247 #ifdef DEBUG 2248 size_t r_path_memuse = 0; 2249 #endif 2250 2251 /* 2252 * NFS client failover support 2253 * 2254 * sv4_free() frees the malloc'd portion of a "servinfo_t". 2255 */ 2256 void 2257 sv4_free(servinfo4_t *svp) 2258 { 2259 servinfo4_t *next; 2260 struct knetconfig *knconf; 2261 2262 while (svp != NULL) { 2263 next = svp->sv_next; 2264 if (svp->sv_dhsec) 2265 sec_clnt_freeinfo(svp->sv_dhsec); 2266 if (svp->sv_secdata) 2267 sec_clnt_freeinfo(svp->sv_secdata); 2268 if (svp->sv_save_secinfo && 2269 svp->sv_save_secinfo != svp->sv_secinfo) 2270 secinfo_free(svp->sv_save_secinfo); 2271 if (svp->sv_secinfo) 2272 secinfo_free(svp->sv_secinfo); 2273 if (svp->sv_hostname && svp->sv_hostnamelen > 0) 2274 kmem_free(svp->sv_hostname, svp->sv_hostnamelen); 2275 knconf = svp->sv_knconf; 2276 if (knconf != NULL) { 2277 if (knconf->knc_protofmly != NULL) 2278 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2279 if (knconf->knc_proto != NULL) 2280 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2281 kmem_free(knconf, sizeof (*knconf)); 2282 } 2283 knconf = svp->sv_origknconf; 2284 if (knconf != NULL) { 2285 if (knconf->knc_protofmly != NULL) 2286 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2287 if (knconf->knc_proto != NULL) 2288 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2289 kmem_free(knconf, sizeof (*knconf)); 2290 } 2291 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0) 2292 kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen); 2293 if (svp->sv_path != NULL) { 2294 kmem_free(svp->sv_path, svp->sv_pathlen); 2295 } 2296 nfs_rw_destroy(&svp->sv_lock); 2297 kmem_free(svp, sizeof (*svp)); 2298 svp = next; 2299 } 2300 } 2301 2302 void 2303 nfs4_printfhandle(nfs4_fhandle_t *fhp) 2304 { 2305 int *ip; 2306 char *buf; 2307 size_t bufsize; 2308 char *cp; 2309 2310 /* 2311 * 13 == "(file handle:" 2312 * maximum 
of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times 2313 * 1 == ' ' 2314 * 8 == maximum strlen of "%x" 2315 * 3 == ")\n\0" 2316 */ 2317 bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3; 2318 buf = kmem_alloc(bufsize, KM_NOSLEEP); 2319 if (buf == NULL) 2320 return; 2321 2322 cp = buf; 2323 (void) strcpy(cp, "(file handle:"); 2324 while (*cp != '\0') 2325 cp++; 2326 for (ip = (int *)fhp->fh_buf; 2327 ip < (int *)&fhp->fh_buf[fhp->fh_len]; 2328 ip++) { 2329 (void) sprintf(cp, " %x", *ip); 2330 while (*cp != '\0') 2331 cp++; 2332 } 2333 (void) strcpy(cp, ")\n"); 2334 2335 zcmn_err(getzoneid(), CE_CONT, "%s", buf); 2336 2337 kmem_free(buf, bufsize); 2338 } 2339 2340 /* 2341 * The NFSv4 readdir cache subsystem. 2342 * 2343 * We provide a set of interfaces to allow the rest of the system to utilize 2344 * a caching mechanism while encapsulating the details of the actual 2345 * implementation. This should allow for better maintainability and 2346 * extensibility by consolidating the implementation details in one location. 2347 */ 2348 2349 /* 2350 * Comparator used by AVL routines. 2351 */ 2352 static int 2353 rddir4_cache_compar(const void *x, const void *y) 2354 { 2355 rddir4_cache_impl *ai = (rddir4_cache_impl *)x; 2356 rddir4_cache_impl *bi = (rddir4_cache_impl *)y; 2357 rddir4_cache *a = &ai->rc; 2358 rddir4_cache *b = &bi->rc; 2359 2360 if (a->nfs4_cookie == b->nfs4_cookie) { 2361 if (a->buflen == b->buflen) 2362 return (0); 2363 if (a->buflen < b->buflen) 2364 return (-1); 2365 return (1); 2366 } 2367 2368 if (a->nfs4_cookie < b->nfs4_cookie) 2369 return (-1); 2370 2371 return (1); 2372 } 2373 2374 /* 2375 * Allocate an opaque handle for the readdir cache. 
2376 */ 2377 void 2378 rddir4_cache_create(rnode4_t *rp) 2379 { 2380 ASSERT(rp->r_dir == NULL); 2381 2382 rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 2383 2384 avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl), 2385 offsetof(rddir4_cache_impl, tree)); 2386 } 2387 2388 /* 2389 * Purge the cache of all cached readdir responses. 2390 */ 2391 void 2392 rddir4_cache_purge(rnode4_t *rp) 2393 { 2394 rddir4_cache_impl *rdip; 2395 rddir4_cache_impl *nrdip; 2396 2397 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2398 2399 if (rp->r_dir == NULL) 2400 return; 2401 2402 rdip = avl_first(rp->r_dir); 2403 2404 while (rdip != NULL) { 2405 nrdip = AVL_NEXT(rp->r_dir, rdip); 2406 avl_remove(rp->r_dir, rdip); 2407 rdip->rc.flags &= ~RDDIRCACHED; 2408 rddir4_cache_rele(rp, &rdip->rc); 2409 rdip = nrdip; 2410 } 2411 ASSERT(avl_numnodes(rp->r_dir) == 0); 2412 } 2413 2414 /* 2415 * Destroy the readdir cache. 2416 */ 2417 void 2418 rddir4_cache_destroy(rnode4_t *rp) 2419 { 2420 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2421 if (rp->r_dir == NULL) 2422 return; 2423 2424 rddir4_cache_purge(rp); 2425 avl_destroy(rp->r_dir); 2426 kmem_free(rp->r_dir, sizeof (avl_tree_t)); 2427 rp->r_dir = NULL; 2428 } 2429 2430 /* 2431 * Locate a readdir response from the readdir cache. 2432 * 2433 * Return values: 2434 * 2435 * NULL - If there is an unrecoverable situation like the operation may have 2436 * been interrupted. 2437 * 2438 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller. 2439 * The flags are set approprately, such that the caller knows 2440 * what state the entry is in. 
2441 */ 2442 rddir4_cache * 2443 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count) 2444 { 2445 rddir4_cache_impl *rdip = NULL; 2446 rddir4_cache_impl srdip; 2447 rddir4_cache *srdc; 2448 rddir4_cache *rdc = NULL; 2449 rddir4_cache *nrdc = NULL; 2450 avl_index_t where; 2451 2452 top: 2453 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 2454 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2455 /* 2456 * Check to see if the readdir cache has been disabled. If so, then 2457 * simply allocate an rddir4_cache entry and return it, since caching 2458 * operations do not apply. 2459 */ 2460 if (rp->r_dir == NULL) { 2461 if (nrdc == NULL) { 2462 /* 2463 * Drop the lock because we are doing a sleeping 2464 * allocation. 2465 */ 2466 mutex_exit(&rp->r_statelock); 2467 rdc = rddir4_cache_alloc(KM_SLEEP); 2468 rdc->nfs4_cookie = cookie; 2469 rdc->buflen = count; 2470 mutex_enter(&rp->r_statelock); 2471 return (rdc); 2472 } 2473 return (nrdc); 2474 } 2475 2476 srdc = &srdip.rc; 2477 srdc->nfs4_cookie = cookie; 2478 srdc->buflen = count; 2479 2480 rdip = avl_find(rp->r_dir, &srdip, &where); 2481 2482 /* 2483 * If we didn't find an entry then create one and insert it 2484 * into the cache. 2485 */ 2486 if (rdip == NULL) { 2487 /* 2488 * Check for the case where we have made a second pass through 2489 * the cache due to a lockless allocation. If we find that no 2490 * thread has already inserted this entry, do the insert now 2491 * and return. 2492 */ 2493 if (nrdc != NULL) { 2494 avl_insert(rp->r_dir, nrdc->data, where); 2495 nrdc->flags |= RDDIRCACHED; 2496 rddir4_cache_hold(nrdc); 2497 return (nrdc); 2498 } 2499 2500 #ifdef DEBUG 2501 nfs4_readdir_cache_misses++; 2502 #endif 2503 /* 2504 * First, try to allocate an entry without sleeping. If that 2505 * fails then drop the lock and do a sleeping allocation. 
2506 */ 2507 nrdc = rddir4_cache_alloc(KM_NOSLEEP); 2508 if (nrdc != NULL) { 2509 nrdc->nfs4_cookie = cookie; 2510 nrdc->buflen = count; 2511 avl_insert(rp->r_dir, nrdc->data, where); 2512 nrdc->flags |= RDDIRCACHED; 2513 rddir4_cache_hold(nrdc); 2514 return (nrdc); 2515 } 2516 2517 /* 2518 * Drop the lock and do a sleeping allocation. We incur 2519 * additional overhead by having to search the cache again, 2520 * but this case should be rare. 2521 */ 2522 mutex_exit(&rp->r_statelock); 2523 nrdc = rddir4_cache_alloc(KM_SLEEP); 2524 nrdc->nfs4_cookie = cookie; 2525 nrdc->buflen = count; 2526 mutex_enter(&rp->r_statelock); 2527 /* 2528 * We need to take another pass through the cache 2529 * since we dropped our lock to perform the alloc. 2530 * Another thread may have come by and inserted the 2531 * entry we are interested in. 2532 */ 2533 goto top; 2534 } 2535 2536 /* 2537 * Check to see if we need to free our entry. This can happen if 2538 * another thread came along beat us to the insert. We can 2539 * safely call rddir4_cache_free directly because no other thread 2540 * would have a reference to this entry. 2541 */ 2542 if (nrdc != NULL) 2543 rddir4_cache_free((rddir4_cache_impl *)nrdc->data); 2544 2545 #ifdef DEBUG 2546 nfs4_readdir_cache_hits++; 2547 #endif 2548 /* 2549 * Found something. Make sure it's ready to return. 2550 */ 2551 rdc = &rdip->rc; 2552 rddir4_cache_hold(rdc); 2553 /* 2554 * If the cache entry is in the process of being filled in, wait 2555 * until this completes. The RDDIRWAIT bit is set to indicate that 2556 * someone is waiting and when the thread currently filling the entry 2557 * is done, it should do a cv_broadcast to wakeup all of the threads 2558 * waiting for it to finish. If the thread wakes up to find that 2559 * someone new is now trying to complete the the entry, go back 2560 * to sleep. 2561 */ 2562 while (rdc->flags & RDDIR) { 2563 /* 2564 * The entry is not complete. 
2565 */ 2566 nfs_rw_exit(&rp->r_rwlock); 2567 rdc->flags |= RDDIRWAIT; 2568 #ifdef DEBUG 2569 nfs4_readdir_cache_waits++; 2570 #endif 2571 while (rdc->flags & RDDIRWAIT) { 2572 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) { 2573 /* 2574 * We got interrupted, probably the user 2575 * typed ^C or an alarm fired. We free the 2576 * new entry if we allocated one. 2577 */ 2578 rddir4_cache_rele(rp, rdc); 2579 mutex_exit(&rp->r_statelock); 2580 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2581 RW_READER, FALSE); 2582 mutex_enter(&rp->r_statelock); 2583 return (NULL); 2584 } 2585 } 2586 mutex_exit(&rp->r_statelock); 2587 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2588 RW_READER, FALSE); 2589 mutex_enter(&rp->r_statelock); 2590 } 2591 2592 /* 2593 * The entry we were waiting on may have been purged from 2594 * the cache and should no longer be used, release it and 2595 * start over. 2596 */ 2597 if (!(rdc->flags & RDDIRCACHED)) { 2598 rddir4_cache_rele(rp, rdc); 2599 goto top; 2600 } 2601 2602 /* 2603 * The entry is completed. Return it. 2604 */ 2605 return (rdc); 2606 } 2607 2608 /* 2609 * Allocate a cache element and return it. Can return NULL if memory is 2610 * low. 2611 */ 2612 static rddir4_cache * 2613 rddir4_cache_alloc(int flags) 2614 { 2615 rddir4_cache_impl *rdip = NULL; 2616 rddir4_cache *rc = NULL; 2617 2618 rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags); 2619 2620 if (rdip != NULL) { 2621 rc = &rdip->rc; 2622 rc->data = (void *)rdip; 2623 rc->nfs4_cookie = 0; 2624 rc->nfs4_ncookie = 0; 2625 rc->entries = NULL; 2626 rc->eof = 0; 2627 rc->entlen = 0; 2628 rc->buflen = 0; 2629 rc->actlen = 0; 2630 /* 2631 * A readdir is required so set the flag. 
2632 */ 2633 rc->flags = RDDIRREQ; 2634 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL); 2635 rc->error = 0; 2636 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL); 2637 rdip->count = 1; 2638 #ifdef DEBUG 2639 atomic_add_64(&clstat4_debug.dirent.value.ui64, 1); 2640 #endif 2641 } 2642 return (rc); 2643 } 2644 2645 /* 2646 * Increment the reference count to this cache element. 2647 */ 2648 static void 2649 rddir4_cache_hold(rddir4_cache *rc) 2650 { 2651 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data; 2652 2653 mutex_enter(&rdip->lock); 2654 rdip->count++; 2655 mutex_exit(&rdip->lock); 2656 } 2657 2658 /* 2659 * Release a reference to this cache element. If the count is zero then 2660 * free the element. 2661 */ 2662 void 2663 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc) 2664 { 2665 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data; 2666 2667 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2668 2669 /* 2670 * Check to see if we have any waiters. If so, we can wake them 2671 * so that they can proceed. 2672 */ 2673 if (rdc->flags & RDDIRWAIT) { 2674 rdc->flags &= ~RDDIRWAIT; 2675 cv_broadcast(&rdc->cv); 2676 } 2677 2678 mutex_enter(&rdip->lock); 2679 ASSERT(rdip->count > 0); 2680 if (--rdip->count == 0) { 2681 mutex_exit(&rdip->lock); 2682 rddir4_cache_free(rdip); 2683 } else 2684 mutex_exit(&rdip->lock); 2685 } 2686 2687 /* 2688 * Free a cache element. 2689 */ 2690 static void 2691 rddir4_cache_free(rddir4_cache_impl *rdip) 2692 { 2693 rddir4_cache *rc = &rdip->rc; 2694 2695 #ifdef DEBUG 2696 atomic_add_64(&clstat4_debug.dirent.value.ui64, -1); 2697 #endif 2698 if (rc->entries != NULL) 2699 kmem_free(rc->entries, rc->buflen); 2700 cv_destroy(&rc->cv); 2701 mutex_destroy(&rdip->lock); 2702 kmem_free(rdip, sizeof (*rdip)); 2703 } 2704 2705 /* 2706 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat 2707 * framework. 
2708 */ 2709 static int 2710 cl4_snapshot(kstat_t *ksp, void *buf, int rw) 2711 { 2712 ksp->ks_snaptime = gethrtime(); 2713 if (rw == KSTAT_WRITE) { 2714 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl)); 2715 #ifdef DEBUG 2716 /* 2717 * Currently only the global zone can write to kstats, but we 2718 * add the check just for paranoia. 2719 */ 2720 if (INGLOBALZONE(curproc)) 2721 bcopy((char *)buf + sizeof (clstat4_tmpl), 2722 &clstat4_debug, sizeof (clstat4_debug)); 2723 #endif 2724 } else { 2725 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl)); 2726 #ifdef DEBUG 2727 /* 2728 * If we're displaying the "global" debug kstat values, we 2729 * display them as-is to all zones since in fact they apply to 2730 * the system as a whole. 2731 */ 2732 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl), 2733 sizeof (clstat4_debug)); 2734 #endif 2735 } 2736 return (0); 2737 } 2738 2739 2740 2741 /* 2742 * Zone support 2743 */ 2744 static void * 2745 clinit4_zone(zoneid_t zoneid) 2746 { 2747 kstat_t *nfs4_client_kstat; 2748 struct nfs4_clnt *nfscl; 2749 uint_t ndata; 2750 2751 nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP); 2752 mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL); 2753 nfscl->nfscl_chtable4 = NULL; 2754 nfscl->nfscl_zoneid = zoneid; 2755 2756 bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl)); 2757 ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t); 2758 #ifdef DEBUG 2759 ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t); 2760 #endif 2761 if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client", 2762 "misc", KSTAT_TYPE_NAMED, ndata, 2763 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) { 2764 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat; 2765 nfs4_client_kstat->ks_snapshot = cl4_snapshot; 2766 kstat_install(nfs4_client_kstat); 2767 } 2768 mutex_enter(&nfs4_clnt_list_lock); 2769 list_insert_head(&nfs4_clnt_list, nfscl); 2770 mutex_exit(&nfs4_clnt_list_lock); 2771 2772 return (nfscl); 2773 
} 2774 2775 /*ARGSUSED*/ 2776 static void 2777 clfini4_zone(zoneid_t zoneid, void *arg) 2778 { 2779 struct nfs4_clnt *nfscl = arg; 2780 chhead_t *chp, *next; 2781 2782 if (nfscl == NULL) 2783 return; 2784 mutex_enter(&nfs4_clnt_list_lock); 2785 list_remove(&nfs4_clnt_list, nfscl); 2786 mutex_exit(&nfs4_clnt_list_lock); 2787 clreclaim4_zone(nfscl, 0); 2788 for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) { 2789 ASSERT(chp->ch_list == NULL); 2790 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1); 2791 next = chp->ch_next; 2792 kmem_free(chp, sizeof (*chp)); 2793 } 2794 kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid); 2795 mutex_destroy(&nfscl->nfscl_chtable4_lock); 2796 kmem_free(nfscl, sizeof (*nfscl)); 2797 } 2798 2799 /* 2800 * Called by endpnt_destructor to make sure the client handles are 2801 * cleaned up before the RPC endpoints. This becomes a no-op if 2802 * clfini_zone (above) is called first. This function is needed 2803 * (rather than relying on clfini_zone to clean up) because the ZSD 2804 * callbacks have no ordering mechanism, so we have no way to ensure 2805 * that clfini_zone is called before endpnt_destructor. 2806 */ 2807 void 2808 clcleanup4_zone(zoneid_t zoneid) 2809 { 2810 struct nfs4_clnt *nfscl; 2811 2812 mutex_enter(&nfs4_clnt_list_lock); 2813 nfscl = list_head(&nfs4_clnt_list); 2814 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) { 2815 if (nfscl->nfscl_zoneid == zoneid) { 2816 clreclaim4_zone(nfscl, 0); 2817 break; 2818 } 2819 } 2820 mutex_exit(&nfs4_clnt_list_lock); 2821 } 2822 2823 int 2824 nfs4_subr_init(void) 2825 { 2826 /* 2827 * Allocate and initialize the client handle cache 2828 */ 2829 chtab4_cache = kmem_cache_create("client_handle4_cache", 2830 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL, 2831 NULL, 0); 2832 2833 /* 2834 * Initialize the list of per-zone client handles (and associated data). 2835 * This needs to be done before we call zone_key_create(). 
2836 */ 2837 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt), 2838 offsetof(struct nfs4_clnt, nfscl_node)); 2839 2840 /* 2841 * Initialize the zone_key for per-zone client handle lists. 2842 */ 2843 zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone); 2844 2845 if (nfs4err_delay_time == 0) 2846 nfs4err_delay_time = NFS4ERR_DELAY_TIME; 2847 2848 return (0); 2849 } 2850 2851 int 2852 nfs4_subr_fini(void) 2853 { 2854 /* 2855 * Deallocate the client handle cache 2856 */ 2857 kmem_cache_destroy(chtab4_cache); 2858 2859 /* 2860 * Destroy the zone_key 2861 */ 2862 (void) zone_key_delete(nfs4clnt_zone_key); 2863 2864 return (0); 2865 } 2866 /* 2867 * Set or Clear direct I/O flag 2868 * VOP_RWLOCK() is held for write access to prevent a race condition 2869 * which would occur if a process is in the middle of a write when 2870 * directio flag gets set. It is possible that all pages may not get flushed. 2871 * 2872 * This is a copy of nfs_directio, changes here may need to be made 2873 * there and vice versa. 2874 */ 2875 2876 int 2877 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr) 2878 { 2879 int error = 0; 2880 rnode4_t *rp; 2881 2882 rp = VTOR4(vp); 2883 2884 if (cmd == DIRECTIO_ON) { 2885 2886 if (rp->r_flags & R4DIRECTIO) 2887 return (0); 2888 2889 /* 2890 * Flush the page cache. 
2891 */ 2892 2893 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 2894 2895 if (rp->r_flags & R4DIRECTIO) { 2896 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2897 return (0); 2898 } 2899 2900 if (nfs4_has_pages(vp) && 2901 ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) { 2902 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 2903 B_INVAL, cr, NULL); 2904 if (error) { 2905 if (error == ENOSPC || error == EDQUOT) { 2906 mutex_enter(&rp->r_statelock); 2907 if (!rp->r_error) 2908 rp->r_error = error; 2909 mutex_exit(&rp->r_statelock); 2910 } 2911 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2912 return (error); 2913 } 2914 } 2915 2916 mutex_enter(&rp->r_statelock); 2917 rp->r_flags |= R4DIRECTIO; 2918 mutex_exit(&rp->r_statelock); 2919 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2920 return (0); 2921 } 2922 2923 if (cmd == DIRECTIO_OFF) { 2924 mutex_enter(&rp->r_statelock); 2925 rp->r_flags &= ~R4DIRECTIO; /* disable direct mode */ 2926 mutex_exit(&rp->r_statelock); 2927 return (0); 2928 } 2929 2930 return (EINVAL); 2931 } 2932 2933 /* 2934 * Return TRUE if the file has any pages. Always go back to 2935 * the master vnode to check v_pages since none of the shadows 2936 * can have pages. 2937 */ 2938 2939 bool_t 2940 nfs4_has_pages(vnode_t *vp) 2941 { 2942 rnode4_t *rp; 2943 2944 rp = VTOR4(vp); 2945 if (IS_SHADOW(vp, rp)) 2946 vp = RTOV4(rp); /* RTOV4 always gives the master */ 2947 2948 return (vn_has_cached_data(vp)); 2949 } 2950 2951 /* 2952 * This table is used to determine whether the client should attempt 2953 * failover based on the clnt_stat value returned by CLNT_CALL. The 2954 * clnt_stat is used as an index into the table. If 2955 * the error value that corresponds to the clnt_stat value in the 2956 * table is non-zero, then that is the error to be returned AND 2957 * that signals that failover should be attempted. 
2958 * 2959 * Special note: If the RPC_ values change, then direct indexing of the 2960 * table is no longer valid, but having the RPC_ values in the table 2961 * allow the functions to detect the change and issue a warning. 2962 * In this case, the code will always attempt failover as a defensive 2963 * measure. 2964 */ 2965 2966 static struct try_failover_tab { 2967 enum clnt_stat cstat; 2968 int error; 2969 } try_failover_table [] = { 2970 2971 RPC_SUCCESS, 0, 2972 RPC_CANTENCODEARGS, 0, 2973 RPC_CANTDECODERES, 0, 2974 RPC_CANTSEND, ECOMM, 2975 RPC_CANTRECV, ECOMM, 2976 RPC_TIMEDOUT, ETIMEDOUT, 2977 RPC_VERSMISMATCH, 0, 2978 RPC_AUTHERROR, 0, 2979 RPC_PROGUNAVAIL, 0, 2980 RPC_PROGVERSMISMATCH, 0, 2981 RPC_PROCUNAVAIL, 0, 2982 RPC_CANTDECODEARGS, 0, 2983 RPC_SYSTEMERROR, ENOSR, 2984 RPC_UNKNOWNHOST, EHOSTUNREACH, 2985 RPC_RPCBFAILURE, ENETUNREACH, 2986 RPC_PROGNOTREGISTERED, ECONNREFUSED, 2987 RPC_FAILED, ETIMEDOUT, 2988 RPC_UNKNOWNPROTO, EHOSTUNREACH, 2989 RPC_INTR, 0, 2990 RPC_UNKNOWNADDR, EHOSTUNREACH, 2991 RPC_TLIERROR, 0, 2992 RPC_NOBROADCAST, EHOSTUNREACH, 2993 RPC_N2AXLATEFAILURE, ECONNREFUSED, 2994 RPC_UDERROR, 0, 2995 RPC_INPROGRESS, 0, 2996 RPC_STALERACHANDLE, EINVAL, 2997 RPC_CANTCONNECT, ECONNREFUSED, 2998 RPC_XPRTFAILED, ECONNABORTED, 2999 RPC_CANTCREATESTREAM, ECONNREFUSED, 3000 RPC_CANTSTORE, ENOBUFS 3001 }; 3002 3003 /* 3004 * nfs4_try_failover - determine whether the client should 3005 * attempt failover based on the values stored in the nfs4_error_t. 3006 */ 3007 int 3008 nfs4_try_failover(nfs4_error_t *ep) 3009 { 3010 if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE) 3011 return (TRUE); 3012 3013 if (ep->error && ep->rpc_status != RPC_SUCCESS) 3014 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE); 3015 3016 return (FALSE); 3017 } 3018 3019 /* 3020 * try_failover - internal version of nfs4_try_failover, called 3021 * only by rfscall and aclcall. 
Determine if failover is warranted 3022 * based on the clnt_stat and return the error number if it is. 3023 */ 3024 static int 3025 try_failover(enum clnt_stat rpc_status) 3026 { 3027 int err = 0; 3028 3029 if (rpc_status == RPC_SUCCESS) 3030 return (0); 3031 3032 #ifdef DEBUG 3033 if (rpc_status != 0 && nfs4_try_failover_any) { 3034 err = ETIMEDOUT; 3035 goto done; 3036 } 3037 #endif 3038 /* 3039 * The rpc status is used as an index into the table. 3040 * If the rpc status is outside of the range of the 3041 * table or if the rpc error numbers have been changed 3042 * since the table was constructed, then print a warning 3043 * (DEBUG only) and try failover anyway. Otherwise, just 3044 * grab the resulting error number out of the table. 3045 */ 3046 if (rpc_status < RPC_SUCCESS || rpc_status >= 3047 sizeof (try_failover_table)/sizeof (try_failover_table[0]) || 3048 try_failover_table[rpc_status].cstat != rpc_status) { 3049 3050 err = ETIMEDOUT; 3051 #ifdef DEBUG 3052 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d", 3053 rpc_status); 3054 #endif 3055 } else 3056 err = try_failover_table[rpc_status].error; 3057 3058 done: 3059 if (rpc_status) 3060 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3061 "nfs4_try_failover: %strying failover on error %d", 3062 err ? "" : "NOT ", rpc_status)); 3063 3064 return (err); 3065 } 3066 3067 void 3068 nfs4_error_zinit(nfs4_error_t *ep) 3069 { 3070 ep->error = 0; 3071 ep->stat = NFS4_OK; 3072 ep->rpc_status = RPC_SUCCESS; 3073 } 3074 3075 void 3076 nfs4_error_init(nfs4_error_t *ep, int error) 3077 { 3078 ep->error = error; 3079 ep->stat = NFS4_OK; 3080 ep->rpc_status = RPC_SUCCESS; 3081 } 3082 3083 3084 #ifdef DEBUG 3085 3086 /* 3087 * Return a 16-bit hash for filehandle, stateid, clientid, owner. 3088 * use the same algorithm as for NFS v3. 
3089 * 3090 */ 3091 int 3092 hash16(void *p, int len) 3093 { 3094 int i, rem; 3095 uint_t *wp; 3096 uint_t key = 0; 3097 3098 /* protect against non word aligned */ 3099 if ((rem = len & 3) != 0) 3100 len &= ~3; 3101 3102 for (i = 0, wp = (uint_t *)p; i < len; i += 4, wp++) { 3103 key ^= (*wp >> 16) ^ *wp; 3104 } 3105 3106 /* hash left-over bytes */ 3107 for (i = 0; i < rem; i++) 3108 key ^= *((uchar_t *)p + i); 3109 3110 return (key & 0xffff); 3111 } 3112 3113 /* 3114 * rnode4info - return filehandle and path information for an rnode. 3115 * XXX MT issues: uses a single static buffer, no locking of path. 3116 */ 3117 char * 3118 rnode4info(rnode4_t *rp) 3119 { 3120 static char buf[80]; 3121 nfs4_fhandle_t fhandle; 3122 char *path; 3123 char *type; 3124 3125 if (rp == NULL) 3126 return ("null"); 3127 if (rp->r_flags & R4ISXATTR) 3128 type = "attr"; 3129 else if (RTOV4(rp)->v_flag & V_XATTRDIR) 3130 type = "attrdir"; 3131 else if (RTOV4(rp)->v_flag & VROOT) 3132 type = "root"; 3133 else if (RTOV4(rp)->v_type == VDIR) 3134 type = "dir"; 3135 else if (RTOV4(rp)->v_type == VREG) 3136 type = "file"; 3137 else 3138 type = "other"; 3139 sfh4_copyval(rp->r_fh, &fhandle); 3140 path = fn_path(rp->r_svnode.sv_name); 3141 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n", 3142 (void *)rp, path, type, rp->r_flags, 3143 hash16((void *)&fhandle.fh_buf, fhandle.fh_len)); 3144 kmem_free(path, strlen(path)+1); 3145 return (buf); 3146 } 3147 #endif 3148