1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 27 */ 28 29 /* 30 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 31 * All Rights Reserved 32 */ 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/cmn_err.h> 38 #include <sys/vtrace.h> 39 #include <sys/session.h> 40 #include <sys/thread.h> 41 #include <sys/dnlc.h> 42 #include <sys/cred.h> 43 #include <sys/priv.h> 44 #include <sys/list.h> 45 #include <sys/sdt.h> 46 #include <sys/policy.h> 47 48 #include <rpc/types.h> 49 #include <rpc/xdr.h> 50 51 #include <nfs/nfs.h> 52 53 #include <nfs/nfs_clnt.h> 54 55 #include <nfs/nfs4.h> 56 #include <nfs/rnode4.h> 57 #include <nfs/nfs4_clnt.h> 58 59 /* 60 * client side statistics 61 */ 62 static const struct clstat4 clstat4_tmpl = { 63 { "calls", KSTAT_DATA_UINT64 }, 64 { "badcalls", KSTAT_DATA_UINT64 }, 65 { "referrals", KSTAT_DATA_UINT64 }, 66 { "referlinks", KSTAT_DATA_UINT64 }, 67 { "clgets", KSTAT_DATA_UINT64 }, 68 { "cltoomany", KSTAT_DATA_UINT64 }, 69 #ifdef DEBUG 70 { "clalloc", KSTAT_DATA_UINT64 }, 71 { "noresponse", KSTAT_DATA_UINT64 }, 72 { "failover", KSTAT_DATA_UINT64 }, 73 { "remap", KSTAT_DATA_UINT64 }, 74 #endif 75 }; 76 77 #ifdef DEBUG 78 struct clstat4_debug clstat4_debug = { 79 { "nrnode", KSTAT_DATA_UINT64 }, 80 { "access", KSTAT_DATA_UINT64 }, 81 { "dirent", KSTAT_DATA_UINT64 }, 82 { "dirents", KSTAT_DATA_UINT64 }, 83 { "reclaim", KSTAT_DATA_UINT64 }, 84 { "clreclaim", KSTAT_DATA_UINT64 }, 85 { "f_reclaim", KSTAT_DATA_UINT64 }, 86 { "a_reclaim", KSTAT_DATA_UINT64 }, 87 { "r_reclaim", KSTAT_DATA_UINT64 }, 88 { "r_path", KSTAT_DATA_UINT64 }, 89 }; 90 #endif 91 92 /* 93 * We keep a global list of per-zone client data, so we can clean up all zones 94 * if we get low on memory. 95 */ 96 static list_t nfs4_clnt_list; 97 static kmutex_t nfs4_clnt_list_lock; 98 zone_key_t nfs4clnt_zone_key; 99 100 static struct kmem_cache *chtab4_cache; 101 102 #ifdef DEBUG 103 static int nfs4_rfscall_debug; 104 static int nfs4_try_failover_any; 105 int nfs4_utf8_debug = 0; 106 #endif 107 108 /* 109 * NFSv4 readdir cache implementation 110 */ 111 typedef struct rddir4_cache_impl { 112 rddir4_cache rc; /* readdir cache element */ 113 kmutex_t lock; /* lock protects count */ 114 uint_t count; /* reference count */ 115 avl_node_t tree; /* AVL tree link */ 116 } rddir4_cache_impl; 117 118 static int rddir4_cache_compar(const void *, const void *); 119 static void rddir4_cache_free(rddir4_cache_impl *); 120 static rddir4_cache *rddir4_cache_alloc(int); 121 static void rddir4_cache_hold(rddir4_cache *); 122 static int try_failover(enum clnt_stat); 123 124 static int nfs4_readdir_cache_hits = 0; 125 static int nfs4_readdir_cache_waits = 0; 126 static int nfs4_readdir_cache_misses = 0; 127 128 /* 129 * Shared nfs4 functions 130 */ 131 132 /* 133 * Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already 134 * be allocated. 135 */ 136 137 void 138 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to) 139 { 140 to->nfs_fh4_len = from->nfs_fh4_len; 141 bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len); 142 } 143 144 /* 145 * nfs4cmpfh - compare 2 filehandles. 146 * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is 147 * "less" than the second, +1 if the first is "greater" than the second. 148 */ 149 150 int 151 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2) 152 { 153 const char *c1, *c2; 154 155 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len) 156 return (-1); 157 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len) 158 return (1); 159 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val; 160 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len; 161 c1++, c2++) { 162 if (*c1 < *c2) 163 return (-1); 164 if (*c1 > *c2) 165 return (1); 166 } 167 168 return (0); 169 } 170 171 /* 172 * Compare two v4 filehandles. Return zero if they're the same, non-zero 173 * if they're not. Like nfs4cmpfh(), but different filehandle 174 * representation, and doesn't provide information about greater than or 175 * less than. 176 */ 177 178 int 179 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2) 180 { 181 if (fh1->fh_len == fh2->fh_len) 182 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len)); 183 184 return (1); 185 } 186 187 int 188 stateid4_cmp(stateid4 *s1, stateid4 *s2) 189 { 190 if (bcmp(s1, s2, sizeof (stateid4)) == 0) 191 return (1); 192 else 193 return (0); 194 } 195 196 nfsstat4 197 puterrno4(int error) 198 { 199 switch (error) { 200 case 0: 201 return (NFS4_OK); 202 case EPERM: 203 return (NFS4ERR_PERM); 204 case ENOENT: 205 return (NFS4ERR_NOENT); 206 case EINTR: 207 return (NFS4ERR_IO); 208 case EIO: 209 return (NFS4ERR_IO); 210 case ENXIO: 211 return (NFS4ERR_NXIO); 212 case ENOMEM: 213 return (NFS4ERR_RESOURCE); 214 case EACCES: 215 return (NFS4ERR_ACCESS); 216 case EBUSY: 217 return (NFS4ERR_IO); 218 case EEXIST: 219 return (NFS4ERR_EXIST); 220 case EXDEV: 221 return (NFS4ERR_XDEV); 222 case ENODEV: 223 return (NFS4ERR_IO); 224 case ENOTDIR: 225 return (NFS4ERR_NOTDIR); 226 case EISDIR: 227 return (NFS4ERR_ISDIR); 228 case EINVAL: 229 return (NFS4ERR_INVAL); 230 case EMFILE: 231 return (NFS4ERR_RESOURCE); 232 case EFBIG: 233 return (NFS4ERR_FBIG); 234 case ENOSPC: 235 return (NFS4ERR_NOSPC); 236 case EROFS: 237 return (NFS4ERR_ROFS); 238 case EMLINK: 239 return (NFS4ERR_MLINK); 240 case EDEADLK: 241 return (NFS4ERR_DEADLOCK); 242 case ENOLCK: 243 return (NFS4ERR_DENIED); 244 case EREMOTE: 245 return (NFS4ERR_SERVERFAULT); 246 case ENOTSUP: 247 return (NFS4ERR_NOTSUPP); 248 case EDQUOT: 249 return (NFS4ERR_DQUOT); 250 case ENAMETOOLONG: 251 return (NFS4ERR_NAMETOOLONG); 252 case EOVERFLOW: 253 return (NFS4ERR_INVAL); 254 case ENOSYS: 255 return (NFS4ERR_NOTSUPP); 256 case ENOTEMPTY: 257 return (NFS4ERR_NOTEMPTY); 258 case EOPNOTSUPP: 259 return (NFS4ERR_NOTSUPP); 260 case ESTALE: 261 return (NFS4ERR_STALE); 262 case EAGAIN: 263 if (curthread->t_flag & T_WOULDBLOCK) { 264 curthread->t_flag &= ~T_WOULDBLOCK; 265 return (NFS4ERR_DELAY); 266 } 267 return (NFS4ERR_LOCKED); 268 default: 269 return ((enum nfsstat4)error); 270 } 271 } 272 273 int 274 geterrno4(enum nfsstat4 status) 275 { 276 switch (status) { 277 case NFS4_OK: 278 return (0); 279 case NFS4ERR_PERM: 280 return (EPERM); 281 case NFS4ERR_NOENT: 282 return (ENOENT); 283 case NFS4ERR_IO: 284 return (EIO); 285 case NFS4ERR_NXIO: 286 return (ENXIO); 287 case NFS4ERR_ACCESS: 288 return (EACCES); 289 case NFS4ERR_EXIST: 290 return (EEXIST); 291 case NFS4ERR_XDEV: 292 return (EXDEV); 293 case NFS4ERR_NOTDIR: 294 return (ENOTDIR); 295 case NFS4ERR_ISDIR: 296 return (EISDIR); 297 case NFS4ERR_INVAL: 298 return (EINVAL); 299 case NFS4ERR_FBIG: 300 return (EFBIG); 301 case NFS4ERR_NOSPC: 302 return (ENOSPC); 303 case NFS4ERR_ROFS: 304 return (EROFS); 305 case NFS4ERR_MLINK: 306 return (EMLINK); 307 case NFS4ERR_NAMETOOLONG: 308 return (ENAMETOOLONG); 309 case NFS4ERR_NOTEMPTY: 310 return (ENOTEMPTY); 311 case NFS4ERR_DQUOT: 312 return (EDQUOT); 313 case NFS4ERR_STALE: 314 return (ESTALE); 315 case NFS4ERR_BADHANDLE: 316 return (ESTALE); 317 case NFS4ERR_BAD_COOKIE: 318 return (EINVAL); 319 case NFS4ERR_NOTSUPP: 320 return (EOPNOTSUPP); 321 case NFS4ERR_TOOSMALL: 322 return (EINVAL); 323 case NFS4ERR_SERVERFAULT: 324 return (EIO); 325 case NFS4ERR_BADTYPE: 326 return (EINVAL); 327 case NFS4ERR_DELAY: 328 return (ENXIO); 329 case NFS4ERR_SAME: 330 return (EPROTO); 331 case NFS4ERR_DENIED: 332 return (ENOLCK); 333 case NFS4ERR_EXPIRED: 334 return (EPROTO); 335 case NFS4ERR_LOCKED: 336 return (EACCES); 337 case NFS4ERR_GRACE: 338 return (EAGAIN); 339 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */ 340 return (ESTALE); 341 case NFS4ERR_SHARE_DENIED: 342 return (EACCES); 343 case NFS4ERR_WRONGSEC: 344 return (EPERM); 345 case NFS4ERR_CLID_INUSE: 346 return (EAGAIN); 347 case NFS4ERR_RESOURCE: 348 return (EAGAIN); 349 case NFS4ERR_MOVED: 350 return (EPROTO); 351 case NFS4ERR_NOFILEHANDLE: 352 return (EIO); 353 case NFS4ERR_MINOR_VERS_MISMATCH: 354 return (ENOTSUP); 355 case NFS4ERR_STALE_CLIENTID: 356 return (EIO); 357 case NFS4ERR_STALE_STATEID: 358 return (EIO); 359 case NFS4ERR_OLD_STATEID: 360 return (EIO); 361 case NFS4ERR_BAD_STATEID: 362 return (EIO); 363 case NFS4ERR_BAD_SEQID: 364 return (EIO); 365 case NFS4ERR_NOT_SAME: 366 return (EPROTO); 367 case NFS4ERR_LOCK_RANGE: 368 return (EPROTO); 369 case NFS4ERR_SYMLINK: 370 return (EPROTO); 371 case NFS4ERR_RESTOREFH: 372 return (EPROTO); 373 case NFS4ERR_LEASE_MOVED: 374 return (EPROTO); 375 case NFS4ERR_ATTRNOTSUPP: 376 return (ENOTSUP); 377 case NFS4ERR_NO_GRACE: 378 return (EPROTO); 379 case NFS4ERR_RECLAIM_BAD: 380 return (EPROTO); 381 case NFS4ERR_RECLAIM_CONFLICT: 382 return (EPROTO); 383 case NFS4ERR_BADXDR: 384 return (EINVAL); 385 case NFS4ERR_LOCKS_HELD: 386 return (EIO); 387 case NFS4ERR_OPENMODE: 388 return (EACCES); 389 case NFS4ERR_BADOWNER: 390 /* 391 * Client and server are in different DNS domains 392 * and the NFSMAPID_DOMAIN in /etc/default/nfs 393 * doesn't match. No good answer here. Return 394 * EACCESS, which translates to "permission denied". 395 */ 396 return (EACCES); 397 case NFS4ERR_BADCHAR: 398 return (EINVAL); 399 case NFS4ERR_BADNAME: 400 return (EINVAL); 401 case NFS4ERR_BAD_RANGE: 402 return (EIO); 403 case NFS4ERR_LOCK_NOTSUPP: 404 return (ENOTSUP); 405 case NFS4ERR_OP_ILLEGAL: 406 return (EINVAL); 407 case NFS4ERR_DEADLOCK: 408 return (EDEADLK); 409 case NFS4ERR_FILE_OPEN: 410 return (EACCES); 411 case NFS4ERR_ADMIN_REVOKED: 412 return (EPROTO); 413 case NFS4ERR_CB_PATH_DOWN: 414 return (EPROTO); 415 default: 416 #ifdef DEBUG 417 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d", 418 status); 419 #endif 420 return ((int)status); 421 } 422 } 423 424 void 425 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op) 426 { 427 nfs4_server_t *server; 428 429 /* 430 * Return if already printed/queued a msg 431 * for this mount point. 432 */ 433 if (mi->mi_flags & MI4_BADOWNER_DEBUG) 434 return; 435 /* 436 * Happens once per client <-> server pair. 437 */ 438 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 439 mi->mi_flags & MI4_INT)) 440 return; 441 442 server = find_nfs4_server(mi); 443 if (server == NULL) { 444 nfs_rw_exit(&mi->mi_recovlock); 445 return; 446 } 447 448 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) { 449 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 450 "!NFSMAPID_DOMAIN does not match" 451 " the server: %s domain.\n" 452 "Please check configuration", 453 mi->mi_curr_serv->sv_hostname); 454 server->s_flags |= N4S_BADOWNER_DEBUG; 455 } 456 mutex_exit(&server->s_lock); 457 nfs4_server_rele(server); 458 nfs_rw_exit(&mi->mi_recovlock); 459 460 /* 461 * Happens once per mntinfo4_t. 462 * This error is deemed as one of the recovery facts "RF_BADOWNER", 463 * queue this in the mesg queue for this mount_info. This message 464 * is not printed, meaning its absent from id_to_dump_solo_fact() 465 * but its there for inspection if the queue is ever dumped/inspected. 466 */ 467 mutex_enter(&mi->mi_lock); 468 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) { 469 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op, 470 FALSE, NULL, 0, NULL); 471 mi->mi_flags |= MI4_BADOWNER_DEBUG; 472 } 473 mutex_exit(&mi->mi_lock); 474 } 475 476 int 477 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime) 478 { 479 int64_t sec; 480 int32_t nsec; 481 482 /* 483 * Here check that the nfsv4 time is valid for the system. 484 * nfsv4 time value is a signed 64-bit, and the system time 485 * may be either int64_t or int32_t (depends on the kernel), 486 * so if the kernel is 32-bit, the nfsv4 time value may not fit. 487 */ 488 #ifndef _LP64 489 if (! NFS4_TIME_OK(ntime->seconds)) { 490 return (EOVERFLOW); 491 } 492 #endif 493 494 /* Invalid to specify 1 billion (or more) nsecs */ 495 if (ntime->nseconds >= 1000000000) 496 return (EINVAL); 497 498 if (ntime->seconds < 0) { 499 sec = ntime->seconds + 1; 500 nsec = -1000000000 + ntime->nseconds; 501 } else { 502 sec = ntime->seconds; 503 nsec = ntime->nseconds; 504 } 505 506 vatime->tv_sec = sec; 507 vatime->tv_nsec = nsec; 508 509 return (0); 510 } 511 512 int 513 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime) 514 { 515 int64_t sec; 516 uint32_t nsec; 517 518 /* 519 * nfsv4 time value is a signed 64-bit, and the system time 520 * may be either int64_t or int32_t (depends on the kernel), 521 * so all system time values will fit. 522 */ 523 if (vatime->tv_nsec >= 0) { 524 sec = vatime->tv_sec; 525 nsec = vatime->tv_nsec; 526 } else { 527 sec = vatime->tv_sec - 1; 528 nsec = 1000000000 + vatime->tv_nsec; 529 } 530 ntime->seconds = sec; 531 ntime->nseconds = nsec; 532 533 return (0); 534 } 535 536 /* 537 * Converts a utf8 string to a valid null terminated filename string. 538 * 539 * XXX - Not actually translating the UTF-8 string as per RFC 2279. 540 * For now, just validate that the UTF-8 string off the wire 541 * does not have characters that will freak out UFS, and leave 542 * it at that. 543 */ 544 char * 545 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s) 546 { 547 ASSERT(lenp != NULL); 548 549 if (u8s == NULL || u8s->utf8string_len <= 0 || 550 u8s->utf8string_val == NULL) 551 return (NULL); 552 553 /* 554 * Check for obvious illegal filename chars 555 */ 556 if (utf8_strchr(u8s, '/') != NULL) { 557 #ifdef DEBUG 558 if (nfs4_utf8_debug) { 559 char *path; 560 int len = u8s->utf8string_len; 561 562 path = kmem_alloc(len + 1, KM_SLEEP); 563 bcopy(u8s->utf8string_val, path, len); 564 path[len] = '\0'; 565 566 zcmn_err(getzoneid(), CE_WARN, 567 "Invalid UTF-8 filename: %s", path); 568 569 kmem_free(path, len + 1); 570 } 571 #endif 572 return (NULL); 573 } 574 575 return (utf8_to_str(u8s, lenp, s)); 576 } 577 578 /* 579 * Converts a utf8 string to a C string. 580 * kmem_allocs a new string if not supplied 581 */ 582 char * 583 utf8_to_str(utf8string *str, uint_t *lenp, char *s) 584 { 585 char *sp; 586 char *u8p; 587 int len; 588 int i; 589 590 ASSERT(lenp != NULL); 591 592 if (str == NULL) 593 return (NULL); 594 595 u8p = str->utf8string_val; 596 len = str->utf8string_len; 597 if (len <= 0 || u8p == NULL) { 598 if (s) 599 *s = '\0'; 600 return (NULL); 601 } 602 603 sp = s; 604 if (sp == NULL) 605 sp = kmem_alloc(len + 1, KM_SLEEP); 606 607 /* 608 * At least check for embedded nulls 609 */ 610 for (i = 0; i < len; i++) { 611 sp[i] = u8p[i]; 612 if (u8p[i] == '\0') { 613 #ifdef DEBUG 614 zcmn_err(getzoneid(), CE_WARN, 615 "Embedded NULL in UTF-8 string"); 616 #endif 617 if (s == NULL) 618 kmem_free(sp, len + 1); 619 return (NULL); 620 } 621 } 622 sp[len] = '\0'; 623 *lenp = len + 1; 624 625 return (sp); 626 } 627 628 /* 629 * str_to_utf8 - converts a null-terminated C string to a utf8 string 630 */ 631 utf8string * 632 str_to_utf8(char *nm, utf8string *str) 633 { 634 int len; 635 636 if (str == NULL) 637 return (NULL); 638 639 if (nm == NULL || *nm == '\0') { 640 str->utf8string_len = 0; 641 str->utf8string_val = NULL; 642 } 643 644 len = strlen(nm); 645 646 str->utf8string_val = kmem_alloc(len, KM_SLEEP); 647 str->utf8string_len = len; 648 bcopy(nm, str->utf8string_val, len); 649 650 return (str); 651 } 652 653 utf8string * 654 utf8_copy(utf8string *src, utf8string *dest) 655 { 656 if (src == NULL) 657 return (NULL); 658 if (dest == NULL) 659 return (NULL); 660 661 if (src->utf8string_len > 0) { 662 dest->utf8string_val = kmem_alloc(src->utf8string_len, 663 KM_SLEEP); 664 bcopy(src->utf8string_val, dest->utf8string_val, 665 src->utf8string_len); 666 dest->utf8string_len = src->utf8string_len; 667 } else { 668 dest->utf8string_val = NULL; 669 dest->utf8string_len = 0; 670 } 671 672 return (dest); 673 } 674 675 int 676 utf8_compare(const utf8string *a, const utf8string *b) 677 { 678 int mlen, cmp; 679 int alen, blen; 680 char *aval, *bval; 681 682 if ((a == NULL) && (b == NULL)) 683 return (0); 684 else if (a == NULL) 685 return (-1); 686 else if (b == NULL) 687 return (1); 688 689 alen = a->utf8string_len; 690 blen = b->utf8string_len; 691 aval = a->utf8string_val; 692 bval = b->utf8string_val; 693 694 if (((alen == 0) || (aval == NULL)) && 695 ((blen == 0) || (bval == NULL))) 696 return (0); 697 else if ((alen == 0) || (aval == NULL)) 698 return (-1); 699 else if ((blen == 0) || (bval == NULL)) 700 return (1); 701 702 mlen = MIN(alen, blen); 703 cmp = strncmp(aval, bval, mlen); 704 705 if ((cmp == 0) && (alen == blen)) 706 return (0); 707 else if ((cmp == 0) && (alen < blen)) 708 return (-1); 709 else if (cmp == 0) 710 return (1); 711 else if (cmp < 0) 712 return (-1); 713 return (1); 714 } 715 716 /* 717 * utf8_dir_verify - checks that the utf8 string is valid 718 */ 719 nfsstat4 720 utf8_dir_verify(utf8string *str) 721 { 722 char *nm; 723 int len; 724 725 if (str == NULL) 726 return (NFS4ERR_INVAL); 727 728 nm = str->utf8string_val; 729 len = str->utf8string_len; 730 if (nm == NULL || len == 0) { 731 return (NFS4ERR_INVAL); 732 } 733 734 if (len == 1 && nm[0] == '.') 735 return (NFS4ERR_BADNAME); 736 if (len == 2 && nm[0] == '.' && nm[1] == '.') 737 return (NFS4ERR_BADNAME); 738 739 if (utf8_strchr(str, '/') != NULL) 740 return (NFS4ERR_BADNAME); 741 742 if (utf8_strchr(str, '\0') != NULL) 743 return (NFS4ERR_BADNAME); 744 745 return (NFS4_OK); 746 } 747 748 /* 749 * from rpcsec module (common/rpcsec) 750 */ 751 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **); 752 extern void sec_clnt_freeh(AUTH *); 753 extern void sec_clnt_freeinfo(struct sec_data *); 754 755 /* 756 * authget() gets an auth handle based on the security 757 * information from the servinfo in mountinfo. 758 * The auth handle is stored in ch_client->cl_auth. 759 * 760 * First security flavor of choice is to use sv_secdata 761 * which is initiated by the client. If that fails, get 762 * secinfo from the server and then select one from the 763 * server secinfo list . 764 * 765 * For RPCSEC_GSS flavor, upon success, a secure context is 766 * established between client and server. 767 */ 768 int 769 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr) 770 { 771 int error, i; 772 773 /* 774 * SV4_TRYSECINFO indicates to try the secinfo list from 775 * sv_secinfo until a successful one is reached. Point 776 * sv_currsec to the selected security mechanism for 777 * later sessions. 778 */ 779 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 780 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) { 781 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count; 782 i++) { 783 if (!(error = sec_clnt_geth(ch_client, 784 &svp->sv_secinfo->sdata[i], 785 cr, &ch_client->cl_auth))) { 786 787 svp->sv_currsec = &svp->sv_secinfo->sdata[i]; 788 svp->sv_secinfo->index = i; 789 /* done */ 790 svp->sv_flags &= ~SV4_TRYSECINFO; 791 break; 792 } 793 794 /* 795 * Allow the caller retry with the security flavor 796 * pointed by svp->sv_secinfo->index when 797 * ETIMEDOUT/ECONNRESET occurs. 798 */ 799 if (error == ETIMEDOUT || error == ECONNRESET) { 800 svp->sv_secinfo->index = i; 801 break; 802 } 803 } 804 } else { 805 /* sv_currsec points to one of the entries in sv_secinfo */ 806 if (svp->sv_currsec) { 807 error = sec_clnt_geth(ch_client, svp->sv_currsec, cr, 808 &ch_client->cl_auth); 809 } else { 810 /* If it's null, use sv_secdata. */ 811 error = sec_clnt_geth(ch_client, svp->sv_secdata, cr, 812 &ch_client->cl_auth); 813 } 814 } 815 nfs_rw_exit(&svp->sv_lock); 816 817 return (error); 818 } 819 820 /* 821 * Common handle get program for NFS, NFS ACL, and NFS AUTH client. 822 */ 823 int 824 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 825 struct chtab **chp, struct nfs4_clnt *nfscl) 826 { 827 struct chhead *ch, *newch; 828 struct chhead **plistp; 829 struct chtab *cp; 830 int error; 831 k_sigset_t smask; 832 833 if (newcl == NULL || chp == NULL || ci == NULL) 834 return (EINVAL); 835 836 *newcl = NULL; 837 *chp = NULL; 838 839 /* 840 * Find an unused handle or create one 841 */ 842 newch = NULL; 843 nfscl->nfscl_stat.clgets.value.ui64++; 844 top: 845 /* 846 * Find the correct entry in the cache to check for free 847 * client handles. The search is based on the RPC program 848 * number, program version number, dev_t for the transport 849 * device, and the protocol family. 850 */ 851 mutex_enter(&nfscl->nfscl_chtable4_lock); 852 plistp = &nfscl->nfscl_chtable4; 853 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 854 if (ch->ch_prog == ci->cl_prog && 855 ch->ch_vers == ci->cl_vers && 856 ch->ch_dev == svp->sv_knconf->knc_rdev && 857 (strcmp(ch->ch_protofmly, 858 svp->sv_knconf->knc_protofmly) == 0)) 859 break; 860 plistp = &ch->ch_next; 861 } 862 863 /* 864 * If we didn't find a cache entry for this quadruple, then 865 * create one. If we don't have one already preallocated, 866 * then drop the cache lock, create one, and then start over. 867 * If we did have a preallocated entry, then just add it to 868 * the front of the list. 869 */ 870 if (ch == NULL) { 871 if (newch == NULL) { 872 mutex_exit(&nfscl->nfscl_chtable4_lock); 873 newch = kmem_alloc(sizeof (*newch), KM_SLEEP); 874 newch->ch_timesused = 0; 875 newch->ch_prog = ci->cl_prog; 876 newch->ch_vers = ci->cl_vers; 877 newch->ch_dev = svp->sv_knconf->knc_rdev; 878 newch->ch_protofmly = kmem_alloc( 879 strlen(svp->sv_knconf->knc_protofmly) + 1, 880 KM_SLEEP); 881 (void) strcpy(newch->ch_protofmly, 882 svp->sv_knconf->knc_protofmly); 883 newch->ch_list = NULL; 884 goto top; 885 } 886 ch = newch; 887 newch = NULL; 888 ch->ch_next = nfscl->nfscl_chtable4; 889 nfscl->nfscl_chtable4 = ch; 890 /* 891 * We found a cache entry, but if it isn't on the front of the 892 * list, then move it to the front of the list to try to take 893 * advantage of locality of operations. 894 */ 895 } else if (ch != nfscl->nfscl_chtable4) { 896 *plistp = ch->ch_next; 897 ch->ch_next = nfscl->nfscl_chtable4; 898 nfscl->nfscl_chtable4 = ch; 899 } 900 901 /* 902 * If there was a free client handle cached, then remove it 903 * from the list, init it, and use it. 904 */ 905 if (ch->ch_list != NULL) { 906 cp = ch->ch_list; 907 ch->ch_list = cp->ch_list; 908 mutex_exit(&nfscl->nfscl_chtable4_lock); 909 if (newch != NULL) { 910 kmem_free(newch->ch_protofmly, 911 strlen(newch->ch_protofmly) + 1); 912 kmem_free(newch, sizeof (*newch)); 913 } 914 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf, 915 &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr); 916 917 /* 918 * Get an auth handle. 919 */ 920 error = authget(svp, cp->ch_client, cr); 921 if (error || cp->ch_client->cl_auth == NULL) { 922 CLNT_DESTROY(cp->ch_client); 923 kmem_cache_free(chtab4_cache, cp); 924 return ((error != 0) ? error : EINTR); 925 } 926 ch->ch_timesused++; 927 *newcl = cp->ch_client; 928 *chp = cp; 929 return (0); 930 } 931 932 /* 933 * There weren't any free client handles which fit, so allocate 934 * a new one and use that. 935 */ 936 #ifdef DEBUG 937 atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64); 938 #endif 939 mutex_exit(&nfscl->nfscl_chtable4_lock); 940 941 nfscl->nfscl_stat.cltoomany.value.ui64++; 942 if (newch != NULL) { 943 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1); 944 kmem_free(newch, sizeof (*newch)); 945 } 946 947 cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP); 948 cp->ch_head = ch; 949 950 sigintr(&smask, (int)ci->cl_flags & MI4_INT); 951 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog, 952 ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client); 953 sigunintr(&smask); 954 955 if (error != 0) { 956 kmem_cache_free(chtab4_cache, cp); 957 #ifdef DEBUG 958 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64); 959 #endif 960 /* 961 * Warning is unnecessary if error is EINTR. 962 */ 963 if (error != EINTR) { 964 nfs_cmn_err(error, CE_WARN, 965 "clget: couldn't create handle: %m\n"); 966 } 967 return (error); 968 } 969 (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL); 970 auth_destroy(cp->ch_client->cl_auth); 971 972 /* 973 * Get an auth handle. 974 */ 975 error = authget(svp, cp->ch_client, cr); 976 if (error || cp->ch_client->cl_auth == NULL) { 977 CLNT_DESTROY(cp->ch_client); 978 kmem_cache_free(chtab4_cache, cp); 979 #ifdef DEBUG 980 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64); 981 #endif 982 return ((error != 0) ? error : EINTR); 983 } 984 ch->ch_timesused++; 985 *newcl = cp->ch_client; 986 ASSERT(cp->ch_client->cl_nosignal == FALSE); 987 *chp = cp; 988 return (0); 989 } 990 991 static int 992 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 993 struct chtab **chp, struct nfs4_clnt *nfscl) 994 { 995 clinfo_t ci; 996 bool_t is_recov; 997 int firstcall, error = 0; 998 999 /* 1000 * Set read buffer size to rsize 1001 * and add room for RPC headers. 1002 */ 1003 ci.cl_readsize = mi->mi_tsize; 1004 if (ci.cl_readsize != 0) 1005 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA); 1006 1007 /* 1008 * If soft mount and server is down just try once. 1009 * meaning: do not retransmit. 1010 */ 1011 if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN)) 1012 ci.cl_retrans = 0; 1013 else 1014 ci.cl_retrans = mi->mi_retrans; 1015 1016 ci.cl_prog = mi->mi_prog; 1017 ci.cl_vers = mi->mi_vers; 1018 ci.cl_flags = mi->mi_flags; 1019 1020 /* 1021 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS 1022 * security flavor, the client tries to establish a security context 1023 * by contacting the server. If the connection is timed out or reset, 1024 * e.g. server reboot, we will try again. 1025 */ 1026 is_recov = (curthread == mi->mi_recovthread); 1027 firstcall = 1; 1028 1029 do { 1030 error = clget4(&ci, svp, cr, newcl, chp, nfscl); 1031 1032 if (error == 0) 1033 break; 1034 1035 /* 1036 * For forced unmount and zone shutdown, bail out but 1037 * let the recovery thread do one more transmission. 1038 */ 1039 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) && 1040 (!is_recov || !firstcall)) { 1041 error = EIO; 1042 break; 1043 } 1044 1045 /* do not retry for soft mount */ 1046 if (!(mi->mi_flags & MI4_HARD)) 1047 break; 1048 1049 /* let the caller deal with the failover case */ 1050 if (FAILOVER_MOUNT4(mi)) 1051 break; 1052 1053 firstcall = 0; 1054 1055 } while (error == ETIMEDOUT || error == ECONNRESET); 1056 1057 return (error); 1058 } 1059 1060 void 1061 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl) 1062 { 1063 if (cl->cl_auth != NULL) { 1064 sec_clnt_freeh(cl->cl_auth); 1065 cl->cl_auth = NULL; 1066 } 1067 1068 /* 1069 * Timestamp this cache entry so that we know when it was last 1070 * used. 1071 */ 1072 cp->ch_freed = gethrestime_sec(); 1073 1074 /* 1075 * Add the free client handle to the front of the list. 1076 * This way, the list will be sorted in youngest to oldest 1077 * order. 1078 */ 1079 mutex_enter(&nfscl->nfscl_chtable4_lock); 1080 cp->ch_list = cp->ch_head->ch_list; 1081 cp->ch_head->ch_list = cp; 1082 mutex_exit(&nfscl->nfscl_chtable4_lock); 1083 } 1084 1085 #define CL_HOLDTIME 60 /* time to hold client handles */ 1086 1087 static void 1088 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime) 1089 { 1090 struct chhead *ch; 1091 struct chtab *cp; /* list of objects that can be reclaimed */ 1092 struct chtab *cpe; 1093 struct chtab *cpl; 1094 struct chtab **cpp; 1095 #ifdef DEBUG 1096 int n = 0; 1097 clstat4_debug.clreclaim.value.ui64++; 1098 #endif 1099 1100 /* 1101 * Need to reclaim some memory, so step through the cache 1102 * looking through the lists for entries which can be freed. 1103 */ 1104 cp = NULL; 1105 1106 mutex_enter(&nfscl->nfscl_chtable4_lock); 1107 1108 /* 1109 * Here we step through each non-NULL quadruple and start to 1110 * construct the reclaim list pointed to by cp. Note that 1111 * cp will contain all eligible chtab entries. When this traversal 1112 * completes, chtab entries from the last quadruple will be at the 1113 * front of cp and entries from previously inspected quadruples have 1114 * been appended to the rear of cp. 1115 */ 1116 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 1117 if (ch->ch_list == NULL) 1118 continue; 1119 /* 1120 * Search each list for entries older then 1121 * cl_holdtime seconds. The lists are maintained 1122 * in youngest to oldest order so that when the 1123 * first entry is found which is old enough, then 1124 * all of the rest of the entries on the list will 1125 * be old enough as well. 1126 */ 1127 cpl = ch->ch_list; 1128 cpp = &ch->ch_list; 1129 while (cpl != NULL && 1130 cpl->ch_freed + cl_holdtime > gethrestime_sec()) { 1131 cpp = &cpl->ch_list; 1132 cpl = cpl->ch_list; 1133 } 1134 if (cpl != NULL) { 1135 *cpp = NULL; 1136 if (cp != NULL) { 1137 cpe = cpl; 1138 while (cpe->ch_list != NULL) 1139 cpe = cpe->ch_list; 1140 cpe->ch_list = cp; 1141 } 1142 cp = cpl; 1143 } 1144 } 1145 1146 mutex_exit(&nfscl->nfscl_chtable4_lock); 1147 1148 /* 1149 * If cp is empty, then there is nothing to reclaim here. 1150 */ 1151 if (cp == NULL) 1152 return; 1153 1154 /* 1155 * Step through the list of entries to free, destroying each client 1156 * handle and kmem_free'ing the memory for each entry. 1157 */ 1158 while (cp != NULL) { 1159 #ifdef DEBUG 1160 n++; 1161 #endif 1162 CLNT_DESTROY(cp->ch_client); 1163 cpl = cp->ch_list; 1164 kmem_cache_free(chtab4_cache, cp); 1165 cp = cpl; 1166 } 1167 1168 #ifdef DEBUG 1169 /* 1170 * Update clalloc so that nfsstat shows the current number 1171 * of allocated client handles. 1172 */ 1173 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n); 1174 #endif 1175 } 1176 1177 /* ARGSUSED */ 1178 static void 1179 clreclaim4(void *all) 1180 { 1181 struct nfs4_clnt *nfscl; 1182 1183 /* 1184 * The system is low on memory; go through and try to reclaim some from 1185 * every zone on the system. 1186 */ 1187 mutex_enter(&nfs4_clnt_list_lock); 1188 nfscl = list_head(&nfs4_clnt_list); 1189 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) 1190 clreclaim4_zone(nfscl, CL_HOLDTIME); 1191 mutex_exit(&nfs4_clnt_list_lock); 1192 } 1193 1194 /* 1195 * Minimum time-out values indexed by call type 1196 * These units are in "eights" of a second to avoid multiplies 1197 */ 1198 static unsigned int minimum_timeo[] = { 1199 6, 7, 10 1200 }; 1201 1202 #define SHORTWAIT (NFS_COTS_TIMEO / 10) 1203 1204 /* 1205 * Back off for retransmission timeout, MAXTIMO is in hz of a sec 1206 */ 1207 #define MAXTIMO (20*hz) 1208 #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim)) 1209 #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1)) 1210 1211 static int 1212 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 1213 xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue, 1214 enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl) 1215 { 1216 CLIENT *client; 1217 struct chtab *ch; 1218 cred_t *cr = icr; 1219 struct rpc_err rpcerr, rpcerr_tmp; 1220 enum clnt_stat status; 1221 int error; 1222 struct timeval wait; 1223 int timeo; /* in units of hz */ 1224 bool_t tryagain, is_recov; 1225 bool_t cred_cloned = FALSE; 1226 k_sigset_t smask; 1227 servinfo4_t *svp; 1228 #ifdef DEBUG 1229 char *bufp; 1230 #endif 1231 int firstcall; 1232 1233 rpcerr.re_status = RPC_SUCCESS; 1234 1235 /* 1236 * If we know that we are rebooting then let's 1237 * not bother with doing any over the wireness. 1238 */ 1239 mutex_enter(&mi->mi_lock); 1240 if (mi->mi_flags & MI4_SHUTDOWN) { 1241 mutex_exit(&mi->mi_lock); 1242 return (EIO); 1243 } 1244 mutex_exit(&mi->mi_lock); 1245 1246 /* For TSOL, use a new cred which has net_mac_aware flag */ 1247 if (!cred_cloned && is_system_labeled()) { 1248 cred_cloned = TRUE; 1249 cr = crdup(icr); 1250 (void) setpflags(NET_MAC_AWARE, 1, cr); 1251 } 1252 1253 /* 1254 * clget() calls clnt_tli_kinit() which clears the xid, so we 1255 * are guaranteed to reprocess the retry as a new request. 1256 */ 1257 svp = mi->mi_curr_serv; 1258 rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl); 1259 if (rpcerr.re_errno != 0) 1260 return (rpcerr.re_errno); 1261 1262 timeo = (mi->mi_timeo * hz) / 10; 1263 1264 /* 1265 * If hard mounted fs, retry call forever unless hard error 1266 * occurs. 1267 * 1268 * For forced unmount, let the recovery thread through but return 1269 * an error for all others. This is so that user processes can 1270 * exit quickly. The recovery thread bails out after one 1271 * transmission so that it can tell if it needs to continue. 1272 * 1273 * For zone shutdown, behave as above to encourage quick 1274 * process exit, but also fail quickly when servers have 1275 * timed out before and reduce the timeouts. 1276 */ 1277 is_recov = (curthread == mi->mi_recovthread); 1278 firstcall = 1; 1279 do { 1280 tryagain = FALSE; 1281 1282 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE, 1283 "nfs4_rfscall: vfs_flag=0x%x, %s", 1284 mi->mi_vfsp->vfs_flag, 1285 is_recov ? "recov thread" : "not recov thread")); 1286 1287 /* 1288 * It's possible while we're retrying the admin 1289 * decided to reboot. 1290 */ 1291 mutex_enter(&mi->mi_lock); 1292 if (mi->mi_flags & MI4_SHUTDOWN) { 1293 mutex_exit(&mi->mi_lock); 1294 clfree4(client, ch, nfscl); 1295 if (cred_cloned) 1296 crfree(cr); 1297 return (EIO); 1298 } 1299 mutex_exit(&mi->mi_lock); 1300 1301 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) && 1302 (!is_recov || !firstcall)) { 1303 clfree4(client, ch, nfscl); 1304 if (cred_cloned) 1305 crfree(cr); 1306 return (EIO); 1307 } 1308 1309 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 1310 mutex_enter(&mi->mi_lock); 1311 if ((mi->mi_flags & MI4_TIMEDOUT) || 1312 !is_recov || !firstcall) { 1313 mutex_exit(&mi->mi_lock); 1314 clfree4(client, ch, nfscl); 1315 if (cred_cloned) 1316 crfree(cr); 1317 return (EIO); 1318 } 1319 mutex_exit(&mi->mi_lock); 1320 timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10; 1321 } 1322 1323 firstcall = 0; 1324 TICK_TO_TIMEVAL(timeo, &wait); 1325 1326 /* 1327 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 1328 * and SIGTERM. (Preserving the existing masks). 1329 * Mask out SIGINT if mount option nointr is specified. 1330 */ 1331 sigintr(&smask, (int)mi->mi_flags & MI4_INT); 1332 if (!(mi->mi_flags & MI4_INT)) 1333 client->cl_nosignal = TRUE; 1334 1335 /* 1336 * If there is a current signal, then don't bother 1337 * even trying to send out the request because we 1338 * won't be able to block waiting for the response. 1339 * Simply assume RPC_INTR and get on with it. 1340 */ 1341 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 1342 status = RPC_INTR; 1343 else { 1344 status = CLNT_CALL(client, which, xdrargs, argsp, 1345 xdrres, resp, wait); 1346 } 1347 1348 if (!(mi->mi_flags & MI4_INT)) 1349 client->cl_nosignal = FALSE; 1350 /* 1351 * restore original signal mask 1352 */ 1353 sigunintr(&smask); 1354 1355 switch (status) { 1356 case RPC_SUCCESS: 1357 break; 1358 1359 case RPC_INTR: 1360 /* 1361 * There is no way to recover from this error, 1362 * even if mount option nointr is specified. 1363 * SIGKILL, for example, cannot be blocked. 1364 */ 1365 rpcerr.re_status = RPC_INTR; 1366 rpcerr.re_errno = EINTR; 1367 break; 1368 1369 case RPC_UDERROR: 1370 /* 1371 * If the NFS server is local (vold) and 1372 * it goes away then we get RPC_UDERROR. 1373 * This is a retryable error, so we would 1374 * loop, so check to see if the specific 1375 * error was ECONNRESET, indicating that 1376 * target did not exist at all. If so, 1377 * return with RPC_PROGUNAVAIL and 1378 * ECONNRESET to indicate why. 1379 */ 1380 CLNT_GETERR(client, &rpcerr); 1381 if (rpcerr.re_errno == ECONNRESET) { 1382 rpcerr.re_status = RPC_PROGUNAVAIL; 1383 rpcerr.re_errno = ECONNRESET; 1384 break; 1385 } 1386 /*FALLTHROUGH*/ 1387 1388 default: /* probably RPC_TIMEDOUT */ 1389 1390 if (IS_UNRECOVERABLE_RPC(status)) 1391 break; 1392 1393 /* 1394 * increment server not responding count 1395 */ 1396 mutex_enter(&mi->mi_lock); 1397 mi->mi_noresponse++; 1398 mutex_exit(&mi->mi_lock); 1399 #ifdef DEBUG 1400 nfscl->nfscl_stat.noresponse.value.ui64++; 1401 #endif 1402 /* 1403 * On zone shutdown, mark server dead and move on. 1404 */ 1405 if (zone_status_get(curproc->p_zone) >= 1406 ZONE_IS_SHUTTING_DOWN) { 1407 mutex_enter(&mi->mi_lock); 1408 mi->mi_flags |= MI4_TIMEDOUT; 1409 mutex_exit(&mi->mi_lock); 1410 clfree4(client, ch, nfscl); 1411 if (cred_cloned) 1412 crfree(cr); 1413 return (EIO); 1414 } 1415 1416 /* 1417 * NFS client failover support: 1418 * return and let the caller take care of 1419 * failover. We only return for failover mounts 1420 * because otherwise we want the "not responding" 1421 * message, the timer updates, etc. 1422 */ 1423 if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) && 1424 (error = try_failover(status)) != 0) { 1425 clfree4(client, ch, nfscl); 1426 if (cred_cloned) 1427 crfree(cr); 1428 *rpc_statusp = status; 1429 return (error); 1430 } 1431 1432 if (flags & RFSCALL_SOFT) 1433 break; 1434 1435 tryagain = TRUE; 1436 1437 /* 1438 * The call is in progress (over COTS). 1439 * Try the CLNT_CALL again, but don't 1440 * print a noisy error message. 1441 */ 1442 if (status == RPC_INPROGRESS) 1443 break; 1444 1445 timeo = backoff(timeo); 1446 CLNT_GETERR(client, &rpcerr_tmp); 1447 1448 mutex_enter(&mi->mi_lock); 1449 if (!(mi->mi_flags & MI4_PRINTED)) { 1450 mi->mi_flags |= MI4_PRINTED; 1451 mutex_exit(&mi->mi_lock); 1452 if ((status == RPC_CANTSEND) && 1453 (rpcerr_tmp.re_errno == ENOBUFS)) 1454 nfs4_queue_fact(RF_SENDQ_FULL, mi, 0, 1455 0, 0, FALSE, NULL, 0, NULL); 1456 else 1457 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 1458 0, 0, 0, FALSE, NULL, 0, NULL); 1459 } else 1460 mutex_exit(&mi->mi_lock); 1461 1462 if (*doqueue && nfs_has_ctty()) { 1463 *doqueue = 0; 1464 if (!(mi->mi_flags & MI4_NOPRINT)) { 1465 if ((status == RPC_CANTSEND) && 1466 (rpcerr_tmp.re_errno == ENOBUFS)) 1467 nfs4_queue_fact(RF_SENDQ_FULL, 1468 mi, 0, 0, 0, FALSE, NULL, 1469 0, NULL); 1470 else 1471 nfs4_queue_fact( 1472 RF_SRV_NOT_RESPOND, mi, 0, 1473 0, 0, FALSE, NULL, 0, NULL); 1474 } 1475 } 1476 } 1477 } while (tryagain); 1478 1479 DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status, 1480 int, rpcerr.re_errno); 1481 1482 if (status != RPC_SUCCESS) { 1483 zoneid_t zoneid = mi->mi_zone->zone_id; 1484 1485 /* 1486 * Let soft mounts use the timed out message. 1487 */ 1488 if (status == RPC_INPROGRESS) 1489 status = RPC_TIMEDOUT; 1490 nfscl->nfscl_stat.badcalls.value.ui64++; 1491 if (status != RPC_INTR) { 1492 mutex_enter(&mi->mi_lock); 1493 mi->mi_flags |= MI4_DOWN; 1494 mutex_exit(&mi->mi_lock); 1495 CLNT_GETERR(client, &rpcerr); 1496 #ifdef DEBUG 1497 bufp = clnt_sperror(client, svp->sv_hostname); 1498 zprintf(zoneid, "NFS%d %s failed for %s\n", 1499 mi->mi_vers, mi->mi_rfsnames[which], bufp); 1500 if (nfs_has_ctty()) { 1501 if (!(mi->mi_flags & MI4_NOPRINT)) { 1502 uprintf("NFS%d %s failed for %s\n", 1503 mi->mi_vers, mi->mi_rfsnames[which], 1504 bufp); 1505 } 1506 } 1507 kmem_free(bufp, MAXPATHLEN); 1508 #else 1509 zprintf(zoneid, 1510 "NFS %s failed for server %s: error %d (%s)\n", 1511 mi->mi_rfsnames[which], svp->sv_hostname, 1512 status, clnt_sperrno(status)); 1513 if (nfs_has_ctty()) { 1514 if (!(mi->mi_flags & MI4_NOPRINT)) { 1515 uprintf( 1516 "NFS %s failed for server %s: error %d (%s)\n", 1517 mi->mi_rfsnames[which], 1518 svp->sv_hostname, status, 1519 clnt_sperrno(status)); 1520 } 1521 } 1522 #endif 1523 /* 1524 * when CLNT_CALL() fails with RPC_AUTHERROR, 1525 * re_errno is set appropriately depending on 1526 * the authentication error 1527 */ 1528 if (status == RPC_VERSMISMATCH || 1529 status == RPC_PROGVERSMISMATCH) 1530 rpcerr.re_errno = EIO; 1531 } 1532 } else { 1533 /* 1534 * Test the value of mi_down and mi_printed without 1535 * holding the mi_lock mutex. If they are both zero, 1536 * then it is okay to skip the down and printed 1537 * processing. This saves on a mutex_enter and 1538 * mutex_exit pair for a normal, successful RPC. 1539 * This was just complete overhead. 1540 */ 1541 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) { 1542 mutex_enter(&mi->mi_lock); 1543 mi->mi_flags &= ~MI4_DOWN; 1544 if (mi->mi_flags & MI4_PRINTED) { 1545 mi->mi_flags &= ~MI4_PRINTED; 1546 mutex_exit(&mi->mi_lock); 1547 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1548 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 1549 0, FALSE, NULL, 0, NULL); 1550 } else 1551 mutex_exit(&mi->mi_lock); 1552 } 1553 1554 if (*doqueue == 0) { 1555 if (!(mi->mi_flags & MI4_NOPRINT) && 1556 !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1557 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0, 1558 FALSE, NULL, 0, NULL); 1559 1560 *doqueue = 1; 1561 } 1562 } 1563 1564 clfree4(client, ch, nfscl); 1565 if (cred_cloned) 1566 crfree(cr); 1567 1568 ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 1569 1570 TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d", 1571 rpcerr.re_errno); 1572 1573 *rpc_statusp = status; 1574 return (rpcerr.re_errno); 1575 } 1576 1577 /* 1578 * rfs4call - general wrapper for RPC calls initiated by the client 1579 */ 1580 void 1581 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp, 1582 cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep) 1583 { 1584 int i, error; 1585 enum clnt_stat rpc_status = NFS4_OK; 1586 int num_resops; 1587 struct nfs4_clnt *nfscl; 1588 1589 ASSERT(nfs_zone() == mi->mi_zone); 1590 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone()); 1591 ASSERT(nfscl != NULL); 1592 1593 nfscl->nfscl_stat.calls.value.ui64++; 1594 mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++; 1595 1596 /* Set up the results struct for XDR usage */ 1597 resp->argsp = argsp; 1598 resp->array = NULL; 1599 resp->status = 0; 1600 resp->decode_len = 0; 1601 1602 error = nfs4_rfscall(mi, NFSPROC4_COMPOUND, 1603 xdr_COMPOUND4args_clnt, (caddr_t)argsp, 1604 xdr_COMPOUND4res_clnt, (caddr_t)resp, cr, 1605 doqueue, &rpc_status, flags, nfscl); 1606 1607 /* Return now if it was an RPC error */ 1608 if (error) { 1609 ep->error = error; 1610 ep->stat = resp->status; 1611 ep->rpc_status = rpc_status; 1612 return; 1613 } 1614 1615 /* else we'll count the processed operations */ 1616 num_resops = resp->decode_len; 1617 for (i = 0; i < num_resops; i++) { 1618 /* 1619 * Count the individual operations 1620 * processed by the server. 1621 */ 1622 if (resp->array[i].resop >= NFSPROC4_NULL && 1623 resp->array[i].resop <= OP_WRITE) 1624 mi->mi_reqs[resp->array[i].resop].value.ui64++; 1625 } 1626 1627 ep->error = 0; 1628 ep->stat = resp->status; 1629 ep->rpc_status = rpc_status; 1630 } 1631 1632 /* 1633 * nfs4rename_update - updates stored state after a rename. Currently this 1634 * is the path of the object and anything under it, and the filehandle of 1635 * the renamed object. 1636 */ 1637 void 1638 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm) 1639 { 1640 sfh4_update(VTOR4(renvp)->r_fh, nfh4p); 1641 fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm); 1642 } 1643 1644 /* 1645 * Routine to look up the filehandle for the given path and rootvp. 1646 * 1647 * Return values: 1648 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is 1649 * updated. 1650 * - error: return value (errno value) and/or *statp is set appropriately. 1651 */ 1652 #define RML_ORDINARY 1 1653 #define RML_NAMED_ATTR 2 1654 #define RML_ATTRDIR 3 1655 1656 static void 1657 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp, 1658 int filetype, cred_t *cr, 1659 nfs_fh4 *fhp, nfs4_ga_res_t *garp, /* fh, attrs for object */ 1660 nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp, /* fh, attrs for parent */ 1661 nfs4_error_t *ep) 1662 { 1663 COMPOUND4args_clnt args; 1664 COMPOUND4res_clnt res; 1665 nfs_argop4 *argop; 1666 nfs_resop4 *resop; 1667 int num_argops; 1668 lookup4_param_t lookuparg; 1669 nfs_fh4 *tmpfhp; 1670 int doqueue = 1; 1671 char *path; 1672 mntinfo4_t *mi; 1673 1674 ASSERT(fname != NULL); 1675 ASSERT(rootvp->v_type == VDIR); 1676 1677 mi = VTOMI4(rootvp); 1678 path = fn_path(fname); 1679 switch (filetype) { 1680 case RML_NAMED_ATTR: 1681 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR; 1682 args.ctag = TAG_REMAP_LOOKUP_NA; 1683 break; 1684 case RML_ATTRDIR: 1685 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR; 1686 args.ctag = TAG_REMAP_LOOKUP_AD; 1687 break; 1688 case RML_ORDINARY: 1689 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1690 args.ctag = TAG_REMAP_LOOKUP; 1691 break; 1692 default: 1693 ep->error = EINVAL; 1694 return; 1695 } 1696 lookuparg.argsp = &args; 1697 lookuparg.resp = &res; 1698 lookuparg.header_len = 1; /* Putfh */ 1699 lookuparg.trailer_len = 0; 1700 lookuparg.ga_bits = NFS4_VATTR_MASK; 1701 lookuparg.mi = VTOMI4(rootvp); 1702 1703 (void) nfs4lookup_setup(path, &lookuparg, 1); 1704 1705 /* 0: putfh directory */ 1706 argop = args.array; 1707 argop[0].argop = OP_CPUTFH; 1708 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh; 1709 1710 num_argops = args.array_len; 1711 1712 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1713 1714 if (ep->error || res.status != NFS4_OK) 1715 goto exit; 1716 1717 /* get the object filehandle */ 1718 resop = &res.array[res.array_len - 2]; 1719 if (resop->resop != OP_GETFH) { 1720 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1721 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1722 ep->stat = NFS4ERR_SERVERFAULT; 1723 goto exit; 1724 } 1725 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1726 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1727 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1728 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1729 TAG_NONE, 0, 0); 1730 ep->stat = NFS4ERR_SERVERFAULT; 1731 goto exit; 1732 } 1733 fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1734 nfs_fh4_copy(tmpfhp, fhp); 1735 1736 /* get the object attributes */ 1737 resop = &res.array[res.array_len - 1]; 1738 if (garp && resop->resop == OP_GETATTR) 1739 *garp = resop->nfs_resop4_u.opgetattr.ga_res; 1740 1741 /* See if there are enough fields in the response for parent info */ 1742 if ((int)res.array_len - 5 <= 0) 1743 goto exit; 1744 1745 /* get the parent filehandle */ 1746 resop = &res.array[res.array_len - 5]; 1747 if (resop->resop != OP_GETFH) { 1748 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1749 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1750 ep->stat = NFS4ERR_SERVERFAULT; 1751 goto exit; 1752 } 1753 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1754 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1755 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1756 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1757 TAG_NONE, 0, 0); 1758 ep->stat = NFS4ERR_SERVERFAULT; 1759 goto exit; 1760 } 1761 pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1762 nfs_fh4_copy(tmpfhp, pfhp); 1763 1764 /* get the parent attributes */ 1765 resop = &res.array[res.array_len - 4]; 1766 if (pgarp && resop->resop == OP_GETATTR) 1767 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res; 1768 1769 exit: 1770 /* 1771 * It is too hard to remember where all the OP_LOOKUPs are 1772 */ 1773 nfs4args_lookup_free(argop, num_argops); 1774 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1775 1776 if (!ep->error) 1777 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1778 kmem_free(path, strlen(path)+1); 1779 } 1780 1781 /* 1782 * NFS client failover / volatile filehandle support 1783 * 1784 * Recover the filehandle for the given rnode. 1785 * 1786 * Errors are returned via the nfs4_error_t parameter. 1787 */ 1788 1789 void 1790 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 1791 { 1792 int is_stub; 1793 rnode4_t *rp = VTOR4(vp); 1794 vnode_t *rootvp = NULL; 1795 vnode_t *dvp = NULL; 1796 cred_t *cr, *cred_otw; 1797 nfs4_ga_res_t gar, pgar; 1798 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 1799 int filetype = RML_ORDINARY; 1800 nfs4_recov_state_t recov = {NULL, 0, 0}; 1801 int badfhcount = 0; 1802 nfs4_open_stream_t *osp = NULL; 1803 bool_t first_time = TRUE; /* first time getting OTW cred */ 1804 bool_t last_time = FALSE; /* last time getting OTW cred */ 1805 1806 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1807 "nfs4_remap_file: remapping %s", rnode4info(rp))); 1808 ASSERT(nfs4_consistent_type(vp)); 1809 1810 if (vp->v_flag & VROOT) { 1811 nfs4_remap_root(mi, ep, flags); 1812 return; 1813 } 1814 1815 /* 1816 * Given the root fh, use the path stored in 1817 * the rnode to find the fh for the new server. 1818 */ 1819 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1820 if (ep->error != 0) 1821 return; 1822 1823 cr = curthread->t_cred; 1824 ASSERT(cr != NULL); 1825 get_remap_cred: 1826 /* 1827 * Releases the osp, if it is provided. 1828 * Puts a hold on the cred_otw and the new osp (if found). 1829 */ 1830 cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp, 1831 &first_time, &last_time); 1832 ASSERT(cred_otw != NULL); 1833 1834 if (rp->r_flags & R4ISXATTR) { 1835 filetype = RML_NAMED_ATTR; 1836 (void) vtodv(vp, &dvp, cred_otw, FALSE); 1837 } 1838 1839 if (vp->v_flag & V_XATTRDIR) { 1840 filetype = RML_ATTRDIR; 1841 } 1842 1843 if (filetype == RML_ORDINARY && rootvp->v_type == VREG) { 1844 /* file mount, doesn't need a remap */ 1845 goto done; 1846 } 1847 1848 again: 1849 remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw, 1850 &newfh, &gar, &newpfh, &pgar, ep); 1851 1852 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1853 "nfs4_remap_file: remap_lookup returned %d/%d", 1854 ep->error, ep->stat)); 1855 1856 if (last_time == FALSE && ep->error == EACCES) { 1857 crfree(cred_otw); 1858 if (dvp != NULL) 1859 VN_RELE(dvp); 1860 goto get_remap_cred; 1861 } 1862 if (ep->error != 0) 1863 goto done; 1864 1865 switch (ep->stat) { 1866 case NFS4_OK: 1867 badfhcount = 0; 1868 if (recov.rs_flags & NFS4_RS_DELAY_MSG) { 1869 mutex_enter(&rp->r_statelock); 1870 rp->r_delay_interval = 0; 1871 mutex_exit(&rp->r_statelock); 1872 uprintf("NFS File Available..\n"); 1873 } 1874 break; 1875 case NFS4ERR_FHEXPIRED: 1876 case NFS4ERR_BADHANDLE: 1877 case NFS4ERR_STALE: 1878 /* 1879 * If we ran into filehandle problems, we should try to 1880 * remap the root vnode first and hope life gets better. 1881 * But we need to avoid loops. 1882 */ 1883 if (badfhcount++ > 0) 1884 goto done; 1885 if (newfh.nfs_fh4_len != 0) { 1886 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1887 newfh.nfs_fh4_len = 0; 1888 } 1889 if (newpfh.nfs_fh4_len != 0) { 1890 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1891 newpfh.nfs_fh4_len = 0; 1892 } 1893 /* relative path - remap rootvp then retry */ 1894 VN_RELE(rootvp); 1895 rootvp = NULL; 1896 nfs4_remap_root(mi, ep, flags); 1897 if (ep->error != 0 || ep->stat != NFS4_OK) 1898 goto done; 1899 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1900 if (ep->error != 0) 1901 goto done; 1902 goto again; 1903 case NFS4ERR_DELAY: 1904 badfhcount = 0; 1905 nfs4_set_delay_wait(vp); 1906 ep->error = nfs4_wait_for_delay(vp, &recov); 1907 if (ep->error != 0) 1908 goto done; 1909 goto again; 1910 case NFS4ERR_ACCESS: 1911 /* get new cred, try again */ 1912 if (last_time == TRUE) 1913 goto done; 1914 if (dvp != NULL) 1915 VN_RELE(dvp); 1916 crfree(cred_otw); 1917 goto get_remap_cred; 1918 default: 1919 goto done; 1920 } 1921 1922 /* 1923 * Check on the new and old rnodes before updating; 1924 * if the vnode type or size changes, issue a warning 1925 * and mark the file dead. 1926 */ 1927 mutex_enter(&rp->r_statelock); 1928 if (flags & NFS4_REMAP_CKATTRS) { 1929 if (vp->v_type != gar.n4g_va.va_type || 1930 (vp->v_type != VDIR && 1931 rp->r_size != gar.n4g_va.va_size)) { 1932 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1933 "nfs4_remap_file: size %d vs. %d, type %d vs. %d", 1934 (int)rp->r_size, (int)gar.n4g_va.va_size, 1935 vp->v_type, gar.n4g_va.va_type)); 1936 mutex_exit(&rp->r_statelock); 1937 nfs4_queue_event(RE_FILE_DIFF, mi, 1938 rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0, 1939 TAG_NONE, TAG_NONE, 0, 0); 1940 nfs4_fail_recov(vp, NULL, 0, NFS4_OK); 1941 goto done; 1942 } 1943 } 1944 ASSERT(gar.n4g_va.va_type != VNON); 1945 rp->r_server = mi->mi_curr_serv; 1946 1947 /* 1948 * Turn this object into a "stub" object if we 1949 * crossed an underlying server fs boundary. 1950 * 1951 * This stub will be for a mirror-mount. 1952 * A referral would look like a boundary crossing 1953 * as well, but would not be the same type of object, 1954 * so we would expect to mark the object dead. 1955 * 1956 * See comment in r4_do_attrcache() for more details. 1957 */ 1958 is_stub = 0; 1959 if (gar.n4g_fsid_valid) { 1960 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0); 1961 rp->r_srv_fsid = gar.n4g_fsid; 1962 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid)) 1963 is_stub = 1; 1964 nfs_rw_exit(&rp->r_server->sv_lock); 1965 #ifdef DEBUG 1966 } else { 1967 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1968 "remap_file: fsid attr not provided by server. rp=%p", 1969 (void *)rp)); 1970 #endif 1971 } 1972 if (is_stub) 1973 r4_stub_mirrormount(rp); 1974 else 1975 r4_stub_none(rp); 1976 mutex_exit(&rp->r_statelock); 1977 nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */ 1978 sfh4_update(rp->r_fh, &newfh); 1979 ASSERT(nfs4_consistent_type(vp)); 1980 1981 /* 1982 * If we got parent info, use it to update the parent 1983 */ 1984 if (newpfh.nfs_fh4_len != 0) { 1985 if (rp->r_svnode.sv_dfh != NULL) 1986 sfh4_update(rp->r_svnode.sv_dfh, &newpfh); 1987 if (dvp != NULL) { 1988 /* force update of attrs */ 1989 nfs4_attrcache_noinval(dvp, &pgar, gethrtime()); 1990 } 1991 } 1992 done: 1993 if (newfh.nfs_fh4_len != 0) 1994 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1995 if (newpfh.nfs_fh4_len != 0) 1996 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1997 if (cred_otw != NULL) 1998 crfree(cred_otw); 1999 if (rootvp != NULL) 2000 VN_RELE(rootvp); 2001 if (dvp != NULL) 2002 VN_RELE(dvp); 2003 if (osp != NULL) 2004 open_stream_rele(osp, rp); 2005 } 2006 2007 /* 2008 * Client-side failover support: remap the filehandle for vp if it appears 2009 * necessary. errors are returned via the nfs4_error_t parameter; though, 2010 * if there is a problem, we will just try again later. 2011 */ 2012 2013 void 2014 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 2015 { 2016 if (vp == NULL) 2017 return; 2018 2019 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY)) 2020 return; 2021 2022 if (VTOR4(vp)->r_server == mi->mi_curr_serv) 2023 return; 2024 2025 nfs4_remap_file(mi, vp, flags, ep); 2026 } 2027 2028 /* 2029 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node. 2030 * 2031 * Our caller has a filehandle for ".." relative to a particular 2032 * directory object. We want to find or create a parent vnode 2033 * with that filehandle and return it. We can of course create 2034 * a vnode from this filehandle, but we need to also make sure 2035 * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR) 2036 * that we have a parent FH for future reopens as well. If 2037 * we have a remap failure, we won't be able to reopen this 2038 * file, but we won't treat that as fatal because a reopen 2039 * is at least unlikely. Someday nfs4_reopen() should look 2040 * for a missing parent FH and try a remap to recover from it. 2041 * 2042 * need_start_op argument indicates whether this function should 2043 * do a start_op before calling remap_lookup(). This should 2044 * be FALSE, if you are the recovery thread or in an op; otherwise, 2045 * set it to TRUE. 2046 */ 2047 int 2048 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp, 2049 cred_t *cr, vnode_t **vpp, int need_start_op) 2050 { 2051 mntinfo4_t *mi = VTOMI4(dvp); 2052 nfs4_fname_t *np = NULL, *pnp = NULL; 2053 vnode_t *vp = NULL, *rootvp = NULL; 2054 rnode4_t *rp; 2055 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 2056 nfs4_ga_res_t gar, pgar; 2057 vattr_t va, pva; 2058 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2059 nfs4_sharedfh_t *sfh = NULL, *psfh = NULL; 2060 nfs4_recov_state_t recov_state; 2061 2062 #ifdef DEBUG 2063 /* 2064 * ensure need_start_op is correct 2065 */ 2066 { 2067 int no_need_start_op = (tsd_get(nfs4_tsd_key) || 2068 (curthread == mi->mi_recovthread)); 2069 /* C needs a ^^ operator! */ 2070 ASSERT(((need_start_op) && (!no_need_start_op)) || 2071 ((! need_start_op) && (no_need_start_op))); 2072 } 2073 #endif 2074 ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone()); 2075 2076 NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE, 2077 "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp, 2078 rnode4info(VTOR4(dvp)))); 2079 2080 /* 2081 * rootvp might be needed eventually. Holding it now will 2082 * ensure that r4find_unlocked() will find it, if ".." is the root. 2083 */ 2084 e.error = VFS_ROOT(mi->mi_vfsp, &rootvp); 2085 if (e.error != 0) 2086 goto out; 2087 rp = r4find_unlocked(fhp, mi->mi_vfsp); 2088 if (rp != NULL) { 2089 *vpp = RTOV4(rp); 2090 VN_RELE(rootvp); 2091 return (0); 2092 } 2093 2094 /* 2095 * Since we don't have the rnode, we have to go over the wire. 2096 * remap_lookup() can get all of the filehandles and attributes 2097 * we need in one operation. 2098 */ 2099 np = fn_parent(VTOSV(dvp)->sv_name); 2100 /* if a parent was not found return an error */ 2101 if (np == NULL) { 2102 e.error = ENOENT; 2103 goto out; 2104 } 2105 2106 recov_state.rs_flags = 0; 2107 recov_state.rs_num_retry_despite_err = 0; 2108 recov_retry: 2109 if (need_start_op) { 2110 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP, 2111 &recov_state, NULL); 2112 if (e.error != 0) { 2113 goto out; 2114 } 2115 } 2116 2117 pgar.n4g_va.va_type = VNON; 2118 gar.n4g_va.va_type = VNON; 2119 2120 remap_lookup(np, rootvp, RML_ORDINARY, cr, 2121 &newfh, &gar, &newpfh, &pgar, &e); 2122 if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) { 2123 if (need_start_op) { 2124 bool_t abort; 2125 2126 abort = nfs4_start_recovery(&e, mi, 2127 rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, 2128 NULL); 2129 if (abort) { 2130 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2131 &recov_state, FALSE); 2132 if (e.error == 0) 2133 e.error = EIO; 2134 goto out; 2135 } 2136 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2137 &recov_state, TRUE); 2138 goto recov_retry; 2139 } 2140 if (e.error == 0) 2141 e.error = EIO; 2142 goto out; 2143 } 2144 2145 va = gar.n4g_va; 2146 pva = pgar.n4g_va; 2147 2148 if ((e.error != 0) || 2149 (va.va_type != VDIR)) { 2150 if (need_start_op) 2151 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2152 &recov_state, FALSE); 2153 if (e.error == 0) 2154 e.error = EIO; 2155 goto out; 2156 } 2157 2158 if (e.stat != NFS4_OK) { 2159 if (need_start_op) 2160 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2161 &recov_state, FALSE); 2162 e.error = EIO; 2163 goto out; 2164 } 2165 2166 /* 2167 * It is possible for remap_lookup() to return with no error, 2168 * but without providing the parent filehandle and attrs. 2169 */ 2170 if (pva.va_type != VDIR) { 2171 /* 2172 * Call remap_lookup() again, this time with the 2173 * newpfh and pgar args in the first position. 2174 */ 2175 pnp = fn_parent(np); 2176 if (pnp != NULL) { 2177 remap_lookup(pnp, rootvp, RML_ORDINARY, cr, 2178 &newpfh, &pgar, NULL, NULL, &e); 2179 /* 2180 * This remap_lookup call modifies pgar. The following 2181 * line prevents trouble when checking the va_type of 2182 * pva later in this code. 2183 */ 2184 pva = pgar.n4g_va; 2185 2186 if (nfs4_needs_recovery(&e, FALSE, 2187 mi->mi_vfsp)) { 2188 if (need_start_op) { 2189 bool_t abort; 2190 2191 abort = nfs4_start_recovery(&e, mi, 2192 rootvp, NULL, NULL, NULL, 2193 OP_LOOKUP, NULL, NULL, NULL); 2194 if (abort) { 2195 nfs4_end_fop(mi, rootvp, NULL, 2196 OH_LOOKUP, &recov_state, 2197 FALSE); 2198 if (e.error == 0) 2199 e.error = EIO; 2200 goto out; 2201 } 2202 nfs4_end_fop(mi, rootvp, NULL, 2203 OH_LOOKUP, &recov_state, TRUE); 2204 goto recov_retry; 2205 } 2206 if (e.error == 0) 2207 e.error = EIO; 2208 goto out; 2209 } 2210 2211 if (e.stat != NFS4_OK) { 2212 if (need_start_op) 2213 nfs4_end_fop(mi, rootvp, NULL, 2214 OH_LOOKUP, &recov_state, FALSE); 2215 e.error = EIO; 2216 goto out; 2217 } 2218 } 2219 if ((pnp == NULL) || 2220 (e.error != 0) || 2221 (pva.va_type == VNON)) { 2222 if (need_start_op) 2223 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2224 &recov_state, FALSE); 2225 if (e.error == 0) 2226 e.error = EIO; 2227 goto out; 2228 } 2229 } 2230 ASSERT(newpfh.nfs_fh4_len != 0); 2231 if (need_start_op) 2232 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE); 2233 psfh = sfh4_get(&newpfh, mi); 2234 2235 sfh = sfh4_get(&newfh, mi); 2236 vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t); 2237 2238 out: 2239 if (np != NULL) 2240 fn_rele(&np); 2241 if (pnp != NULL) 2242 fn_rele(&pnp); 2243 if (newfh.nfs_fh4_len != 0) 2244 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 2245 if (newpfh.nfs_fh4_len != 0) 2246 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 2247 if (sfh != NULL) 2248 sfh4_rele(&sfh); 2249 if (psfh != NULL) 2250 sfh4_rele(&psfh); 2251 if (rootvp != NULL) 2252 VN_RELE(rootvp); 2253 *vpp = vp; 2254 return (e.error); 2255 } 2256 2257 #ifdef DEBUG 2258 size_t r_path_memuse = 0; 2259 #endif 2260 2261 /* 2262 * NFS client failover support 2263 * 2264 * sv4_free() frees the malloc'd portion of a "servinfo_t". 2265 */ 2266 void 2267 sv4_free(servinfo4_t *svp) 2268 { 2269 servinfo4_t *next; 2270 struct knetconfig *knconf; 2271 2272 while (svp != NULL) { 2273 next = svp->sv_next; 2274 if (svp->sv_dhsec) 2275 sec_clnt_freeinfo(svp->sv_dhsec); 2276 if (svp->sv_secdata) 2277 sec_clnt_freeinfo(svp->sv_secdata); 2278 if (svp->sv_save_secinfo && 2279 svp->sv_save_secinfo != svp->sv_secinfo) 2280 secinfo_free(svp->sv_save_secinfo); 2281 if (svp->sv_secinfo) 2282 secinfo_free(svp->sv_secinfo); 2283 if (svp->sv_hostname && svp->sv_hostnamelen > 0) 2284 kmem_free(svp->sv_hostname, svp->sv_hostnamelen); 2285 knconf = svp->sv_knconf; 2286 if (knconf != NULL) { 2287 if (knconf->knc_protofmly != NULL) 2288 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2289 if (knconf->knc_proto != NULL) 2290 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2291 kmem_free(knconf, sizeof (*knconf)); 2292 } 2293 knconf = svp->sv_origknconf; 2294 if (knconf != NULL) { 2295 if (knconf->knc_protofmly != NULL) 2296 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2297 if (knconf->knc_proto != NULL) 2298 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2299 kmem_free(knconf, sizeof (*knconf)); 2300 } 2301 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0) 2302 kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen); 2303 if (svp->sv_path != NULL) { 2304 kmem_free(svp->sv_path, svp->sv_pathlen); 2305 } 2306 nfs_rw_destroy(&svp->sv_lock); 2307 kmem_free(svp, sizeof (*svp)); 2308 svp = next; 2309 } 2310 } 2311 2312 void 2313 nfs4_printfhandle(nfs4_fhandle_t *fhp) 2314 { 2315 int *ip; 2316 char *buf; 2317 size_t bufsize; 2318 char *cp; 2319 2320 /* 2321 * 13 == "(file handle:" 2322 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times 2323 * 1 == ' ' 2324 * 8 == maximum strlen of "%x" 2325 * 3 == ")\n\0" 2326 */ 2327 bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3; 2328 buf = kmem_alloc(bufsize, KM_NOSLEEP); 2329 if (buf == NULL) 2330 return; 2331 2332 cp = buf; 2333 (void) strcpy(cp, "(file handle:"); 2334 while (*cp != '\0') 2335 cp++; 2336 for (ip = (int *)fhp->fh_buf; 2337 ip < (int *)&fhp->fh_buf[fhp->fh_len]; 2338 ip++) { 2339 (void) sprintf(cp, " %x", *ip); 2340 while (*cp != '\0') 2341 cp++; 2342 } 2343 (void) strcpy(cp, ")\n"); 2344 2345 zcmn_err(getzoneid(), CE_CONT, "%s", buf); 2346 2347 kmem_free(buf, bufsize); 2348 } 2349 2350 /* 2351 * The NFSv4 readdir cache subsystem. 2352 * 2353 * We provide a set of interfaces to allow the rest of the system to utilize 2354 * a caching mechanism while encapsulating the details of the actual 2355 * implementation. This should allow for better maintainability and 2356 * extensibility by consolidating the implementation details in one location. 2357 */ 2358 2359 /* 2360 * Comparator used by AVL routines. 2361 */ 2362 static int 2363 rddir4_cache_compar(const void *x, const void *y) 2364 { 2365 rddir4_cache_impl *ai = (rddir4_cache_impl *)x; 2366 rddir4_cache_impl *bi = (rddir4_cache_impl *)y; 2367 rddir4_cache *a = &ai->rc; 2368 rddir4_cache *b = &bi->rc; 2369 2370 if (a->nfs4_cookie == b->nfs4_cookie) { 2371 if (a->buflen == b->buflen) 2372 return (0); 2373 if (a->buflen < b->buflen) 2374 return (-1); 2375 return (1); 2376 } 2377 2378 if (a->nfs4_cookie < b->nfs4_cookie) 2379 return (-1); 2380 2381 return (1); 2382 } 2383 2384 /* 2385 * Allocate an opaque handle for the readdir cache. 2386 */ 2387 void 2388 rddir4_cache_create(rnode4_t *rp) 2389 { 2390 ASSERT(rp->r_dir == NULL); 2391 2392 rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 2393 2394 avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl), 2395 offsetof(rddir4_cache_impl, tree)); 2396 } 2397 2398 /* 2399 * Purge the cache of all cached readdir responses. 2400 */ 2401 void 2402 rddir4_cache_purge(rnode4_t *rp) 2403 { 2404 rddir4_cache_impl *rdip; 2405 rddir4_cache_impl *nrdip; 2406 2407 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2408 2409 if (rp->r_dir == NULL) 2410 return; 2411 2412 rdip = avl_first(rp->r_dir); 2413 2414 while (rdip != NULL) { 2415 nrdip = AVL_NEXT(rp->r_dir, rdip); 2416 avl_remove(rp->r_dir, rdip); 2417 rdip->rc.flags &= ~RDDIRCACHED; 2418 rddir4_cache_rele(rp, &rdip->rc); 2419 rdip = nrdip; 2420 } 2421 ASSERT(avl_numnodes(rp->r_dir) == 0); 2422 } 2423 2424 /* 2425 * Destroy the readdir cache. 2426 */ 2427 void 2428 rddir4_cache_destroy(rnode4_t *rp) 2429 { 2430 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2431 if (rp->r_dir == NULL) 2432 return; 2433 2434 rddir4_cache_purge(rp); 2435 avl_destroy(rp->r_dir); 2436 kmem_free(rp->r_dir, sizeof (avl_tree_t)); 2437 rp->r_dir = NULL; 2438 } 2439 2440 /* 2441 * Locate a readdir response from the readdir cache. 2442 * 2443 * Return values: 2444 * 2445 * NULL - If there is an unrecoverable situation like the operation may have 2446 * been interrupted. 2447 * 2448 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller. 2449 * The flags are set approprately, such that the caller knows 2450 * what state the entry is in. 2451 */ 2452 rddir4_cache * 2453 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count) 2454 { 2455 rddir4_cache_impl *rdip = NULL; 2456 rddir4_cache_impl srdip; 2457 rddir4_cache *srdc; 2458 rddir4_cache *rdc = NULL; 2459 rddir4_cache *nrdc = NULL; 2460 avl_index_t where; 2461 2462 top: 2463 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 2464 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2465 /* 2466 * Check to see if the readdir cache has been disabled. If so, then 2467 * simply allocate an rddir4_cache entry and return it, since caching 2468 * operations do not apply. 2469 */ 2470 if (rp->r_dir == NULL) { 2471 if (nrdc == NULL) { 2472 /* 2473 * Drop the lock because we are doing a sleeping 2474 * allocation. 2475 */ 2476 mutex_exit(&rp->r_statelock); 2477 rdc = rddir4_cache_alloc(KM_SLEEP); 2478 rdc->nfs4_cookie = cookie; 2479 rdc->buflen = count; 2480 mutex_enter(&rp->r_statelock); 2481 return (rdc); 2482 } 2483 return (nrdc); 2484 } 2485 2486 srdc = &srdip.rc; 2487 srdc->nfs4_cookie = cookie; 2488 srdc->buflen = count; 2489 2490 rdip = avl_find(rp->r_dir, &srdip, &where); 2491 2492 /* 2493 * If we didn't find an entry then create one and insert it 2494 * into the cache. 2495 */ 2496 if (rdip == NULL) { 2497 /* 2498 * Check for the case where we have made a second pass through 2499 * the cache due to a lockless allocation. If we find that no 2500 * thread has already inserted this entry, do the insert now 2501 * and return. 2502 */ 2503 if (nrdc != NULL) { 2504 avl_insert(rp->r_dir, nrdc->data, where); 2505 nrdc->flags |= RDDIRCACHED; 2506 rddir4_cache_hold(nrdc); 2507 return (nrdc); 2508 } 2509 2510 #ifdef DEBUG 2511 nfs4_readdir_cache_misses++; 2512 #endif 2513 /* 2514 * First, try to allocate an entry without sleeping. If that 2515 * fails then drop the lock and do a sleeping allocation. 2516 */ 2517 nrdc = rddir4_cache_alloc(KM_NOSLEEP); 2518 if (nrdc != NULL) { 2519 nrdc->nfs4_cookie = cookie; 2520 nrdc->buflen = count; 2521 avl_insert(rp->r_dir, nrdc->data, where); 2522 nrdc->flags |= RDDIRCACHED; 2523 rddir4_cache_hold(nrdc); 2524 return (nrdc); 2525 } 2526 2527 /* 2528 * Drop the lock and do a sleeping allocation. We incur 2529 * additional overhead by having to search the cache again, 2530 * but this case should be rare. 2531 */ 2532 mutex_exit(&rp->r_statelock); 2533 nrdc = rddir4_cache_alloc(KM_SLEEP); 2534 nrdc->nfs4_cookie = cookie; 2535 nrdc->buflen = count; 2536 mutex_enter(&rp->r_statelock); 2537 /* 2538 * We need to take another pass through the cache 2539 * since we dropped our lock to perform the alloc. 2540 * Another thread may have come by and inserted the 2541 * entry we are interested in. 2542 */ 2543 goto top; 2544 } 2545 2546 /* 2547 * Check to see if we need to free our entry. This can happen if 2548 * another thread came along beat us to the insert. We can 2549 * safely call rddir4_cache_free directly because no other thread 2550 * would have a reference to this entry. 2551 */ 2552 if (nrdc != NULL) 2553 rddir4_cache_free((rddir4_cache_impl *)nrdc->data); 2554 2555 #ifdef DEBUG 2556 nfs4_readdir_cache_hits++; 2557 #endif 2558 /* 2559 * Found something. Make sure it's ready to return. 2560 */ 2561 rdc = &rdip->rc; 2562 rddir4_cache_hold(rdc); 2563 /* 2564 * If the cache entry is in the process of being filled in, wait 2565 * until this completes. The RDDIRWAIT bit is set to indicate that 2566 * someone is waiting and when the thread currently filling the entry 2567 * is done, it should do a cv_broadcast to wakeup all of the threads 2568 * waiting for it to finish. If the thread wakes up to find that 2569 * someone new is now trying to complete the the entry, go back 2570 * to sleep. 2571 */ 2572 while (rdc->flags & RDDIR) { 2573 /* 2574 * The entry is not complete. 2575 */ 2576 nfs_rw_exit(&rp->r_rwlock); 2577 rdc->flags |= RDDIRWAIT; 2578 #ifdef DEBUG 2579 nfs4_readdir_cache_waits++; 2580 #endif 2581 while (rdc->flags & RDDIRWAIT) { 2582 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) { 2583 /* 2584 * We got interrupted, probably the user 2585 * typed ^C or an alarm fired. We free the 2586 * new entry if we allocated one. 2587 */ 2588 rddir4_cache_rele(rp, rdc); 2589 mutex_exit(&rp->r_statelock); 2590 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2591 RW_READER, FALSE); 2592 mutex_enter(&rp->r_statelock); 2593 return (NULL); 2594 } 2595 } 2596 mutex_exit(&rp->r_statelock); 2597 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2598 RW_READER, FALSE); 2599 mutex_enter(&rp->r_statelock); 2600 } 2601 2602 /* 2603 * The entry we were waiting on may have been purged from 2604 * the cache and should no longer be used, release it and 2605 * start over. 2606 */ 2607 if (!(rdc->flags & RDDIRCACHED)) { 2608 rddir4_cache_rele(rp, rdc); 2609 goto top; 2610 } 2611 2612 /* 2613 * The entry is completed. Return it. 2614 */ 2615 return (rdc); 2616 } 2617 2618 /* 2619 * Allocate a cache element and return it. Can return NULL if memory is 2620 * low. 2621 */ 2622 static rddir4_cache * 2623 rddir4_cache_alloc(int flags) 2624 { 2625 rddir4_cache_impl *rdip = NULL; 2626 rddir4_cache *rc = NULL; 2627 2628 rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags); 2629 2630 if (rdip != NULL) { 2631 rc = &rdip->rc; 2632 rc->data = (void *)rdip; 2633 rc->nfs4_cookie = 0; 2634 rc->nfs4_ncookie = 0; 2635 rc->entries = NULL; 2636 rc->eof = 0; 2637 rc->entlen = 0; 2638 rc->buflen = 0; 2639 rc->actlen = 0; 2640 /* 2641 * A readdir is required so set the flag. 2642 */ 2643 rc->flags = RDDIRREQ; 2644 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL); 2645 rc->error = 0; 2646 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL); 2647 rdip->count = 1; 2648 #ifdef DEBUG 2649 atomic_inc_64(&clstat4_debug.dirent.value.ui64); 2650 #endif 2651 } 2652 return (rc); 2653 } 2654 2655 /* 2656 * Increment the reference count to this cache element. 2657 */ 2658 static void 2659 rddir4_cache_hold(rddir4_cache *rc) 2660 { 2661 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data; 2662 2663 mutex_enter(&rdip->lock); 2664 rdip->count++; 2665 mutex_exit(&rdip->lock); 2666 } 2667 2668 /* 2669 * Release a reference to this cache element. If the count is zero then 2670 * free the element. 2671 */ 2672 void 2673 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc) 2674 { 2675 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data; 2676 2677 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2678 2679 /* 2680 * Check to see if we have any waiters. If so, we can wake them 2681 * so that they can proceed. 2682 */ 2683 if (rdc->flags & RDDIRWAIT) { 2684 rdc->flags &= ~RDDIRWAIT; 2685 cv_broadcast(&rdc->cv); 2686 } 2687 2688 mutex_enter(&rdip->lock); 2689 ASSERT(rdip->count > 0); 2690 if (--rdip->count == 0) { 2691 mutex_exit(&rdip->lock); 2692 rddir4_cache_free(rdip); 2693 } else 2694 mutex_exit(&rdip->lock); 2695 } 2696 2697 /* 2698 * Free a cache element. 2699 */ 2700 static void 2701 rddir4_cache_free(rddir4_cache_impl *rdip) 2702 { 2703 rddir4_cache *rc = &rdip->rc; 2704 2705 #ifdef DEBUG 2706 atomic_dec_64(&clstat4_debug.dirent.value.ui64); 2707 #endif 2708 if (rc->entries != NULL) 2709 kmem_free(rc->entries, rc->buflen); 2710 cv_destroy(&rc->cv); 2711 mutex_destroy(&rdip->lock); 2712 kmem_free(rdip, sizeof (*rdip)); 2713 } 2714 2715 /* 2716 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat 2717 * framework. 2718 */ 2719 static int 2720 cl4_snapshot(kstat_t *ksp, void *buf, int rw) 2721 { 2722 ksp->ks_snaptime = gethrtime(); 2723 if (rw == KSTAT_WRITE) { 2724 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl)); 2725 #ifdef DEBUG 2726 /* 2727 * Currently only the global zone can write to kstats, but we 2728 * add the check just for paranoia. 2729 */ 2730 if (INGLOBALZONE(curproc)) 2731 bcopy((char *)buf + sizeof (clstat4_tmpl), 2732 &clstat4_debug, sizeof (clstat4_debug)); 2733 #endif 2734 } else { 2735 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl)); 2736 #ifdef DEBUG 2737 /* 2738 * If we're displaying the "global" debug kstat values, we 2739 * display them as-is to all zones since in fact they apply to 2740 * the system as a whole. 2741 */ 2742 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl), 2743 sizeof (clstat4_debug)); 2744 #endif 2745 } 2746 return (0); 2747 } 2748 2749 2750 2751 /* 2752 * Zone support 2753 */ 2754 static void * 2755 clinit4_zone(zoneid_t zoneid) 2756 { 2757 kstat_t *nfs4_client_kstat; 2758 struct nfs4_clnt *nfscl; 2759 uint_t ndata; 2760 2761 nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP); 2762 mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL); 2763 nfscl->nfscl_chtable4 = NULL; 2764 nfscl->nfscl_zoneid = zoneid; 2765 2766 bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl)); 2767 ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t); 2768 #ifdef DEBUG 2769 ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t); 2770 #endif 2771 if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client", 2772 "misc", KSTAT_TYPE_NAMED, ndata, 2773 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) { 2774 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat; 2775 nfs4_client_kstat->ks_snapshot = cl4_snapshot; 2776 kstat_install(nfs4_client_kstat); 2777 } 2778 mutex_enter(&nfs4_clnt_list_lock); 2779 list_insert_head(&nfs4_clnt_list, nfscl); 2780 mutex_exit(&nfs4_clnt_list_lock); 2781 2782 return (nfscl); 2783 } 2784 2785 /*ARGSUSED*/ 2786 static void 2787 clfini4_zone(zoneid_t zoneid, void *arg) 2788 { 2789 struct nfs4_clnt *nfscl = arg; 2790 chhead_t *chp, *next; 2791 2792 if (nfscl == NULL) 2793 return; 2794 mutex_enter(&nfs4_clnt_list_lock); 2795 list_remove(&nfs4_clnt_list, nfscl); 2796 mutex_exit(&nfs4_clnt_list_lock); 2797 clreclaim4_zone(nfscl, 0); 2798 for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) { 2799 ASSERT(chp->ch_list == NULL); 2800 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1); 2801 next = chp->ch_next; 2802 kmem_free(chp, sizeof (*chp)); 2803 } 2804 kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid); 2805 mutex_destroy(&nfscl->nfscl_chtable4_lock); 2806 kmem_free(nfscl, sizeof (*nfscl)); 2807 } 2808 2809 /* 2810 * Called by endpnt_destructor to make sure the client handles are 2811 * cleaned up before the RPC endpoints. This becomes a no-op if 2812 * clfini_zone (above) is called first. This function is needed 2813 * (rather than relying on clfini_zone to clean up) because the ZSD 2814 * callbacks have no ordering mechanism, so we have no way to ensure 2815 * that clfini_zone is called before endpnt_destructor. 2816 */ 2817 void 2818 clcleanup4_zone(zoneid_t zoneid) 2819 { 2820 struct nfs4_clnt *nfscl; 2821 2822 mutex_enter(&nfs4_clnt_list_lock); 2823 nfscl = list_head(&nfs4_clnt_list); 2824 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) { 2825 if (nfscl->nfscl_zoneid == zoneid) { 2826 clreclaim4_zone(nfscl, 0); 2827 break; 2828 } 2829 } 2830 mutex_exit(&nfs4_clnt_list_lock); 2831 } 2832 2833 int 2834 nfs4_subr_init(void) 2835 { 2836 /* 2837 * Allocate and initialize the client handle cache 2838 */ 2839 chtab4_cache = kmem_cache_create("client_handle4_cache", 2840 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL, 2841 NULL, 0); 2842 2843 /* 2844 * Initialize the list of per-zone client handles (and associated data). 2845 * This needs to be done before we call zone_key_create(). 2846 */ 2847 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt), 2848 offsetof(struct nfs4_clnt, nfscl_node)); 2849 2850 /* 2851 * Initialize the zone_key for per-zone client handle lists. 2852 */ 2853 zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone); 2854 2855 if (nfs4err_delay_time == 0) 2856 nfs4err_delay_time = NFS4ERR_DELAY_TIME; 2857 2858 return (0); 2859 } 2860 2861 int 2862 nfs4_subr_fini(void) 2863 { 2864 /* 2865 * Deallocate the client handle cache 2866 */ 2867 kmem_cache_destroy(chtab4_cache); 2868 2869 /* 2870 * Destroy the zone_key 2871 */ 2872 (void) zone_key_delete(nfs4clnt_zone_key); 2873 2874 return (0); 2875 } 2876 /* 2877 * Set or Clear direct I/O flag 2878 * VOP_RWLOCK() is held for write access to prevent a race condition 2879 * which would occur if a process is in the middle of a write when 2880 * directio flag gets set. It is possible that all pages may not get flushed. 2881 * 2882 * This is a copy of nfs_directio, changes here may need to be made 2883 * there and vice versa. 2884 */ 2885 2886 int 2887 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr) 2888 { 2889 int error = 0; 2890 rnode4_t *rp; 2891 2892 rp = VTOR4(vp); 2893 2894 if (cmd == DIRECTIO_ON) { 2895 2896 if (rp->r_flags & R4DIRECTIO) 2897 return (0); 2898 2899 /* 2900 * Flush the page cache. 2901 */ 2902 2903 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 2904 2905 if (rp->r_flags & R4DIRECTIO) { 2906 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2907 return (0); 2908 } 2909 2910 if (nfs4_has_pages(vp) && 2911 ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) { 2912 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 2913 B_INVAL, cr, NULL); 2914 if (error) { 2915 if (error == ENOSPC || error == EDQUOT) { 2916 mutex_enter(&rp->r_statelock); 2917 if (!rp->r_error) 2918 rp->r_error = error; 2919 mutex_exit(&rp->r_statelock); 2920 } 2921 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2922 return (error); 2923 } 2924 } 2925 2926 mutex_enter(&rp->r_statelock); 2927 rp->r_flags |= R4DIRECTIO; 2928 mutex_exit(&rp->r_statelock); 2929 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2930 return (0); 2931 } 2932 2933 if (cmd == DIRECTIO_OFF) { 2934 mutex_enter(&rp->r_statelock); 2935 rp->r_flags &= ~R4DIRECTIO; /* disable direct mode */ 2936 mutex_exit(&rp->r_statelock); 2937 return (0); 2938 } 2939 2940 return (EINVAL); 2941 } 2942 2943 /* 2944 * Return TRUE if the file has any pages. Always go back to 2945 * the master vnode to check v_pages since none of the shadows 2946 * can have pages. 2947 */ 2948 2949 bool_t 2950 nfs4_has_pages(vnode_t *vp) 2951 { 2952 rnode4_t *rp; 2953 2954 rp = VTOR4(vp); 2955 if (IS_SHADOW(vp, rp)) 2956 vp = RTOV4(rp); /* RTOV4 always gives the master */ 2957 2958 return (vn_has_cached_data(vp)); 2959 } 2960 2961 /* 2962 * This table is used to determine whether the client should attempt 2963 * failover based on the clnt_stat value returned by CLNT_CALL. The 2964 * clnt_stat is used as an index into the table. If 2965 * the error value that corresponds to the clnt_stat value in the 2966 * table is non-zero, then that is the error to be returned AND 2967 * that signals that failover should be attempted. 2968 * 2969 * Special note: If the RPC_ values change, then direct indexing of the 2970 * table is no longer valid, but having the RPC_ values in the table 2971 * allow the functions to detect the change and issue a warning. 2972 * In this case, the code will always attempt failover as a defensive 2973 * measure. 2974 */ 2975 2976 static struct try_failover_tab { 2977 enum clnt_stat cstat; 2978 int error; 2979 } try_failover_table [] = { 2980 2981 RPC_SUCCESS, 0, 2982 RPC_CANTENCODEARGS, 0, 2983 RPC_CANTDECODERES, 0, 2984 RPC_CANTSEND, ECOMM, 2985 RPC_CANTRECV, ECOMM, 2986 RPC_TIMEDOUT, ETIMEDOUT, 2987 RPC_VERSMISMATCH, 0, 2988 RPC_AUTHERROR, 0, 2989 RPC_PROGUNAVAIL, 0, 2990 RPC_PROGVERSMISMATCH, 0, 2991 RPC_PROCUNAVAIL, 0, 2992 RPC_CANTDECODEARGS, 0, 2993 RPC_SYSTEMERROR, ENOSR, 2994 RPC_UNKNOWNHOST, EHOSTUNREACH, 2995 RPC_RPCBFAILURE, ENETUNREACH, 2996 RPC_PROGNOTREGISTERED, ECONNREFUSED, 2997 RPC_FAILED, ETIMEDOUT, 2998 RPC_UNKNOWNPROTO, EHOSTUNREACH, 2999 RPC_INTR, 0, 3000 RPC_UNKNOWNADDR, EHOSTUNREACH, 3001 RPC_TLIERROR, 0, 3002 RPC_NOBROADCAST, EHOSTUNREACH, 3003 RPC_N2AXLATEFAILURE, ECONNREFUSED, 3004 RPC_UDERROR, 0, 3005 RPC_INPROGRESS, 0, 3006 RPC_STALERACHANDLE, EINVAL, 3007 RPC_CANTCONNECT, ECONNREFUSED, 3008 RPC_XPRTFAILED, ECONNABORTED, 3009 RPC_CANTCREATESTREAM, ECONNREFUSED, 3010 RPC_CANTSTORE, ENOBUFS 3011 }; 3012 3013 /* 3014 * nfs4_try_failover - determine whether the client should 3015 * attempt failover based on the values stored in the nfs4_error_t. 3016 */ 3017 int 3018 nfs4_try_failover(nfs4_error_t *ep) 3019 { 3020 if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE) 3021 return (TRUE); 3022 3023 if (ep->error && ep->rpc_status != RPC_SUCCESS) 3024 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE); 3025 3026 return (FALSE); 3027 } 3028 3029 /* 3030 * try_failover - internal version of nfs4_try_failover, called 3031 * only by rfscall and aclcall. Determine if failover is warranted 3032 * based on the clnt_stat and return the error number if it is. 3033 */ 3034 static int 3035 try_failover(enum clnt_stat rpc_status) 3036 { 3037 int err = 0; 3038 3039 if (rpc_status == RPC_SUCCESS) 3040 return (0); 3041 3042 #ifdef DEBUG 3043 if (rpc_status != 0 && nfs4_try_failover_any) { 3044 err = ETIMEDOUT; 3045 goto done; 3046 } 3047 #endif 3048 /* 3049 * The rpc status is used as an index into the table. 3050 * If the rpc status is outside of the range of the 3051 * table or if the rpc error numbers have been changed 3052 * since the table was constructed, then print a warning 3053 * (DEBUG only) and try failover anyway. Otherwise, just 3054 * grab the resulting error number out of the table. 3055 */ 3056 if (rpc_status < RPC_SUCCESS || rpc_status >= 3057 sizeof (try_failover_table)/sizeof (try_failover_table[0]) || 3058 try_failover_table[rpc_status].cstat != rpc_status) { 3059 3060 err = ETIMEDOUT; 3061 #ifdef DEBUG 3062 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d", 3063 rpc_status); 3064 #endif 3065 } else 3066 err = try_failover_table[rpc_status].error; 3067 3068 done: 3069 if (rpc_status) 3070 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3071 "nfs4_try_failover: %strying failover on error %d", 3072 err ? "" : "NOT ", rpc_status)); 3073 3074 return (err); 3075 } 3076 3077 void 3078 nfs4_error_zinit(nfs4_error_t *ep) 3079 { 3080 ep->error = 0; 3081 ep->stat = NFS4_OK; 3082 ep->rpc_status = RPC_SUCCESS; 3083 } 3084 3085 void 3086 nfs4_error_init(nfs4_error_t *ep, int error) 3087 { 3088 ep->error = error; 3089 ep->stat = NFS4_OK; 3090 ep->rpc_status = RPC_SUCCESS; 3091 } 3092 3093 3094 #ifdef DEBUG 3095 3096 /* 3097 * Return a 16-bit hash for filehandle, stateid, clientid, owner. 3098 * use the same algorithm as for NFS v3. 3099 * 3100 */ 3101 int 3102 hash16(void *p, int len) 3103 { 3104 int i, rem; 3105 uint_t *wp; 3106 uint_t key = 0; 3107 3108 /* protect against non word aligned */ 3109 if ((rem = len & 3) != 0) 3110 len &= ~3; 3111 3112 for (i = 0, wp = (uint_t *)p; i < len; i += 4, wp++) { 3113 key ^= (*wp >> 16) ^ *wp; 3114 } 3115 3116 /* hash left-over bytes */ 3117 for (i = 0; i < rem; i++) 3118 key ^= *((uchar_t *)p + i); 3119 3120 return (key & 0xffff); 3121 } 3122 3123 /* 3124 * rnode4info - return filehandle and path information for an rnode. 3125 * XXX MT issues: uses a single static buffer, no locking of path. 3126 */ 3127 char * 3128 rnode4info(rnode4_t *rp) 3129 { 3130 static char buf[80]; 3131 nfs4_fhandle_t fhandle; 3132 char *path; 3133 char *type; 3134 3135 if (rp == NULL) 3136 return ("null"); 3137 if (rp->r_flags & R4ISXATTR) 3138 type = "attr"; 3139 else if (RTOV4(rp)->v_flag & V_XATTRDIR) 3140 type = "attrdir"; 3141 else if (RTOV4(rp)->v_flag & VROOT) 3142 type = "root"; 3143 else if (RTOV4(rp)->v_type == VDIR) 3144 type = "dir"; 3145 else if (RTOV4(rp)->v_type == VREG) 3146 type = "file"; 3147 else 3148 type = "other"; 3149 sfh4_copyval(rp->r_fh, &fhandle); 3150 path = fn_path(rp->r_svnode.sv_name); 3151 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n", 3152 (void *)rp, path, type, rp->r_flags, 3153 hash16((void *)&fhandle.fh_buf, fhandle.fh_len)); 3154 kmem_free(path, strlen(path)+1); 3155 return (buf); 3156 } 3157 #endif 3158