1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cmn_err.h> 37 #include <sys/vtrace.h> 38 #include <sys/session.h> 39 #include <sys/thread.h> 40 #include <sys/dnlc.h> 41 #include <sys/cred_impl.h> 42 #include <sys/list.h> 43 #include <sys/sdt.h> 44 #include <sys/policy.h> 45 46 #include <rpc/types.h> 47 #include <rpc/xdr.h> 48 49 #include <nfs/nfs.h> 50 51 #include <nfs/nfs_clnt.h> 52 53 #include <nfs/nfs4.h> 54 #include <nfs/rnode4.h> 55 #include <nfs/nfs4_clnt.h> 56 57 /* 58 * client side statistics 59 */ 60 static const struct clstat4 clstat4_tmpl = { 61 { "calls", KSTAT_DATA_UINT64 }, 62 { "badcalls", KSTAT_DATA_UINT64 }, 63 { "clgets", KSTAT_DATA_UINT64 }, 64 { "cltoomany", KSTAT_DATA_UINT64 }, 65 #ifdef DEBUG 66 { "clalloc", KSTAT_DATA_UINT64 }, 67 { "noresponse", KSTAT_DATA_UINT64 }, 68 { "failover", KSTAT_DATA_UINT64 }, 69 { "remap", KSTAT_DATA_UINT64 }, 70 #endif 71 }; 72 73 #ifdef DEBUG 74 struct clstat4_debug clstat4_debug = { 75 { "nrnode", KSTAT_DATA_UINT64 }, 76 { "access", KSTAT_DATA_UINT64 }, 77 { "dirent", KSTAT_DATA_UINT64 }, 78 { "dirents", KSTAT_DATA_UINT64 }, 79 { "reclaim", KSTAT_DATA_UINT64 }, 80 { "clreclaim", KSTAT_DATA_UINT64 }, 81 { "f_reclaim", KSTAT_DATA_UINT64 }, 82 { "a_reclaim", KSTAT_DATA_UINT64 }, 83 { "r_reclaim", KSTAT_DATA_UINT64 }, 84 { "r_path", KSTAT_DATA_UINT64 }, 85 }; 86 #endif 87 88 /* 89 * We keep a global list of per-zone client data, so we can clean up all zones 90 * if we get low on memory. 
 */
static list_t nfs4_clnt_list;		/* all per-zone nfs4_clnt structures */
static kmutex_t nfs4_clnt_list_lock;	/* protects nfs4_clnt_list */
static zone_key_t nfs4clnt_zone_key;	/* zone-specific-data key */

/* kmem cache for chtab client-handle cache entries (see clget4()) */
static struct kmem_cache *chtab4_cache;

#ifdef DEBUG
static int nfs4_rfscall_debug;		/* enables NFS4_DEBUG in nfs4_rfscall */
static int nfs4_try_failover_any;
int nfs4_utf8_debug = 0;		/* log invalid UTF-8 filenames */
#endif

/*
 * NFSv4 readdir cache implementation
 */
typedef struct rddir4_cache_impl {
	rddir4_cache	rc;		/* readdir cache element */
	kmutex_t	lock;		/* lock protects count */
	uint_t		count;		/* reference count */
	avl_node_t	tree;		/* AVL tree link */
} rddir4_cache_impl;

static int rddir4_cache_compar(const void *, const void *);
static void rddir4_cache_free(rddir4_cache_impl *);
static rddir4_cache *rddir4_cache_alloc(int);
static void rddir4_cache_hold(rddir4_cache *);
static int try_failover(enum clnt_stat);

/* readdir cache statistics (debug/observability counters) */
static int nfs4_readdir_cache_hits = 0;
static int nfs4_readdir_cache_waits = 0;
static int nfs4_readdir_cache_misses = 0;

/*
 * Shared nfs4 functions
 */

/*
 * Copy an nfs_fh4.  The destination storage (to->nfs_fh4_val) must already
 * be allocated, and must be at least from->nfs_fh4_len bytes.
 */
void
nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
{
	to->nfs_fh4_len = from->nfs_fh4_len;
	bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
}
144 */ 145 146 int 147 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2) 148 { 149 const char *c1, *c2; 150 151 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len) 152 return (-1); 153 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len) 154 return (1); 155 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val; 156 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len; 157 c1++, c2++) { 158 if (*c1 < *c2) 159 return (-1); 160 if (*c1 > *c2) 161 return (1); 162 } 163 164 return (0); 165 } 166 167 /* 168 * Compare two v4 filehandles. Return zero if they're the same, non-zero 169 * if they're not. Like nfs4cmpfh(), but different filehandle 170 * representation, and doesn't provide information about greater than or 171 * less than. 172 */ 173 174 int 175 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2) 176 { 177 if (fh1->fh_len == fh2->fh_len) 178 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len)); 179 180 return (1); 181 } 182 183 int 184 stateid4_cmp(stateid4 *s1, stateid4 *s2) 185 { 186 if (bcmp(s1, s2, sizeof (stateid4)) == 0) 187 return (1); 188 else 189 return (0); 190 } 191 192 nfsstat4 193 puterrno4(int error) 194 { 195 switch (error) { 196 case 0: 197 return (NFS4_OK); 198 case EPERM: 199 return (NFS4ERR_PERM); 200 case ENOENT: 201 return (NFS4ERR_NOENT); 202 case EINTR: 203 return (NFS4ERR_IO); 204 case EIO: 205 return (NFS4ERR_IO); 206 case ENXIO: 207 return (NFS4ERR_NXIO); 208 case ENOMEM: 209 return (NFS4ERR_RESOURCE); 210 case EACCES: 211 return (NFS4ERR_ACCESS); 212 case EBUSY: 213 return (NFS4ERR_IO); 214 case EEXIST: 215 return (NFS4ERR_EXIST); 216 case EXDEV: 217 return (NFS4ERR_XDEV); 218 case ENODEV: 219 return (NFS4ERR_IO); 220 case ENOTDIR: 221 return (NFS4ERR_NOTDIR); 222 case EISDIR: 223 return (NFS4ERR_ISDIR); 224 case EINVAL: 225 return (NFS4ERR_INVAL); 226 case EMFILE: 227 return (NFS4ERR_RESOURCE); 228 case EFBIG: 229 return (NFS4ERR_FBIG); 230 case ENOSPC: 231 return (NFS4ERR_NOSPC); 232 case EROFS: 233 return (NFS4ERR_ROFS); 234 case EMLINK: 235 
return (NFS4ERR_MLINK); 236 case EDEADLK: 237 return (NFS4ERR_DEADLOCK); 238 case ENOLCK: 239 return (NFS4ERR_DENIED); 240 case EREMOTE: 241 return (NFS4ERR_SERVERFAULT); 242 case ENOTSUP: 243 return (NFS4ERR_NOTSUPP); 244 case EDQUOT: 245 return (NFS4ERR_DQUOT); 246 case ENAMETOOLONG: 247 return (NFS4ERR_NAMETOOLONG); 248 case EOVERFLOW: 249 return (NFS4ERR_INVAL); 250 case ENOSYS: 251 return (NFS4ERR_NOTSUPP); 252 case ENOTEMPTY: 253 return (NFS4ERR_NOTEMPTY); 254 case EOPNOTSUPP: 255 return (NFS4ERR_NOTSUPP); 256 case ESTALE: 257 return (NFS4ERR_STALE); 258 case EAGAIN: 259 if (curthread->t_flag & T_WOULDBLOCK) { 260 curthread->t_flag &= ~T_WOULDBLOCK; 261 return (NFS4ERR_DELAY); 262 } 263 return (NFS4ERR_LOCKED); 264 default: 265 return ((enum nfsstat4)error); 266 } 267 } 268 269 int 270 geterrno4(enum nfsstat4 status) 271 { 272 switch (status) { 273 case NFS4_OK: 274 return (0); 275 case NFS4ERR_PERM: 276 return (EPERM); 277 case NFS4ERR_NOENT: 278 return (ENOENT); 279 case NFS4ERR_IO: 280 return (EIO); 281 case NFS4ERR_NXIO: 282 return (ENXIO); 283 case NFS4ERR_ACCESS: 284 return (EACCES); 285 case NFS4ERR_EXIST: 286 return (EEXIST); 287 case NFS4ERR_XDEV: 288 return (EXDEV); 289 case NFS4ERR_NOTDIR: 290 return (ENOTDIR); 291 case NFS4ERR_ISDIR: 292 return (EISDIR); 293 case NFS4ERR_INVAL: 294 return (EINVAL); 295 case NFS4ERR_FBIG: 296 return (EFBIG); 297 case NFS4ERR_NOSPC: 298 return (ENOSPC); 299 case NFS4ERR_ROFS: 300 return (EROFS); 301 case NFS4ERR_MLINK: 302 return (EMLINK); 303 case NFS4ERR_NAMETOOLONG: 304 return (ENAMETOOLONG); 305 case NFS4ERR_NOTEMPTY: 306 return (ENOTEMPTY); 307 case NFS4ERR_DQUOT: 308 return (EDQUOT); 309 case NFS4ERR_STALE: 310 return (ESTALE); 311 case NFS4ERR_BADHANDLE: 312 return (ESTALE); 313 case NFS4ERR_BAD_COOKIE: 314 return (EINVAL); 315 case NFS4ERR_NOTSUPP: 316 return (EOPNOTSUPP); 317 case NFS4ERR_TOOSMALL: 318 return (EINVAL); 319 case NFS4ERR_SERVERFAULT: 320 return (EIO); 321 case NFS4ERR_BADTYPE: 322 return 
(EINVAL); 323 case NFS4ERR_DELAY: 324 return (ENXIO); 325 case NFS4ERR_SAME: 326 return (EPROTO); 327 case NFS4ERR_DENIED: 328 return (ENOLCK); 329 case NFS4ERR_EXPIRED: 330 return (EPROTO); 331 case NFS4ERR_LOCKED: 332 return (EACCES); 333 case NFS4ERR_GRACE: 334 return (EAGAIN); 335 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */ 336 return (ESTALE); 337 case NFS4ERR_SHARE_DENIED: 338 return (EACCES); 339 case NFS4ERR_WRONGSEC: 340 return (EPERM); 341 case NFS4ERR_CLID_INUSE: 342 return (EAGAIN); 343 case NFS4ERR_RESOURCE: 344 return (EAGAIN); 345 case NFS4ERR_MOVED: 346 return (EPROTO); 347 case NFS4ERR_NOFILEHANDLE: 348 return (EIO); 349 case NFS4ERR_MINOR_VERS_MISMATCH: 350 return (ENOTSUP); 351 case NFS4ERR_STALE_CLIENTID: 352 return (EIO); 353 case NFS4ERR_STALE_STATEID: 354 return (EIO); 355 case NFS4ERR_OLD_STATEID: 356 return (EIO); 357 case NFS4ERR_BAD_STATEID: 358 return (EIO); 359 case NFS4ERR_BAD_SEQID: 360 return (EIO); 361 case NFS4ERR_NOT_SAME: 362 return (EPROTO); 363 case NFS4ERR_LOCK_RANGE: 364 return (EPROTO); 365 case NFS4ERR_SYMLINK: 366 return (EPROTO); 367 case NFS4ERR_RESTOREFH: 368 return (EPROTO); 369 case NFS4ERR_LEASE_MOVED: 370 return (EPROTO); 371 case NFS4ERR_ATTRNOTSUPP: 372 return (ENOTSUP); 373 case NFS4ERR_NO_GRACE: 374 return (EPROTO); 375 case NFS4ERR_RECLAIM_BAD: 376 return (EPROTO); 377 case NFS4ERR_RECLAIM_CONFLICT: 378 return (EPROTO); 379 case NFS4ERR_BADXDR: 380 return (EINVAL); 381 case NFS4ERR_LOCKS_HELD: 382 return (EIO); 383 case NFS4ERR_OPENMODE: 384 return (EACCES); 385 case NFS4ERR_BADOWNER: 386 /* 387 * Client and server are in different DNS domains 388 * and the NFSMAPID_DOMAIN in /etc/default/nfs 389 * doesn't match. No good answer here. Return 390 * EACCESS, which translates to "permission denied". 
391 */ 392 return (EACCES); 393 case NFS4ERR_BADCHAR: 394 return (EINVAL); 395 case NFS4ERR_BADNAME: 396 return (EINVAL); 397 case NFS4ERR_BAD_RANGE: 398 return (EIO); 399 case NFS4ERR_LOCK_NOTSUPP: 400 return (ENOTSUP); 401 case NFS4ERR_OP_ILLEGAL: 402 return (EINVAL); 403 case NFS4ERR_DEADLOCK: 404 return (EDEADLK); 405 case NFS4ERR_FILE_OPEN: 406 return (EACCES); 407 case NFS4ERR_ADMIN_REVOKED: 408 return (EPROTO); 409 case NFS4ERR_CB_PATH_DOWN: 410 return (EPROTO); 411 default: 412 #ifdef DEBUG 413 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d", 414 status); 415 #endif 416 return ((int)status); 417 } 418 } 419 420 void 421 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op) 422 { 423 nfs4_server_t *server; 424 425 /* 426 * Return if already printed/queued a msg 427 * for this mount point. 428 */ 429 if (mi->mi_flags & MI4_BADOWNER_DEBUG) 430 return; 431 /* 432 * Happens once per client <-> server pair. 433 */ 434 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 435 mi->mi_flags & MI4_INT)) 436 return; 437 438 server = find_nfs4_server(mi); 439 if (server == NULL) { 440 nfs_rw_exit(&mi->mi_recovlock); 441 return; 442 } 443 444 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) { 445 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 446 "!NFSMAPID_DOMAIN does not match" 447 " the server: %s domain.\n" 448 "Please check configuration", 449 mi->mi_curr_serv->sv_hostname); 450 server->s_flags |= N4S_BADOWNER_DEBUG; 451 } 452 mutex_exit(&server->s_lock); 453 nfs4_server_rele(server); 454 nfs_rw_exit(&mi->mi_recovlock); 455 456 /* 457 * Happens once per mntinfo4_t. 458 * This error is deemed as one of the recovery facts "RF_BADOWNER", 459 * queue this in the mesg queue for this mount_info. This message 460 * is not printed, meaning its absent from id_to_dump_solo_fact() 461 * but its there for inspection if the queue is ever dumped/inspected. 
462 */ 463 mutex_enter(&mi->mi_lock); 464 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) { 465 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op, 466 FALSE, NULL, 0, NULL); 467 mi->mi_flags |= MI4_BADOWNER_DEBUG; 468 } 469 mutex_exit(&mi->mi_lock); 470 } 471 472 473 474 int 475 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime) 476 { 477 int64_t sec; 478 int32_t nsec; 479 480 /* 481 * Here check that the nfsv4 time is valid for the system. 482 * nfsv4 time value is a signed 64-bit, and the system time 483 * may be either int64_t or int32_t (depends on the kernel), 484 * so if the kernel is 32-bit, the nfsv4 time value may not fit. 485 */ 486 #ifndef _LP64 487 if (! NFS4_TIME_OK(ntime->seconds)) { 488 return (EOVERFLOW); 489 } 490 #endif 491 492 /* Invalid to specify 1 billion (or more) nsecs */ 493 if (ntime->nseconds >= 1000000000) 494 return (EINVAL); 495 496 if (ntime->seconds < 0) { 497 sec = ntime->seconds + 1; 498 nsec = -1000000000 + ntime->nseconds; 499 } else { 500 sec = ntime->seconds; 501 nsec = ntime->nseconds; 502 } 503 504 vatime->tv_sec = sec; 505 vatime->tv_nsec = nsec; 506 507 return (0); 508 } 509 510 int 511 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime) 512 { 513 int64_t sec; 514 uint32_t nsec; 515 516 /* 517 * nfsv4 time value is a signed 64-bit, and the system time 518 * may be either int64_t or int32_t (depends on the kernel), 519 * so all system time values will fit. 520 */ 521 if (vatime->tv_nsec >= 0) { 522 sec = vatime->tv_sec; 523 nsec = vatime->tv_nsec; 524 } else { 525 sec = vatime->tv_sec - 1; 526 nsec = 1000000000 + vatime->tv_nsec; 527 } 528 ntime->seconds = sec; 529 ntime->nseconds = nsec; 530 531 return (0); 532 } 533 534 /* 535 * Converts a utf8 string to a valid null terminated filename string. 536 * 537 * XXX - Not actually translating the UTF-8 string as per RFC 2279. 538 * For now, just validate that the UTF-8 string off the wire 539 * does not have characters that will freak out UFS, and leave 540 * it at that. 
541 */ 542 char * 543 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s) 544 { 545 ASSERT(lenp != NULL); 546 547 if (u8s == NULL || u8s->utf8string_len <= 0 || 548 u8s->utf8string_val == NULL) 549 return (NULL); 550 551 /* 552 * Check for obvious illegal filename chars 553 */ 554 if (utf8_strchr(u8s, '/') != NULL) { 555 #ifdef DEBUG 556 if (nfs4_utf8_debug) { 557 char *path; 558 int len = u8s->utf8string_len; 559 560 path = kmem_alloc(len + 1, KM_SLEEP); 561 bcopy(u8s->utf8string_val, path, len); 562 path[len] = '\0'; 563 564 zcmn_err(getzoneid(), CE_WARN, 565 "Invalid UTF-8 filename: %s", path); 566 567 kmem_free(path, len + 1); 568 } 569 #endif 570 return (NULL); 571 } 572 573 return (utf8_to_str(u8s, lenp, s)); 574 } 575 576 /* 577 * Converts a utf8 string to a C string. 578 * kmem_allocs a new string if not supplied 579 */ 580 char * 581 utf8_to_str(utf8string *str, uint_t *lenp, char *s) 582 { 583 char *sp; 584 char *u8p; 585 int len; 586 int i; 587 588 ASSERT(lenp != NULL); 589 590 if (str == NULL) 591 return (NULL); 592 593 u8p = str->utf8string_val; 594 len = str->utf8string_len; 595 if (len <= 0 || u8p == NULL) { 596 if (s) 597 *s = '\0'; 598 return (NULL); 599 } 600 601 sp = s; 602 if (sp == NULL) 603 sp = kmem_alloc(len + 1, KM_SLEEP); 604 605 /* 606 * At least check for embedded nulls 607 */ 608 for (i = 0; i < len; i++) { 609 sp[i] = u8p[i]; 610 if (u8p[i] == '\0') { 611 #ifdef DEBUG 612 zcmn_err(getzoneid(), CE_WARN, 613 "Embedded NULL in UTF-8 string"); 614 #endif 615 if (s == NULL) 616 kmem_free(sp, len + 1); 617 return (NULL); 618 } 619 } 620 sp[len] = '\0'; 621 *lenp = len + 1; 622 623 return (sp); 624 } 625 626 /* 627 * str_to_utf8 - converts a null-terminated C string to a utf8 string 628 */ 629 utf8string * 630 str_to_utf8(char *nm, utf8string *str) 631 { 632 int len; 633 634 if (str == NULL) 635 return (NULL); 636 637 if (nm == NULL || *nm == '\0') { 638 str->utf8string_len = 0; 639 str->utf8string_val = NULL; 640 } 641 642 len = strlen(nm); 
643 644 str->utf8string_val = kmem_alloc(len, KM_SLEEP); 645 str->utf8string_len = len; 646 bcopy(nm, str->utf8string_val, len); 647 648 return (str); 649 } 650 651 utf8string * 652 utf8_copy(utf8string *src, utf8string *dest) 653 { 654 if (src == NULL) 655 return (NULL); 656 if (dest == NULL) 657 return (NULL); 658 659 if (src->utf8string_len > 0) { 660 dest->utf8string_val = kmem_alloc(src->utf8string_len, 661 KM_SLEEP); 662 bcopy(src->utf8string_val, dest->utf8string_val, 663 src->utf8string_len); 664 dest->utf8string_len = src->utf8string_len; 665 } else { 666 dest->utf8string_val = NULL; 667 dest->utf8string_len = 0; 668 } 669 670 return (dest); 671 } 672 673 int 674 utf8_compare(const utf8string *a, const utf8string *b) 675 { 676 int mlen, cmp; 677 int alen, blen; 678 char *aval, *bval; 679 680 if ((a == NULL) && (b == NULL)) 681 return (0); 682 else if (a == NULL) 683 return (-1); 684 else if (b == NULL) 685 return (1); 686 687 alen = a->utf8string_len; 688 blen = b->utf8string_len; 689 aval = a->utf8string_val; 690 bval = b->utf8string_val; 691 692 if (((alen == 0) || (aval == NULL)) && 693 ((blen == 0) || (bval == NULL))) 694 return (0); 695 else if ((alen == 0) || (aval == NULL)) 696 return (-1); 697 else if ((blen == 0) || (bval == NULL)) 698 return (1); 699 700 mlen = MIN(alen, blen); 701 cmp = strncmp(aval, bval, mlen); 702 703 if ((cmp == 0) && (alen == blen)) 704 return (0); 705 else if ((cmp == 0) && (alen < blen)) 706 return (-1); 707 else if (cmp == 0) 708 return (1); 709 else if (cmp < 0) 710 return (-1); 711 return (1); 712 } 713 714 /* 715 * utf8_dir_verify - checks that the utf8 string is valid 716 */ 717 int 718 utf8_dir_verify(utf8string *str) 719 { 720 char *nm; 721 int len; 722 723 if (str == NULL) 724 return (0); 725 726 nm = str->utf8string_val; 727 len = str->utf8string_len; 728 if (nm == NULL || len == 0) { 729 return (0); 730 } 731 732 if (len == 1 && nm[0] == '.') 733 return (0); 734 if (len == 2 && nm[0] == '.' 
&& nm[1] == '.') 735 return (0); 736 737 if (utf8_strchr(str, '/') != NULL) 738 return (0); 739 740 if (utf8_strchr(str, '\0') != NULL) 741 return (0); 742 743 return (1); 744 } 745 746 /* 747 * from rpcsec module (common/rpcsec) 748 */ 749 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **); 750 extern void sec_clnt_freeh(AUTH *); 751 extern void sec_clnt_freeinfo(struct sec_data *); 752 753 /* 754 * authget() gets an auth handle based on the security 755 * information from the servinfo in mountinfo. 756 * The auth handle is stored in ch_client->cl_auth. 757 * 758 * First security flavor of choice is to use sv_secdata 759 * which is initiated by the client. If that fails, get 760 * secinfo from the server and then select one from the 761 * server secinfo list . 762 * 763 * For RPCSEC_GSS flavor, upon success, a secure context is 764 * established between client and server. 765 */ 766 int 767 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr) 768 { 769 int error, i; 770 771 /* 772 * SV4_TRYSECINFO indicates to try the secinfo list from 773 * sv_secinfo until a successful one is reached. Point 774 * sv_currsec to the selected security mechanism for 775 * later sessions. 776 */ 777 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 778 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) { 779 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count; 780 i++) { 781 if (!(error = sec_clnt_geth(ch_client, 782 &svp->sv_secinfo->sdata[i], 783 cr, &ch_client->cl_auth))) { 784 785 svp->sv_currsec = &svp->sv_secinfo->sdata[i]; 786 svp->sv_secinfo->index = i; 787 /* done */ 788 svp->sv_flags &= ~SV4_TRYSECINFO; 789 break; 790 } 791 792 /* 793 * Allow the caller retry with the security flavor 794 * pointed by svp->sv_secinfo->index when 795 * ETIMEDOUT/ECONNRESET occurs. 
/*
 * authget() gets an auth handle based on the security
 * information from the servinfo in mountinfo.
 * The auth handle is stored in ch_client->cl_auth.
 *
 * First security flavor of choice is to use sv_secdata
 * which is initiated by the client.  If that fails, get
 * secinfo from the server and then select one from the
 * server secinfo list .
 *
 * For RPCSEC_GSS flavor, upon success, a secure context is
 * established between client and server.
 *
 * Returns 0 on success, otherwise the sec_clnt_geth() error.
 */
int
authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
{
	int error, i;

	/*
	 * SV4_TRYSECINFO indicates to try the secinfo list from
	 * sv_secinfo until a successful one is reached.  Point
	 * sv_currsec to the selected security mechanism for
	 * later sessions.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
		for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
		    i++) {
			if (!(error = sec_clnt_geth(ch_client,
			    &svp->sv_secinfo->sdata[i],
			    cr, &ch_client->cl_auth))) {

				svp->sv_currsec = &svp->sv_secinfo->sdata[i];
				svp->sv_secinfo->index = i;
				/* done */
				svp->sv_flags &= ~SV4_TRYSECINFO;
				break;
			}

			/*
			 * Allow the caller retry with the security flavor
			 * pointed by svp->sv_secinfo->index when
			 * ETIMEDOUT/ECONNRESET occurs.
			 */
			if (error == ETIMEDOUT || error == ECONNRESET) {
				svp->sv_secinfo->index = i;
				break;
			}
		}
	} else {
		/* sv_currsec points to one of the entries in sv_secinfo */
		if (svp->sv_currsec) {
			error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
			    &ch_client->cl_auth);
		} else {
			/* If it's null, use sv_secdata. */
			error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
			    &ch_client->cl_auth);
		}
	}
	nfs_rw_exit(&svp->sv_lock);

	return (error);
}

/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 *
 * Hands back an initialized CLIENT handle (*newcl) and its cache entry
 * (*chp); on success the caller must return them via clfree4().  Handles
 * are cached per (program, version, transport dev, protocol family)
 * quadruple in nfscl->nfscl_chtable4.
 */
int
clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
    struct chtab **chp, struct nfs4_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable4_lock);
	plistp = &nfscl->nfscl_chtable4;
	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			/*
			 * Drop the lock for the KM_SLEEP allocation; the
			 * list may change underneath us, hence the goto
			 * to re-search from the top.
			 */
			mutex_exit(&nfscl->nfscl_chtable4_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable4;
		nfscl->nfscl_chtable4 = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable4) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable4;
		nfscl->nfscl_chtable4 = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable4_lock);
		/* a preallocated chhead raced and lost: free it */
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);

		/*
		 * Get an auth handle.
		 */
		error = authget(svp, cp->ch_client, cr);
		if (error || cp->ch_client->cl_auth == NULL) {
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab4_cache, cp);
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
#endif
	mutex_exit(&nfscl->nfscl_chtable4_lock);

	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
	cp->ch_head = ch;

	/* allow interruption of the create unless the mount is nointr */
	sigintr(&smask, (int)ci->cl_flags & MI4_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab4_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	/* discard the default auth; authget() installs the real one */
	auth_destroy(cp->ch_client->cl_auth);

	/*
	 * Get an auth handle.
	 */
	error = authget(svp, cp->ch_client, cr);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab4_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}
/*
 * Wrapper around clget4() that fills in the clinfo from the mntinfo4 and
 * retries on ETIMEDOUT/ECONNRESET (RPCSEC_GSS context setup can fail
 * transiently, e.g. across a server reboot).  Bails out early for soft
 * mounts, failover mounts, forced unmount, and zone shutdown.
 */
static int
nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
    struct chtab **chp, struct nfs4_clnt *nfscl)
{
	clinfo_t ci;
	bool_t is_recov;
	int firstcall, error = 0;

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	ci.cl_readsize = mi->mi_tsize;
	if (ci.cl_readsize != 0)
		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);

	/*
	 * If soft mount and server is down just try once.
	 * meaning: do not retransmit.
	 */
	if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
		ci.cl_retrans = 0;
	else
		ci.cl_retrans = mi->mi_retrans;

	ci.cl_prog = mi->mi_prog;
	ci.cl_vers = mi->mi_vers;
	ci.cl_flags = mi->mi_flags;

	/*
	 * clget4 calls authget() to get an auth handle.  For RPCSEC_GSS
	 * security flavor, the client tries to establish a security context
	 * by contacting the server.  If the connection is timed out or reset,
	 * e.g. server reboot, we will try again.
	 */
	is_recov = (curthread == mi->mi_recovthread);
	firstcall = 1;

	do {
		error = clget4(&ci, svp, cr, newcl, chp, nfscl);

		if (error == 0)
			break;

		/*
		 * For forced unmount and zone shutdown, bail out but
		 * let the recovery thread do one more transmission.
		 */
		if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
		    (!is_recov || !firstcall)) {
			error = EIO;
			break;
		}

		/* do not retry for soft mount */
		if (!(mi->mi_flags & MI4_HARD))
			break;

		/* let the caller deal with the failover case */
		if (FAILOVER_MOUNT4(mi))
			break;

		firstcall = 0;

	} while (error == ETIMEDOUT || error == ECONNRESET);

	return (error);
}

/*
 * Return a client handle obtained from clget4()/nfs_clget4() to the
 * per-chhead free list.  Frees the auth handle and timestamps the entry
 * so clreclaim4_zone() can later destroy handles idle too long.
 */
void
clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
{
	if (cl->cl_auth != NULL) {
		sec_clnt_freeh(cl->cl_auth);
		cl->cl_auth = NULL;
	}

	/*
	 * Timestamp this cache entry so that we know when it was last
	 * used.
	 */
	cp->ch_freed = gethrestime_sec();

	/*
	 * Add the free client handle to the front of the list.
	 * This way, the list will be sorted in youngest to oldest
	 * order.
	 */
	mutex_enter(&nfscl->nfscl_chtable4_lock);
	cp->ch_list = cp->ch_head->ch_list;
	cp->ch_head->ch_list = cp;
	mutex_exit(&nfscl->nfscl_chtable4_lock);
}

#define	CL_HOLDTIME	60	/* time to hold client handles */

/*
 * Destroy all cached client handles in this zone's cache that have been
 * idle for more than cl_holdtime seconds.  The per-chhead lists are kept
 * youngest-first (see clfree4()), so once one old-enough entry is found
 * the rest of that list can be reclaimed wholesale.
 */
static void
clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
{
	struct chhead *ch;
	struct chtab *cp;	/* list of objects that can be reclaimed */
	struct chtab *cpe;
	struct chtab *cpl;
	struct chtab **cpp;
#ifdef DEBUG
	int n = 0;
	clstat4_debug.clreclaim.value.ui64++;
#endif

	/*
	 * Need to reclaim some memory, so step through the cache
	 * looking through the lists for entries which can be freed.
	 */
	cp = NULL;

	mutex_enter(&nfscl->nfscl_chtable4_lock);

	/*
	 * Here we step through each non-NULL quadruple and start to
	 * construct the reclaim list pointed to by cp.  Note that
	 * cp will contain all eligible chtab entries.  When this traversal
	 * completes, chtab entries from the last quadruple will be at the
	 * front of cp and entries from previously inspected quadruples have
	 * been appended to the rear of cp.
	 */
	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_list == NULL)
			continue;
		/*
		 * Search each list for entries older then
		 * cl_holdtime seconds.  The lists are maintained
		 * in youngest to oldest order so that when the
		 * first entry is found which is old enough, then
		 * all of the rest of the entries on the list will
		 * be old enough as well.
		 */
		cpl = ch->ch_list;
		cpp = &ch->ch_list;
		while (cpl != NULL &&
		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
			cpp = &cpl->ch_list;
			cpl = cpl->ch_list;
		}
		if (cpl != NULL) {
			/* detach the old tail and splice it onto cp */
			*cpp = NULL;
			if (cp != NULL) {
				cpe = cpl;
				while (cpe->ch_list != NULL)
					cpe = cpe->ch_list;
				cpe->ch_list = cp;
			}
			cp = cpl;
		}
	}

	mutex_exit(&nfscl->nfscl_chtable4_lock);

	/*
	 * If cp is empty, then there is nothing to reclaim here.
	 */
	if (cp == NULL)
		return;

	/*
	 * Step through the list of entries to free, destroying each client
	 * handle and kmem_free'ing the memory for each entry.
	 */
	while (cp != NULL) {
#ifdef DEBUG
		n++;
#endif
		CLNT_DESTROY(cp->ch_client);
		cpl = cp->ch_list;
		kmem_cache_free(chtab4_cache, cp);
		cp = cpl;
	}

#ifdef DEBUG
	/*
	 * Update clalloc so that nfsstat shows the current number
	 * of allocated client handles.
	 */
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
#endif
}

/*
 * kmem_cache reclaim callback for chtab4_cache: reclaim idle client
 * handles from every zone when the system is low on memory.
 */
/* ARGSUSED */
static void
clreclaim4(void *all)
{
	struct nfs4_clnt *nfscl;

	/*
	 * The system is low on memory; go through and try to reclaim some from
	 * every zone on the system.
	 */
	mutex_enter(&nfs4_clnt_list_lock);
	nfscl = list_head(&nfs4_clnt_list);
	for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
		clreclaim4_zone(nfscl, CL_HOLDTIME);
	mutex_exit(&nfs4_clnt_list_lock);
}
 */
	mutex_enter(&nfs4_clnt_list_lock);
	nfscl = list_head(&nfs4_clnt_list);
	for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
		clreclaim4_zone(nfscl, CL_HOLDTIME);
	mutex_exit(&nfs4_clnt_list_lock);
}

/*
 * Minimum time-out values indexed by call type
 * These units are in "eights" of a second to avoid multiplies
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

/* Reduced wait used once the zone begins shutting down (tenths of a sec). */
#define	SHORTWAIT	(NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout, MAXTIMO is in hz of a sec.
 * backoff() doubles the timeout via dobackoff(), clamped at MAXTIMO.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))

/*
 * nfs4_rfscall():  common transmit path for NFSv4 client RPCs.
 *
 * Sends RPC procedure 'which' to the current server for 'mi', retrying
 * timed-out calls with exponential backoff until the call succeeds or an
 * unrecoverable condition is hit.  RFSCALL_SOFT in 'flags' limits the call
 * to a single timeout.  Forced unmount and zone shutdown let only the
 * recovery thread's first transmission through, so user processes can exit
 * quickly.
 *
 * Returns 0 on RPC success, otherwise an errno; the raw RPC status is
 * always passed back through 'rpc_statusp'.
 */
static int
nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
    enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
{
	CLIENT *client;
	struct chtab *ch;
	cred_t *cr = icr;
	struct rpc_err rpcerr;
	enum clnt_stat status;
	int error;
	struct timeval wait;
	int timeo;		/* in units of hz */
	bool_t tryagain, is_recov;
	bool_t cred_cloned = FALSE;
	k_sigset_t smask;
	servinfo4_t *svp;
#ifdef DEBUG
	char *bufp;
#endif
	int firstcall;

	rpcerr.re_status = RPC_SUCCESS;

	/*
	 * If we know that we are rebooting then let's
	 * not bother with doing any over the wireness.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_flags & MI4_SHUTDOWN) {
		mutex_exit(&mi->mi_lock);
		return (EIO);
	}
	mutex_exit(&mi->mi_lock);

	/* For TSOL, use a new cred which has net_mac_aware flag */
	if (!cred_cloned && is_system_labeled()) {
		cred_cloned = TRUE;
		cr = crdup(icr);
		(void) setpflags(NET_MAC_AWARE, 1, cr);
	}

	/*
	 * clget() calls clnt_tli_kinit() which clears the xid, so we
	 * are guaranteed to reprocess the retry as a new request.
	 */
	svp = mi->mi_curr_serv;
	rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
	if (rpcerr.re_errno != 0)
		/*
		 * NOTE(review): if cred_cloned was set above, the crdup()'d
		 * cred does not appear to be released on this early return;
		 * verify whether this leaks a cred reference.
		 */
		return (rpcerr.re_errno);

	/* mi_timeo is in tenths of a second; convert to clock ticks. */
	timeo = (mi->mi_timeo * hz) / 10;

	/*
	 * If hard mounted fs, retry call forever unless hard error
	 * occurs.
	 *
	 * For forced unmount, let the recovery thread through but return
	 * an error for all others.  This is so that user processes can
	 * exit quickly.  The recovery thread bails out after one
	 * transmission so that it can tell if it needs to continue.
	 *
	 * For zone shutdown, behave as above to encourage quick
	 * process exit, but also fail quickly when servers have
	 * timed out before and reduce the timeouts.
	 */
	is_recov = (curthread == mi->mi_recovthread);
	firstcall = 1;
	do {
		tryagain = FALSE;

		NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
		    "nfs4_rfscall: vfs_flag=0x%x, %s",
		    mi->mi_vfsp->vfs_flag,
		    is_recov ? "recov thread" : "not recov thread"));

		/*
		 * It's possible while we're retrying the admin
		 * decided to reboot.
		 */
		mutex_enter(&mi->mi_lock);
		if (mi->mi_flags & MI4_SHUTDOWN) {
			mutex_exit(&mi->mi_lock);
			clfree4(client, ch, nfscl);
			if (cred_cloned)
				crfree(cr);
			return (EIO);
		}
		mutex_exit(&mi->mi_lock);

		/*
		 * Forced unmount: only the recovery thread's first
		 * transmission is allowed to proceed.
		 */
		if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
		    (!is_recov || !firstcall)) {
			clfree4(client, ch, nfscl);
			if (cred_cloned)
				crfree(cr);
			return (EIO);
		}

		/*
		 * Zone shutdown: fail immediately if the server has already
		 * timed out, or if we are not the recovery thread's first
		 * call; otherwise shorten the timeout for this attempt.
		 */
		if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
			mutex_enter(&mi->mi_lock);
			if ((mi->mi_flags & MI4_TIMEDOUT) ||
			    !is_recov || !firstcall) {
				mutex_exit(&mi->mi_lock);
				clfree4(client, ch, nfscl);
				if (cred_cloned)
					crfree(cr);
				return (EIO);
			}
			mutex_exit(&mi->mi_lock);
			timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
		}

		firstcall = 0;
		TICK_TO_TIMEVAL(timeo, &wait);

		/*
		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
		 * and SIGTERM. (Preserving the existing masks).
		 * Mask out SIGINT if mount option nointr is specified.
		 */
		sigintr(&smask, (int)mi->mi_flags & MI4_INT);
		if (!(mi->mi_flags & MI4_INT))
			client->cl_nosignal = TRUE;

		/*
		 * If there is a current signal, then don't bother
		 * even trying to send out the request because we
		 * won't be able to block waiting for the response.
		 * Simply assume RPC_INTR and get on with it.
		 */
		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
			status = RPC_INTR;
		else {
			status = CLNT_CALL(client, which, xdrargs, argsp,
			    xdrres, resp, wait);
		}

		if (!(mi->mi_flags & MI4_INT))
			client->cl_nosignal = FALSE;
		/*
		 * restore original signal mask
		 */
		sigunintr(&smask);

		switch (status) {
		case RPC_SUCCESS:
			break;

		case RPC_INTR:
			/*
			 * There is no way to recover from this error,
			 * even if mount option nointr is specified.
			 * SIGKILL, for example, cannot be blocked.
			 */
			rpcerr.re_status = RPC_INTR;
			rpcerr.re_errno = EINTR;
			break;

		case RPC_UDERROR:
			/*
			 * If the NFS server is local (vold) and
			 * it goes away then we get RPC_UDERROR.
			 * This is a retryable error, so we would
			 * loop, so check to see if the specific
			 * error was ECONNRESET, indicating that
			 * target did not exist at all.  If so,
			 * return with RPC_PROGUNAVAIL and
			 * ECONNRESET to indicate why.
			 */
			CLNT_GETERR(client, &rpcerr);
			if (rpcerr.re_errno == ECONNRESET) {
				rpcerr.re_status = RPC_PROGUNAVAIL;
				rpcerr.re_errno = ECONNRESET;
				break;
			}
			/*FALLTHROUGH*/

		default:	/* probably RPC_TIMEDOUT */

			if (IS_UNRECOVERABLE_RPC(status))
				break;

			/*
			 * increment server not responding count
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_noresponse++;
			mutex_exit(&mi->mi_lock);
#ifdef DEBUG
			nfscl->nfscl_stat.noresponse.value.ui64++;
#endif
			/*
			 * On zone shutdown, mark server dead and move on.
			 */
			if (zone_status_get(curproc->p_zone) >=
			    ZONE_IS_SHUTTING_DOWN) {
				mutex_enter(&mi->mi_lock);
				mi->mi_flags |= MI4_TIMEDOUT;
				mutex_exit(&mi->mi_lock);
				clfree4(client, ch, nfscl);
				if (cred_cloned)
					crfree(cr);
				return (EIO);
			}

			/*
			 * NFS client failover support:
			 * return and let the caller take care of
			 * failover.  We only return for failover mounts
			 * because otherwise we want the "not responding"
			 * message, the timer updates, etc.
			 */
			if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
			    (error = try_failover(status)) != 0) {
				clfree4(client, ch, nfscl);
				if (cred_cloned)
					crfree(cr);
				*rpc_statusp = status;
				return (error);
			}

			if (flags & RFSCALL_SOFT)
				break;

			tryagain = TRUE;

			/*
			 * The call is in progress (over COTS).
			 * Try the CLNT_CALL again, but don't
			 * print a noisy error message.
			 */
			if (status == RPC_INPROGRESS)
				break;

			timeo = backoff(timeo);
			/*
			 * Log the "server not responding" fact once per
			 * outage (MI4_PRINTED gates the console message).
			 */
			mutex_enter(&mi->mi_lock);
			if (!(mi->mi_flags & MI4_PRINTED)) {
				mi->mi_flags |= MI4_PRINTED;
				mutex_exit(&mi->mi_lock);
				nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 0, 0, 0,
				    FALSE, NULL, 0, NULL);
			} else
				mutex_exit(&mi->mi_lock);

			if (*doqueue && curproc->p_sessp->s_vp != NULL) {
				*doqueue = 0;
				if (!(mi->mi_flags & MI4_NOPRINT))
					nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
					    0, 0, 0, FALSE, NULL, 0, NULL);
			}
		}
	} while (tryagain);

	DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
	    int, rpcerr.re_errno);

	if (status != RPC_SUCCESS) {
		zoneid_t zoneid = mi->mi_zone->zone_id;

		/*
		 * Let soft mounts use the timed out message.
		 */
		if (status == RPC_INPROGRESS)
			status = RPC_TIMEDOUT;
		nfscl->nfscl_stat.badcalls.value.ui64++;
		if (status != RPC_INTR) {
			mutex_enter(&mi->mi_lock);
			mi->mi_flags |= MI4_DOWN;
			mutex_exit(&mi->mi_lock);
			CLNT_GETERR(client, &rpcerr);
#ifdef DEBUG
			bufp = clnt_sperror(client, svp->sv_hostname);
			zprintf(zoneid, "NFS%d %s failed for %s\n",
			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
			if (curproc->p_sessp->s_vp != NULL) {
				if (!(mi->mi_flags & MI4_NOPRINT)) {
					uprintf("NFS%d %s failed for %s\n",
					    mi->mi_vers, mi->mi_rfsnames[which],
					    bufp);
				}
			}
			kmem_free(bufp, MAXPATHLEN);
#else
			zprintf(zoneid,
			    "NFS %s failed for server %s: error %d (%s)\n",
			    mi->mi_rfsnames[which], svp->sv_hostname,
			    status, clnt_sperrno(status));
			if (curproc->p_sessp->s_vp != NULL) {
				if (!(mi->mi_flags & MI4_NOPRINT)) {
					uprintf(
			    "NFS %s failed for server %s: error %d (%s)\n",
					    mi->mi_rfsnames[which],
					    svp->sv_hostname, status,
					    clnt_sperrno(status));
				}
			}
#endif
			/*
			 * when CLNT_CALL() fails with RPC_AUTHERROR,
			 * re_errno is set appropriately depending on
			 * the authentication error
			 */
			if (status == RPC_VERSMISMATCH ||
			    status == RPC_PROGVERSMISMATCH)
				rpcerr.re_errno = EIO;
		}
	} else {
		/*
		 * Test the value of mi_down and mi_printed without
		 * holding the mi_lock mutex.  If they are both zero,
		 * then it is okay to skip the down and printed
		 * processing.  This saves on a mutex_enter and
		 * mutex_exit pair for a normal, successful RPC.
		 * This was just complete overhead.
		 */
		if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_DOWN;
			if (mi->mi_flags & MI4_PRINTED) {
				mi->mi_flags &= ~MI4_PRINTED;
				mutex_exit(&mi->mi_lock);
				if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
					nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
					    0, FALSE, NULL, 0, NULL);
			} else
				mutex_exit(&mi->mi_lock);
		}

		if (*doqueue == 0) {
			if (!(mi->mi_flags & MI4_NOPRINT) &&
			    !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
				nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
				    FALSE, NULL, 0, NULL);

			*doqueue = 1;
		}
	}

	clfree4(client, ch, nfscl);
	if (cred_cloned)
		crfree(cr);

	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);

	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
	    rpcerr.re_errno);

	*rpc_statusp = status;
	return (rpcerr.re_errno);
}

/*
 * rfs4call - general wrapper for RPC calls initiated by the client.
 *
 * Issues one NFSPROC4_COMPOUND over the wire via nfs4_rfscall(), maintains
 * the per-zone and per-mount kstats, and reports the outcome (errno, NFSv4
 * status, raw RPC status) through 'ep'.
 */
void
rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp,
    cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep)
{
	int i, error;
	/*
	 * NOTE(review): NFS4_OK is an nfsstat4 value used to initialize an
	 * enum clnt_stat; this relies on NFS4_OK == 0 == RPC_SUCCESS.
	 * RPC_SUCCESS would express the intent more clearly.
	 */
	enum clnt_stat rpc_status = NFS4_OK;
	int num_resops;
	struct nfs4_clnt *nfscl;

	ASSERT(nfs_zone() == mi->mi_zone);
	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);

	nfscl->nfscl_stat.calls.value.ui64++;
	mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;

	/* Set up the results struct for XDR usage */
	resp->argsp = argsp;
	resp->array = NULL;
	resp->status = 0;
	resp->decode_len = 0;

	error = nfs4_rfscall(mi, NFSPROC4_COMPOUND,
	    xdr_COMPOUND4args_clnt, (caddr_t)argsp,
	    xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
	    doqueue, &rpc_status, flags, nfscl);

	/* Return now if it was an RPC error */
	if (error) {
		ep->error = error;
		ep->stat = resp->status;
		ep->rpc_status = rpc_status;
		return;
	}

	/* else we'll count the processed operations */
	num_resops = resp->decode_len;
	for (i = 0; i < num_resops; i++) {
		/*
		 * Count the individual operations
		 * processed by the server.  The range check guards the
		 * mi_reqs[] kstat array against out-of-range op numbers.
		 */
		if (resp->array[i].resop >= NFSPROC4_NULL &&
		    resp->array[i].resop <= OP_WRITE)
			mi->mi_reqs[resp->array[i].resop].value.ui64++;
	}

	ep->error = 0;
	ep->stat = resp->status;
	ep->rpc_status = rpc_status;
}

/*
 * nfs4rename_update - updates stored state after a rename.  Currently this
 * is the path of the object and anything under it, and the filehandle of
 * the renamed object.
 */
void
nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
{
	/* Point the renamed object's shared filehandle at the new FH. */
	sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
	/* Rehang the shadow-vnode name 'nnm' under the new parent. */
	fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
}

/*
 * Routine to look up the filehandle for the given path and rootvp.
 *
 * Return values:
 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
 *   updated.
 * - error: return value (errno value) and/or *statp is set appropriately.
 */
#define	RML_ORDINARY	1
#define	RML_NAMED_ATTR	2
#define	RML_ATTRDIR	3

/*
 * remap_lookup():  walk 'fname' from 'rootvp' over the wire and return the
 * object's filehandle/attributes (and, when the response carries them, the
 * parent's) for failover remapping.
 *
 * 'filetype' selects the lookup flavor (RML_ORDINARY, RML_NAMED_ATTR,
 * RML_ATTRDIR).  'pfhp'/'pgarp' may be NULL when parent info is not wanted.
 * On success the caller owns the kmem_alloc'd fh buffers in *fhp / *pfhp
 * and must free them.  Errors are reported through 'ep'.
 */
static void
remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
    int filetype, cred_t *cr,
    nfs_fh4 *fhp, nfs4_ga_res_t *garp,		/* fh, attrs for object */
    nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp,	/* fh, attrs for parent */
    nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	nfs_resop4 *resop;
	int num_argops;
	lookup4_param_t lookuparg;
	nfs_fh4 *tmpfhp;
	int doqueue = 1;
	char *path;
	mntinfo4_t *mi;

	ASSERT(fname != NULL);
	ASSERT(rootvp->v_type == VDIR);

	mi = VTOMI4(rootvp);
	path = fn_path(fname);
	switch (filetype) {
	case RML_NAMED_ATTR:
		lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
		args.ctag = TAG_REMAP_LOOKUP_NA;
		break;
	case RML_ATTRDIR:
		lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
		args.ctag = TAG_REMAP_LOOKUP_AD;
		break;
	case RML_ORDINARY:
		lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
		args.ctag = TAG_REMAP_LOOKUP;
		break;
	default:
		/*
		 * NOTE(review): 'path' from fn_path() is not freed on this
		 * early return -- verify callers never pass an unknown
		 * filetype, or free it here.
		 */
		ep->error = EINVAL;
		return;
	}
	lookuparg.argsp = &args;
	lookuparg.resp = &res;
	lookuparg.header_len = 1;	/* Putfh */
	lookuparg.trailer_len = 0;
	lookuparg.ga_bits = NFS4_VATTR_MASK;
	lookuparg.mi = VTOMI4(rootvp);

	/* Builds the PUTFH/LOOKUP.../GETFH/GETATTR compound in args.array. */
	(void) nfs4lookup_setup(path, &lookuparg, 1);

	/* 0: putfh directory */
	argop = args.array;
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;

	num_argops = args.array_len;

	rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);

	if (ep->error || res.status != NFS4_OK)
		goto exit;

	/*
	 * Get the object filehandle.  nfs4lookup_setup() places the
	 * object's GETFH second-to-last and its GETATTR last in the
	 * compound, hence the fixed offsets from the end of the array.
	 */
	resop = &res.array[res.array_len - 2];
	if (resop->resop != OP_GETFH) {
		nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
		    0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
		ep->stat = NFS4ERR_SERVERFAULT;
		goto exit;
	}
	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
	if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
		nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
		    tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
		    TAG_NONE, 0, 0);
		ep->stat = NFS4ERR_SERVERFAULT;
		goto exit;
	}
	fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
	nfs_fh4_copy(tmpfhp, fhp);

	/* get the object attributes */
	resop = &res.array[res.array_len - 1];
	if (garp && resop->resop == OP_GETATTR)
		*garp = resop->nfs_resop4_u.opgetattr.ga_res;

	/* See if there are enough fields in the response for parent info */
	if ((int)res.array_len - 5 <= 0)
		goto exit;

	/*
	 * Get the parent filehandle; the parent's GETFH/GETATTR sit just
	 * before the final LOOKUP of the object, at offsets -5 and -4.
	 */
	resop = &res.array[res.array_len - 5];
	if (resop->resop != OP_GETFH) {
		nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
		    0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
		ep->stat = NFS4ERR_SERVERFAULT;
		goto exit;
	}
	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
	if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
		nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
		    tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
		    TAG_NONE, 0, 0);
		ep->stat = NFS4ERR_SERVERFAULT;
		goto exit;
	}
	pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
	nfs_fh4_copy(tmpfhp, pfhp);

	/* get the parent attributes */
	resop = &res.array[res.array_len - 4];
	if (pgarp && resop->resop == OP_GETATTR)
		*pgarp = resop->nfs_resop4_u.opgetattr.ga_res;

exit:
	/*
	 * It is too hard to remember where all the OP_LOOKUPs are
	 */
	nfs4args_lookup_free(argop, num_argops);
	kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));

	/* Only a successfully (even partially) decoded res needs freeing. */
	if (!ep->error)
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	kmem_free(path, strlen(path)+1);
}
/*
 * NFS client failover / volatile filehandle support
 *
 * Recover the filehandle for the given rnode.
 *
 * Errors are returned via the nfs4_error_t parameter.
 */
void
nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
{
	rnode4_t *rp = VTOR4(vp);
	vnode_t *rootvp = NULL;
	vnode_t *dvp = NULL;
	cred_t *cr, *cred_otw;
	nfs4_ga_res_t gar, pgar;
	nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
	int filetype = RML_ORDINARY;
	nfs4_recov_state_t recov = {NULL, 0, 0};
	int badfhcount = 0;	/* guards against remap-root loops */
	nfs4_open_stream_t *osp = NULL;
	bool_t first_time = TRUE;	/* first time getting OTW cred */
	bool_t last_time = FALSE;	/* last time getting OTW cred */

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_remap_file: remapping %s", rnode4info(rp)));
	ASSERT(nfs4_consistent_type(vp));

	/* The root of the filesystem is remapped by its own routine. */
	if (vp->v_flag & VROOT) {
		nfs4_remap_root(mi, ep, flags);
		return;
	}

	/*
	 * Given the root fh, use the path stored in
	 * the rnode to find the fh for the new server.
	 */
	ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
	if (ep->error != 0)
		return;

	cr = curthread->t_cred;
	ASSERT(cr != NULL);
get_remap_cred:
	/*
	 * Releases the osp, if it is provided.
	 * Puts a hold on the cred_otw and the new osp (if found).
	 */
	cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
	    &first_time, &last_time);
	ASSERT(cred_otw != NULL);

	/* Named attributes and attr dirs need their own lookup flavor. */
	if (rp->r_flags & R4ISXATTR) {
		filetype = RML_NAMED_ATTR;
		(void) vtodv(vp, &dvp, cred_otw, FALSE);
	}

	if (vp->v_flag & V_XATTRDIR) {
		filetype = RML_ATTRDIR;
	}

	if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
		/* file mount, doesn't need a remap */
		goto done;
	}

again:
	remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
	    &newfh, &gar, &newpfh, &pgar, ep);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_remap_file: remap_lookup returned %d/%d",
	    ep->error, ep->stat));

	/* Local EACCES: retry with the next over-the-wire credential. */
	if (last_time == FALSE && ep->error == EACCES) {
		crfree(cred_otw);
		if (dvp != NULL)
			VN_RELE(dvp);
		goto get_remap_cred;
	}
	if (ep->error != 0)
		goto done;

	switch (ep->stat) {
	case NFS4_OK:
		badfhcount = 0;
		if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
			/* server was initially unavailable; tell the user */
			mutex_enter(&rp->r_statelock);
			rp->r_delay_interval = 0;
			mutex_exit(&rp->r_statelock);
			uprintf("NFS File Available..\n");
		}
		break;
	case NFS4ERR_FHEXPIRED:
	case NFS4ERR_BADHANDLE:
		/*
		 * If we ran into filehandle problems, we should try to
		 * remap the root vnode first and hope life gets better.
		 * But we need to avoid loops.
		 */
		if (badfhcount++ > 0)
			goto done;
		if (newfh.nfs_fh4_len != 0) {
			kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
			newfh.nfs_fh4_len = 0;
		}
		if (newpfh.nfs_fh4_len != 0) {
			kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
			newpfh.nfs_fh4_len = 0;
		}
		/* relative path - remap rootvp then retry */
		VN_RELE(rootvp);
		rootvp = NULL;
		nfs4_remap_root(mi, ep, flags);
		if (ep->error != 0 || ep->stat != NFS4_OK)
			goto done;
		ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
		if (ep->error != 0)
			goto done;
		goto again;
	case NFS4ERR_DELAY:
		/* server asked us to back off; wait and retry */
		badfhcount = 0;
		nfs4_set_delay_wait(vp);
		ep->error = nfs4_wait_for_delay(vp, &recov);
		if (ep->error != 0)
			goto done;
		goto again;
	case NFS4ERR_ACCESS:
		/* get new cred, try again */
		if (last_time == TRUE)
			goto done;
		if (dvp != NULL)
			VN_RELE(dvp);
		crfree(cred_otw);
		goto get_remap_cred;
	default:
		goto done;
	}

	/*
	 * Check on the new and old rnodes before updating;
	 * if the vnode type or size changes, issue a warning
	 * and mark the file dead.
	 */
	mutex_enter(&rp->r_statelock);
	if (flags & NFS4_REMAP_CKATTRS) {
		if (vp->v_type != gar.n4g_va.va_type ||
		    (vp->v_type != VDIR &&
		    rp->r_size != gar.n4g_va.va_size)) {
			NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
			    "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
			    (int)rp->r_size, (int)gar.n4g_va.va_size,
			    vp->v_type, gar.n4g_va.va_type));
			mutex_exit(&rp->r_statelock);
			nfs4_queue_event(RE_FILE_DIFF, mi,
			    rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
			    TAG_NONE, TAG_NONE, 0, 0);
			nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
			goto done;
		}
	}
	ASSERT(gar.n4g_va.va_type != VNON);
	rp->r_server = mi->mi_curr_serv;

	/*
	 * Track the server's fsid for this file and flag the rnode as a
	 * server "stub" when it differs from the mount's fsid.
	 */
	if (gar.n4g_fsid_valid) {
		(void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
		rp->r_srv_fsid = gar.n4g_fsid;
		if (FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
			rp->r_flags &= ~R4SRVSTUB;
		else
			rp->r_flags |= R4SRVSTUB;
		nfs_rw_exit(&rp->r_server->sv_lock);
#ifdef DEBUG
	} else {
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "remap_file: fsid attr not provided by server. rp=%p",
		    (void *)rp));
#endif
	}
	mutex_exit(&rp->r_statelock);
	nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
	sfh4_update(rp->r_fh, &newfh);
	ASSERT(nfs4_consistent_type(vp));

	/*
	 * If we got parent info, use it to update the parent
	 */
	if (newpfh.nfs_fh4_len != 0) {
		if (rp->r_svnode.sv_dfh != NULL)
			sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
		if (dvp != NULL) {
			/* force update of attrs */
			nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
		}
	}
done:
	if (newfh.nfs_fh4_len != 0)
		kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
	if (newpfh.nfs_fh4_len != 0)
		kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
	if (cred_otw != NULL)
		crfree(cred_otw);
	if (rootvp != NULL)
		VN_RELE(rootvp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if (osp != NULL)
		open_stream_rele(osp, rp);
}

/*
 * Client-side failover support: remap the filehandle for vp if it appears
 * necessary.
errors are returned via the nfs4_error_t parameter; though, 1977 * if there is a problem, we will just try again later. 1978 */ 1979 1980 void 1981 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 1982 { 1983 if (vp == NULL) 1984 return; 1985 1986 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY)) 1987 return; 1988 1989 if (VTOR4(vp)->r_server == mi->mi_curr_serv) 1990 return; 1991 1992 nfs4_remap_file(mi, vp, flags, ep); 1993 } 1994 1995 /* 1996 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node. 1997 * 1998 * Our caller has a filehandle for ".." relative to a particular 1999 * directory object. We want to find or create a parent vnode 2000 * with that filehandle and return it. We can of course create 2001 * a vnode from this filehandle, but we need to also make sure 2002 * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR) 2003 * that we have a parent FH for future reopens as well. If 2004 * we have a remap failure, we won't be able to reopen this 2005 * file, but we won't treat that as fatal because a reopen 2006 * is at least unlikely. Someday nfs4_reopen() should look 2007 * for a missing parent FH and try a remap to recover from it. 2008 * 2009 * need_start_op argument indicates whether this function should 2010 * do a start_op before calling remap_lookup(). This should 2011 * be FALSE, if you are the recovery thread or in an op; otherwise, 2012 * set it to TRUE. 
 */
int
nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
    cred_t *cr, vnode_t **vpp, int need_start_op)
{
	mntinfo4_t *mi = VTOMI4(dvp);
	nfs4_fname_t *np = NULL, *pnp = NULL;
	vnode_t *vp = NULL, *rootvp = NULL;
	rnode4_t *rp;
	nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
	nfs4_ga_res_t gar, pgar;
	vattr_t va, pva;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
	nfs4_recov_state_t recov_state;

#ifdef DEBUG
	/*
	 * ensure need_start_op is correct: callers inside an op (tsd set)
	 * or on the recovery thread must pass FALSE, everyone else TRUE.
	 */
	{
		int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
		    (curthread == mi->mi_recovthread));
		/* C needs a ^^ operator! */
		ASSERT(((need_start_op) && (!no_need_start_op)) ||
		    ((! need_start_op) && (no_need_start_op)));
	}
#endif
	ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());

	NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
	    "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
	    rnode4info(VTOR4(dvp))));

	/*
	 * rootvp might be needed eventually. Holding it now will
	 * ensure that r4find_unlocked() will find it, if ".." is the root.
	 */
	e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
	if (e.error != 0)
		goto out;
	/* Fast path: the parent rnode may already exist in the cache. */
	rp = r4find_unlocked(fhp, mi->mi_vfsp);
	if (rp != NULL) {
		*vpp = RTOV4(rp);
		VN_RELE(rootvp);
		return (0);
	}

	/*
	 * Since we don't have the rnode, we have to go over the wire.
	 * remap_lookup() can get all of the filehandles and attributes
	 * we need in one operation.
	 */
	np = fn_parent(VTOSV(dvp)->sv_name);
	ASSERT(np != NULL);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;
recov_retry:
	if (need_start_op) {
		e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
		    &recov_state, NULL);
		if (e.error != 0) {
			goto out;
		}
	}
	/* VNON marks "attrs not received" until remap_lookup fills them. */
	va.va_type = VNON;
	pva.va_type = VNON;
	remap_lookup(np, rootvp, RML_ORDINARY, cr,
	    &newfh, &gar, &newpfh, &pgar, &e);
	if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
		if (need_start_op) {
			bool_t abort;

			abort = nfs4_start_recovery(&e, mi,
			    rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL);
			if (abort) {
				nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
				    &recov_state, FALSE);
				if (e.error == 0)
					e.error = EIO;
				goto out;
			}
			/* recovery started; end this op and retry */
			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
			    &recov_state, TRUE);
			goto recov_retry;
		}
		if (e.error == 0)
			e.error = EIO;
		goto out;
	}

	if (!e.error) {
		va = gar.n4g_va;
		pva = pgar.n4g_va;
	}

	/* ".." must be a directory, or something went wrong over the wire. */
	if ((e.error != 0) ||
	    (va.va_type != VDIR)) {
		if (need_start_op)
			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
			    &recov_state, FALSE);
		if (e.error == 0)
			e.error = EIO;
		goto out;
	}

	if (e.stat != NFS4_OK) {
		if (need_start_op)
			nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
			    &recov_state, FALSE);
		e.error = EIO;
		goto out;
	}

	/*
	 * It is possible for remap_lookup() to return with no error,
	 * but without providing the parent filehandle and attrs.
	 */
	if (pva.va_type != VDIR) {
		/*
		 * Call remap_lookup() again, this time with the
		 * newpfh and pgar args in the first position.
		 */
		pnp = fn_parent(np);
		if (pnp != NULL) {
			remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
			    &newpfh, &pgar, NULL, NULL, &e);
			if (nfs4_needs_recovery(&e, FALSE,
			    mi->mi_vfsp)) {
				if (need_start_op) {
					bool_t abort;

					abort = nfs4_start_recovery(&e, mi,
					    rootvp, NULL, NULL, NULL,
					    OP_LOOKUP, NULL);
					if (abort) {
						nfs4_end_fop(mi, rootvp, NULL,
						    OH_LOOKUP, &recov_state,
						    FALSE);
						if (e.error == 0)
							e.error = EIO;
						goto out;
					}
					nfs4_end_fop(mi, rootvp, NULL,
					    OH_LOOKUP, &recov_state, TRUE);
					goto recov_retry;
				}
				if (e.error == 0)
					e.error = EIO;
				goto out;
			}

			if (e.stat != NFS4_OK) {
				if (need_start_op)
					nfs4_end_fop(mi, rootvp, NULL,
					    OH_LOOKUP, &recov_state, FALSE);
				e.error = EIO;
				goto out;
			}
		}
		/*
		 * NOTE(review): pva is only refreshed from the first
		 * remap_lookup(); here it appears to still hold the stale
		 * VNON/VDIR from before the retry -- confirm the second
		 * lookup's pgar is meant to be checked instead.
		 */
		if ((pnp == NULL) ||
		    (e.error != 0) ||
		    (pva.va_type == VNON)) {
			if (need_start_op)
				nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
				    &recov_state, FALSE);
			if (e.error == 0)
				e.error = EIO;
			goto out;
		}
	}
	ASSERT(newpfh.nfs_fh4_len != 0);
	if (need_start_op)
		nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
	psfh = sfh4_get(&newpfh, mi);

	sfh = sfh4_get(&newfh, mi);
	vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);

out:
	/* Single cleanup path: release whatever was acquired above. */
	if (np != NULL)
		fn_rele(&np);
	if (pnp != NULL)
		fn_rele(&pnp);
	if (newfh.nfs_fh4_len != 0)
		kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
	if (newpfh.nfs_fh4_len != 0)
		kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
	if (sfh != NULL)
		sfh4_rele(&sfh);
	if (psfh != NULL)
		sfh4_rele(&psfh);
	if (rootvp != NULL)
		VN_RELE(rootvp);
	*vpp = vp;
	return (e.error);
}

#ifdef DEBUG
size_t r_path_memuse = 0;
#endif

/*
 * NFS client failover support
 *
 * sv4_free() frees the malloc'd
 * portion of a "servinfo_t".
 */
void
sv4_free(servinfo4_t *svp)
{
	servinfo4_t *next;
	struct knetconfig *knconf;

	/* Walk and free the entire sv_next-linked list. */
	while (svp != NULL) {
		next = svp->sv_next;
		if (svp->sv_dhsec)
			sec_clnt_freeinfo(svp->sv_dhsec);
		if (svp->sv_secdata)
			sec_clnt_freeinfo(svp->sv_secdata);
		/* sv_save_secinfo may alias sv_secinfo; free it only once */
		if (svp->sv_save_secinfo &&
		    svp->sv_save_secinfo != svp->sv_secinfo)
			secinfo_free(svp->sv_save_secinfo);
		if (svp->sv_secinfo)
			secinfo_free(svp->sv_secinfo);
		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
		knconf = svp->sv_knconf;
		if (knconf != NULL) {
			if (knconf->knc_protofmly != NULL)
				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
			if (knconf->knc_proto != NULL)
				kmem_free(knconf->knc_proto, KNC_STRSIZE);
			kmem_free(knconf, sizeof (*knconf));
		}
		knconf = svp->sv_origknconf;
		if (knconf != NULL) {
			if (knconf->knc_protofmly != NULL)
				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
			if (knconf->knc_proto != NULL)
				kmem_free(knconf->knc_proto, KNC_STRSIZE);
			kmem_free(knconf, sizeof (*knconf));
		}
		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
		if (svp->sv_path != NULL) {
			kmem_free(svp->sv_path, svp->sv_pathlen);
		}
		nfs_rw_destroy(&svp->sv_lock);
		kmem_free(svp, sizeof (*svp));
		svp = next;
	}
}

/*
 * Print a filehandle to the console as "(file handle: <hex words>)".
 * Best-effort: silently returns if the KM_NOSLEEP allocation fails.
 */
void
nfs4_printfhandle(nfs4_fhandle_t *fhp)
{
	int *ip;
	char *buf;
	size_t bufsize;
	char *cp;

	/*
	 * 13 == "(file handle:"
	 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
	 *	1 == ' '
	 *	8 == maximum strlen of "%x"
	 * 3 == ")\n\0"
	 */
	bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
	buf = kmem_alloc(bufsize, KM_NOSLEEP);
	if (buf == NULL)
		return;

	cp = buf;
	(void) strcpy(cp, "(file handle:");
	while (*cp != '\0')
		cp++;
	/* Dump the handle one int-sized word at a time, in hex. */
	for (ip = (int *)fhp->fh_buf;
	    ip < (int *)&fhp->fh_buf[fhp->fh_len];
	    ip++) {
		(void) sprintf(cp, " %x", *ip);
		while (*cp != '\0')
			cp++;
	}
	(void) strcpy(cp, ")\n");

	zcmn_err(getzoneid(), CE_CONT, "%s", buf);

	kmem_free(buf, bufsize);
}

/*
 * The NFSv4 readdir cache subsystem.
 *
 * We provide a set of interfaces to allow the rest of the system to utilize
 * a caching mechanism while encapsulating the details of the actual
 * implementation.  This should allow for better maintainability and
 * extensibilty by consolidating the implementation details in one location.
 */

/*
 * Comparator used by AVL routines.  Orders entries primarily by the
 * readdir cookie, secondarily by buffer length.
 */
static int
rddir4_cache_compar(const void *x, const void *y)
{
	rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
	rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
	rddir4_cache *a = &ai->rc;
	rddir4_cache *b = &bi->rc;

	if (a->nfs4_cookie == b->nfs4_cookie) {
		if (a->buflen == b->buflen)
			return (0);
		if (a->buflen < b->buflen)
			return (-1);
		return (1);
	}

	if (a->nfs4_cookie < b->nfs4_cookie)
		return (-1);

	return (1);
}

/*
 * Allocate an opaque handle for the readdir cache.  The cache is an AVL
 * tree hung off rp->r_dir, keyed by rddir4_cache_compar().
 */
void
rddir4_cache_create(rnode4_t *rp)
{
	ASSERT(rp->r_dir == NULL);

	rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
	    offsetof(rddir4_cache_impl, tree));
}

/*
 * Purge the cache of all cached readdir responses.
2355 */ 2356 void 2357 rddir4_cache_purge(rnode4_t *rp) 2358 { 2359 rddir4_cache_impl *rdip; 2360 rddir4_cache_impl *nrdip; 2361 2362 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2363 2364 if (rp->r_dir == NULL) 2365 return; 2366 2367 rdip = avl_first(rp->r_dir); 2368 2369 while (rdip != NULL) { 2370 nrdip = AVL_NEXT(rp->r_dir, rdip); 2371 avl_remove(rp->r_dir, rdip); 2372 rdip->rc.flags &= ~RDDIRCACHED; 2373 rddir4_cache_rele(rp, &rdip->rc); 2374 rdip = nrdip; 2375 } 2376 ASSERT(avl_numnodes(rp->r_dir) == 0); 2377 } 2378 2379 /* 2380 * Destroy the readdir cache. 2381 */ 2382 void 2383 rddir4_cache_destroy(rnode4_t *rp) 2384 { 2385 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2386 if (rp->r_dir == NULL) 2387 return; 2388 2389 rddir4_cache_purge(rp); 2390 avl_destroy(rp->r_dir); 2391 kmem_free(rp->r_dir, sizeof (avl_tree_t)); 2392 rp->r_dir = NULL; 2393 } 2394 2395 /* 2396 * Locate a readdir response from the readdir cache. 2397 * 2398 * Return values: 2399 * 2400 * NULL - If there is an unrecoverable situation like the operation may have 2401 * been interrupted. 2402 * 2403 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller. 2404 * The flags are set approprately, such that the caller knows 2405 * what state the entry is in. 2406 */ 2407 rddir4_cache * 2408 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count) 2409 { 2410 rddir4_cache_impl *rdip = NULL; 2411 rddir4_cache_impl srdip; 2412 rddir4_cache *srdc; 2413 rddir4_cache *rdc = NULL; 2414 rddir4_cache *nrdc = NULL; 2415 avl_index_t where; 2416 2417 top: 2418 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 2419 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2420 /* 2421 * Check to see if the readdir cache has been disabled. If so, then 2422 * simply allocate an rddir4_cache entry and return it, since caching 2423 * operations do not apply. 2424 */ 2425 if (rp->r_dir == NULL) { 2426 if (nrdc == NULL) { 2427 /* 2428 * Drop the lock because we are doing a sleeping 2429 * allocation. 
			 */
			mutex_exit(&rp->r_statelock);
			rdc = rddir4_cache_alloc(KM_SLEEP);
			rdc->nfs4_cookie = cookie;
			rdc->buflen = count;
			mutex_enter(&rp->r_statelock);
			return (rdc);
		}
		/* Second pass: hand back the entry allocated below. */
		return (nrdc);
	}

	/* Build a stack-resident search key for avl_find(). */
	srdc = &srdip.rc;
	srdc->nfs4_cookie = cookie;
	srdc->buflen = count;

	rdip = avl_find(rp->r_dir, &srdip, &where);

	/*
	 * If we didn't find an entry then create one and insert it
	 * into the cache.
	 */
	if (rdip == NULL) {
		/*
		 * Check for the case where we have made a second pass through
		 * the cache due to a lockless allocation.  If we find that no
		 * thread has already inserted this entry, do the insert now
		 * and return.
		 */
		if (nrdc != NULL) {
			avl_insert(rp->r_dir, nrdc->data, where);
			nrdc->flags |= RDDIRCACHED;
			/* Hold for the caller; the cache keeps the alloc ref. */
			rddir4_cache_hold(nrdc);
			return (nrdc);
		}

#ifdef DEBUG
		nfs4_readdir_cache_misses++;
#endif
		/*
		 * First, try to allocate an entry without sleeping.  If that
		 * fails then drop the lock and do a sleeping allocation.
		 */
		nrdc = rddir4_cache_alloc(KM_NOSLEEP);
		if (nrdc != NULL) {
			nrdc->nfs4_cookie = cookie;
			nrdc->buflen = count;
			avl_insert(rp->r_dir, nrdc->data, where);
			nrdc->flags |= RDDIRCACHED;
			rddir4_cache_hold(nrdc);
			return (nrdc);
		}

		/*
		 * Drop the lock and do a sleeping allocation.  We incur
		 * additional overhead by having to search the cache again,
		 * but this case should be rare.
		 */
		mutex_exit(&rp->r_statelock);
		nrdc = rddir4_cache_alloc(KM_SLEEP);
		nrdc->nfs4_cookie = cookie;
		nrdc->buflen = count;
		mutex_enter(&rp->r_statelock);
		/*
		 * We need to take another pass through the cache
		 * since we dropped our lock to perform the alloc.
		 * Another thread may have come by and inserted the
		 * entry we are interested in.
		 */
		goto top;
	}

	/*
	 * Check to see if we need to free our entry.  This can happen if
	 * another thread came along and beat us to the insert.  We can
	 * safely call rddir4_cache_free directly because no other thread
	 * would have a reference to this entry.
	 */
	if (nrdc != NULL)
		rddir4_cache_free((rddir4_cache_impl *)nrdc->data);

#ifdef DEBUG
	nfs4_readdir_cache_hits++;
#endif
	/*
	 * Found something.  Make sure it's ready to return.
	 */
	rdc = &rdip->rc;
	rddir4_cache_hold(rdc);
	/*
	 * If the cache entry is in the process of being filled in, wait
	 * until this completes.  The RDDIRWAIT bit is set to indicate that
	 * someone is waiting and when the thread currently filling the entry
	 * is done, it should do a cv_broadcast to wakeup all of the threads
	 * waiting for it to finish.  If the thread wakes up to find that
	 * someone new is now trying to complete the entry, go back
	 * to sleep.
	 */
	while (rdc->flags & RDDIR) {
		/*
		 * The entry is not complete.  Drop r_rwlock before
		 * sleeping so the thread filling the entry can progress.
		 */
		nfs_rw_exit(&rp->r_rwlock);
		rdc->flags |= RDDIRWAIT;
#ifdef DEBUG
		nfs4_readdir_cache_waits++;
#endif
		while (rdc->flags & RDDIRWAIT) {
			if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
				/*
				 * We got interrupted, probably the user
				 * typed ^C or an alarm fired.  We free the
				 * new entry if we allocated one.
				 */
				rddir4_cache_rele(rp, rdc);
				mutex_exit(&rp->r_statelock);
				/* Reacquire rwlock before statelock. */
				(void) nfs_rw_enter_sig(&rp->r_rwlock,
				    RW_READER, FALSE);
				mutex_enter(&rp->r_statelock);
				return (NULL);
			}
		}
		mutex_exit(&rp->r_statelock);
		(void) nfs_rw_enter_sig(&rp->r_rwlock,
		    RW_READER, FALSE);
		mutex_enter(&rp->r_statelock);
	}

	/*
	 * The entry we were waiting on may have been purged from
	 * the cache and should no longer be used, release it and
	 * start over.
	 */
	if (!(rdc->flags & RDDIRCACHED)) {
		rddir4_cache_rele(rp, rdc);
		goto top;
	}

	/*
	 * The entry is completed.  Return it.
	 */
	return (rdc);
}

/*
 * Allocate a cache element and return it.  Can return NULL if memory is
 * low (KM_NOSLEEP allocations only).
 */
static rddir4_cache *
rddir4_cache_alloc(int flags)
{
	rddir4_cache_impl *rdip = NULL;
	rddir4_cache *rc = NULL;

	rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);

	if (rdip != NULL) {
		rc = &rdip->rc;
		/* Back-pointer from the public struct to its container. */
		rc->data = (void *)rdip;
		rc->nfs4_cookie = 0;
		rc->nfs4_ncookie = 0;
		rc->entries = NULL;
		rc->eof = 0;
		rc->entlen = 0;
		rc->buflen = 0;
		rc->actlen = 0;
		/*
		 * A readdir is required so set the flag.
		 */
		rc->flags = RDDIRREQ;
		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
		rc->error = 0;
		mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
		/* The allocating caller owns the initial reference. */
		rdip->count = 1;
#ifdef DEBUG
		atomic_add_64(&clstat4_debug.dirent.value.ui64, 1);
#endif
	}
	return (rc);
}

/*
 * Increment the reference count to this cache element.
 */
static void
rddir4_cache_hold(rddir4_cache *rc)
{
	rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;

	mutex_enter(&rdip->lock);
	rdip->count++;
	mutex_exit(&rdip->lock);
}

/*
 * Release a reference to this cache element.  If the count is zero then
 * free the element.
 */
void
rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
{
	rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;

	ASSERT(MUTEX_HELD(&rp->r_statelock));

	/*
	 * Check to see if we have any waiters.  If so, we can wake them
	 * so that they can proceed.
	 */
	if (rdc->flags & RDDIRWAIT) {
		rdc->flags &= ~RDDIRWAIT;
		cv_broadcast(&rdc->cv);
	}

	mutex_enter(&rdip->lock);
	ASSERT(rdip->count > 0);
	if (--rdip->count == 0) {
		/* Last reference: drop the lock before destroying it. */
		mutex_exit(&rdip->lock);
		rddir4_cache_free(rdip);
	} else
		mutex_exit(&rdip->lock);
}

/*
 * Free a cache element.  Caller guarantees no other references remain.
 */
static void
rddir4_cache_free(rddir4_cache_impl *rdip)
{
	rddir4_cache *rc = &rdip->rc;

#ifdef DEBUG
	atomic_add_64(&clstat4_debug.dirent.value.ui64, -1);
#endif
	if (rc->entries != NULL)
		kmem_free(rc->entries, rc->buflen);
	cv_destroy(&rc->cv);
	mutex_destroy(&rdip->lock);
	kmem_free(rdip, sizeof (*rdip));
}

/*
 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
 * framework.
 */
static int
cl4_snapshot(kstat_t *ksp, void *buf, int rw)
{
	ksp->ks_snaptime = gethrtime();
	if (rw == KSTAT_WRITE) {
		/* Copy the supplied counters into the per-zone stats. */
		bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
#ifdef DEBUG
		/*
		 * Currently only the global zone can write to kstats, but we
		 * add the check just for paranoia.
		 */
		if (INGLOBALZONE(curproc))
			bcopy((char *)buf + sizeof (clstat4_tmpl),
			    &clstat4_debug, sizeof (clstat4_debug));
#endif
	} else {
		bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
#ifdef DEBUG
		/*
		 * If we're displaying the "global" debug kstat values, we
		 * display them as-is to all zones since in fact they apply to
		 * the system as a whole.
		 */
		bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl),
		    sizeof (clstat4_debug));
#endif
	}
	return (0);
}



/*
 * Zone support
 */

/*
 * ZSD create callback (registered via zone_key_create below): allocate
 * and initialize the per-zone NFSv4 client data, export the per-zone
 * "nfs4_client" kstat, and link the new structure onto the global
 * nfs4_clnt_list.
 */
static void *
clinit4_zone(zoneid_t zoneid)
{
	kstat_t *nfs4_client_kstat;
	struct nfs4_clnt *nfscl;
	uint_t ndata;

	nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
	mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
	nfscl->nfscl_chtable4 = NULL;
	nfscl->nfscl_zoneid = zoneid;

	/* Start from the pristine counter template. */
	bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl));
	ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
#ifdef DEBUG
	/* DEBUG kernels export the extra debug counters as well. */
	ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t);
#endif
	if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
	    "misc", KSTAT_TYPE_NAMED, ndata,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
		nfs4_client_kstat->ks_private = &nfscl->nfscl_stat;
		nfs4_client_kstat->ks_snapshot = cl4_snapshot;
		kstat_install(nfs4_client_kstat);
	}
	mutex_enter(&nfs4_clnt_list_lock);
	list_insert_head(&nfs4_clnt_list, nfscl);
	mutex_exit(&nfs4_clnt_list_lock);
	return (nfscl);
}

/*
 * ZSD destroy callback: undo clinit4_zone.  Unlink the per-zone data,
 * reclaim all cached client handles, free the handle table heads, and
 * delete the per-zone kstat.
 */
/*ARGSUSED*/
static void
clfini4_zone(zoneid_t zoneid, void *arg)
{
	struct nfs4_clnt *nfscl = arg;
	chhead_t *chp, *next;

	if (nfscl == NULL)
		return;
	mutex_enter(&nfs4_clnt_list_lock);
	list_remove(&nfs4_clnt_list, nfscl);
	mutex_exit(&nfs4_clnt_list_lock);
	clreclaim4_zone(nfscl, 0);
	for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
		ASSERT(chp->ch_list == NULL);
		kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
		next = chp->ch_next;
		kmem_free(chp, sizeof (*chp));
	}
	kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
	mutex_destroy(&nfscl->nfscl_chtable4_lock);
	kmem_free(nfscl, sizeof (*nfscl));
} 2762 2763 /* 2764 * Called by endpnt_destructor to make sure the client handles are 2765 * cleaned up before the RPC endpoints. This becomes a no-op if 2766 * clfini_zone (above) is called first. This function is needed 2767 * (rather than relying on clfini_zone to clean up) because the ZSD 2768 * callbacks have no ordering mechanism, so we have no way to ensure 2769 * that clfini_zone is called before endpnt_destructor. 2770 */ 2771 void 2772 clcleanup4_zone(zoneid_t zoneid) 2773 { 2774 struct nfs4_clnt *nfscl; 2775 2776 mutex_enter(&nfs4_clnt_list_lock); 2777 nfscl = list_head(&nfs4_clnt_list); 2778 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) { 2779 if (nfscl->nfscl_zoneid == zoneid) { 2780 clreclaim4_zone(nfscl, 0); 2781 break; 2782 } 2783 } 2784 mutex_exit(&nfs4_clnt_list_lock); 2785 } 2786 2787 int 2788 nfs4_subr_init(void) 2789 { 2790 /* 2791 * Allocate and initialize the client handle cache 2792 */ 2793 chtab4_cache = kmem_cache_create("client_handle4_cache", 2794 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL, 2795 NULL, 0); 2796 2797 /* 2798 * Initialize the list of per-zone client handles (and associated data). 2799 * This needs to be done before we call zone_key_create(). 2800 */ 2801 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt), 2802 offsetof(struct nfs4_clnt, nfscl_node)); 2803 2804 /* 2805 * Initialize the zone_key for per-zone client handle lists. 
	 */
	zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);

	if (nfs4err_delay_time == 0)
		nfs4err_delay_time = NFS4ERR_DELAY_TIME;

	return (0);
}

int
nfs4_subr_fini(void)
{
	/*
	 * Deallocate the client handle cache
	 */
	kmem_cache_destroy(chtab4_cache);

	/*
	 * Destroy the zone_key
	 */
	(void) zone_key_delete(nfs4clnt_zone_key);

	return (0);
}
/*
 * Set or Clear direct I/O flag
 * VOP_RWLOCK() is held for write access to prevent a race condition
 * which would occur if a process is in the middle of a write when
 * directio flag gets set.  It is possible that all pages may not get
 * flushed.
 *
 * This is a copy of nfs_directio, changes here may need to be made
 * there and vice versa.
 */

int
nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
{
	int	error = 0;
	rnode4_t *rp;

	rp = VTOR4(vp);

	if (cmd == DIRECTIO_ON) {

		/* Fast path: already in direct I/O mode. */
		if (rp->r_flags & R4DIRECTIO)
			return (0);

		/*
		 * Flush the page cache.
		 */

		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

		/* Re-check under the write lock; another thread may have won. */
		if (rp->r_flags & R4DIRECTIO) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
			return (0);
		}

		/* Push and invalidate any dirty or in-flight pages. */
		if (nfs4_has_pages(vp) &&
		    ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
			    B_INVAL, cr);
			if (error) {
				if (error == ENOSPC || error == EDQUOT) {
					mutex_enter(&rp->r_statelock);
					/* Only record the first such error. */
					if (!rp->r_error)
						rp->r_error = error;
					mutex_exit(&rp->r_statelock);
				}
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				return (error);
			}
		}

		mutex_enter(&rp->r_statelock);
		rp->r_flags |= R4DIRECTIO;
		mutex_exit(&rp->r_statelock);
		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
		return (0);
	}

	if (cmd == DIRECTIO_OFF) {
		mutex_enter(&rp->r_statelock);
		rp->r_flags &= ~R4DIRECTIO;	/* disable direct mode */
		mutex_exit(&rp->r_statelock);
		return (0);
	}

	/* Unknown subcommand. */
	return (EINVAL);
}

/*
 * Return TRUE if the file has any pages.  Always go back to
 * the master vnode to check v_pages since none of the shadows
 * can have pages.
 */

bool_t
nfs4_has_pages(vnode_t *vp)
{
	rnode4_t *rp;

	rp = VTOR4(vp);
	if (IS_SHADOW(vp, rp))
		vp = RTOV4(rp);	/* RTOV4 always gives the master */

	return (vn_has_cached_data(vp));
}

/*
 * This table is used to determine whether the client should attempt
 * failover based on the clnt_stat value returned by CLNT_CALL.  The
 * clnt_stat is used as an index into the table.  If
 * the error value that corresponds to the clnt_stat value in the
 * table is non-zero, then that is the error to be returned AND
 * that signals that failover should be attempted.
 *
 * Special note: If the RPC_ values change, then direct indexing of the
 * table is no longer valid, but having the RPC_ values in the table
 * allows the functions to detect the change and issue a warning.
 * In this case, the code will always attempt failover as a defensive
 * measure.
 */

static struct try_failover_tab {
	enum clnt_stat	cstat;	/* expected index; sanity-checked at lookup */
	int		error;	/* errno to return; 0 means don't fail over */
} try_failover_table [] = {

	RPC_SUCCESS,		0,
	RPC_CANTENCODEARGS,	0,
	RPC_CANTDECODERES,	0,
	RPC_CANTSEND,		ECOMM,
	RPC_CANTRECV,		ECOMM,
	RPC_TIMEDOUT,		ETIMEDOUT,
	RPC_VERSMISMATCH,	0,
	RPC_AUTHERROR,		0,
	RPC_PROGUNAVAIL,	0,
	RPC_PROGVERSMISMATCH,	0,
	RPC_PROCUNAVAIL,	0,
	RPC_CANTDECODEARGS,	0,
	RPC_SYSTEMERROR,	ENOSR,
	RPC_UNKNOWNHOST,	EHOSTUNREACH,
	RPC_RPCBFAILURE,	ENETUNREACH,
	RPC_PROGNOTREGISTERED,	ECONNREFUSED,
	RPC_FAILED,		ETIMEDOUT,
	RPC_UNKNOWNPROTO,	EHOSTUNREACH,
	RPC_INTR,		0,
	RPC_UNKNOWNADDR,	EHOSTUNREACH,
	RPC_TLIERROR,		0,
	RPC_NOBROADCAST,	EHOSTUNREACH,
	RPC_N2AXLATEFAILURE,	ECONNREFUSED,
	RPC_UDERROR,		0,
	RPC_INPROGRESS,		0,
	RPC_STALERACHANDLE,	EINVAL,
	RPC_CANTCONNECT,	ECONNREFUSED,
	RPC_XPRTFAILED,		ECONNABORTED,
	RPC_CANTCREATESTREAM,	ECONNREFUSED,
	RPC_CANTSTORE,		ENOBUFS
};

/*
 * nfs4_try_failover - determine whether the client should
 * attempt failover based on the values stored in the nfs4_error_t.
 */
int
nfs4_try_failover(nfs4_error_t *ep)
{
	/* A timeout or an NFS4ERR_RESOURCE reply always warrants failover. */
	if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
		return (TRUE);

	/* Otherwise consult the RPC status mapping table. */
	if (ep->error && ep->rpc_status != RPC_SUCCESS)
		return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);

	return (FALSE);
}

/*
 * try_failover - internal version of nfs4_try_failover; reached via
 * nfs4_try_failover above as well as rfscall and aclcall.
 * Determine if failover is warranted
 * based on the clnt_stat and return the error number if it is.
 */
static int
try_failover(enum clnt_stat rpc_status)
{
	int err = 0;

	if (rpc_status == RPC_SUCCESS)
		return (0);

#ifdef DEBUG
	/* When nfs4_try_failover_any is set, treat any RPC error as timeout. */
	if (rpc_status != 0 && nfs4_try_failover_any) {
		err = ETIMEDOUT;
		goto done;
	}
#endif
	/*
	 * The rpc status is used as an index into the table.
	 * If the rpc status is outside of the range of the
	 * table or if the rpc error numbers have been changed
	 * since the table was constructed, then print a warning
	 * (DEBUG only) and try failover anyway.  Otherwise, just
	 * grab the resulting error number out of the table.
	 */
	if (rpc_status < RPC_SUCCESS || rpc_status >=
	    sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
	    try_failover_table[rpc_status].cstat != rpc_status) {

		err = ETIMEDOUT;
#ifdef DEBUG
		cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
		    rpc_status);
#endif
	} else
		err = try_failover_table[rpc_status].error;

done:
	if (rpc_status)
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_try_failover: %strying failover on error %d",
		    err ? "" : "NOT ", rpc_status));

	return (err);
}

/*
 * Reset an nfs4_error_t to its all-clear state.
 */
void
nfs4_error_zinit(nfs4_error_t *ep)
{
	ep->error = 0;
	ep->stat = NFS4_OK;
	ep->rpc_status = RPC_SUCCESS;
}

/*
 * Initialize an nfs4_error_t with the given errno; the NFS and RPC
 * status fields are cleared.
 */
void
nfs4_error_init(nfs4_error_t *ep, int error)
{
	ep->error = error;
	ep->stat = NFS4_OK;
	ep->rpc_status = RPC_SUCCESS;
}


#ifdef DEBUG

/*
 * Return a 16-bit hash for filehandle, stateid, clientid, owner.
 * Uses the same algorithm as for NFS v3.
3053 * 3054 */ 3055 int 3056 hash16(void *p, int len) 3057 { 3058 int i, rem; 3059 uint_t *wp; 3060 uint_t key = 0; 3061 3062 /* protect against non word aligned */ 3063 if ((rem = len & 3) != 0) 3064 len &= ~3; 3065 3066 for (i = 0, wp = (uint_t *)p; i < len; i += 4, wp++) { 3067 key ^= (*wp >> 16) ^ *wp; 3068 } 3069 3070 /* hash left-over bytes */ 3071 for (i = 0; i < rem; i++) 3072 key ^= *((uchar_t *)p + i); 3073 3074 return (key & 0xffff); 3075 } 3076 3077 /* 3078 * rnode4info - return filehandle and path information for an rnode. 3079 * XXX MT issues: uses a single static buffer, no locking of path. 3080 */ 3081 char * 3082 rnode4info(rnode4_t *rp) 3083 { 3084 static char buf[80]; 3085 nfs4_fhandle_t fhandle; 3086 char *path; 3087 char *type; 3088 3089 if (rp == NULL) 3090 return ("null"); 3091 if (rp->r_flags & R4ISXATTR) 3092 type = "attr"; 3093 else if (RTOV4(rp)->v_flag & V_XATTRDIR) 3094 type = "attrdir"; 3095 else if (RTOV4(rp)->v_flag & VROOT) 3096 type = "root"; 3097 else if (RTOV4(rp)->v_type == VDIR) 3098 type = "dir"; 3099 else if (RTOV4(rp)->v_type == VREG) 3100 type = "file"; 3101 else 3102 type = "other"; 3103 sfh4_copyval(rp->r_fh, &fhandle); 3104 path = fn_path(rp->r_svnode.sv_name); 3105 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n", 3106 (void *)rp, path, type, rp->r_flags, 3107 hash16((void *)&fhandle.fh_buf, fhandle.fh_len)); 3108 kmem_free(path, strlen(path)+1); 3109 return (buf); 3110 } 3111 #endif 3112