1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
29 * All Rights Reserved 30 */ 31 32 #pragma ident "%Z%%M% %I% %E% SMI" 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/cmn_err.h> 38 #include <sys/vtrace.h> 39 #include <sys/session.h> 40 #include <sys/thread.h> 41 #include <sys/dnlc.h> 42 #include <sys/cred.h> 43 #include <sys/list.h> 44 #include <sys/sdt.h> 45 46 #include <rpc/types.h> 47 #include <rpc/xdr.h> 48 49 #include <nfs/nfs.h> 50 51 #include <nfs/nfs_clnt.h> 52 53 #include <nfs/nfs4.h> 54 #include <nfs/rnode4.h> 55 #include <nfs/nfs4_clnt.h> 56 57 /* 58 * client side statistics 59 */ 60 static const struct clstat4 clstat4_tmpl = { 61 { "calls", KSTAT_DATA_UINT64 }, 62 { "badcalls", KSTAT_DATA_UINT64 }, 63 { "clgets", KSTAT_DATA_UINT64 }, 64 { "cltoomany", KSTAT_DATA_UINT64 }, 65 #ifdef DEBUG 66 { "clalloc", KSTAT_DATA_UINT64 }, 67 { "noresponse", KSTAT_DATA_UINT64 }, 68 { "failover", KSTAT_DATA_UINT64 }, 69 { "remap", KSTAT_DATA_UINT64 }, 70 #endif 71 }; 72 73 #ifdef DEBUG 74 struct clstat4_debug clstat4_debug = { 75 { "nrnode", KSTAT_DATA_UINT64 }, 76 { "access", KSTAT_DATA_UINT64 }, 77 { "dirent", KSTAT_DATA_UINT64 }, 78 { "dirents", KSTAT_DATA_UINT64 }, 79 { "reclaim", KSTAT_DATA_UINT64 }, 80 { "clreclaim", KSTAT_DATA_UINT64 }, 81 { "f_reclaim", KSTAT_DATA_UINT64 }, 82 { "a_reclaim", KSTAT_DATA_UINT64 }, 83 { "r_reclaim", KSTAT_DATA_UINT64 }, 84 { "r_path", KSTAT_DATA_UINT64 }, 85 }; 86 #endif 87 88 /* 89 * We keep a global list of per-zone client data, so we can clean up all zones 90 * if we get low on memory. 
91 */ 92 static list_t nfs4_clnt_list; 93 static kmutex_t nfs4_clnt_list_lock; 94 static zone_key_t nfs4clnt_zone_key; 95 96 static struct kmem_cache *chtab4_cache; 97 98 #ifdef DEBUG 99 static int nfs4_rfscall_debug; 100 static int nfs4_try_failover_any; 101 int nfs4_utf8_debug = 0; 102 #endif 103 104 /* 105 * NFSv4 readdir cache implementation 106 */ 107 typedef struct rddir4_cache_impl { 108 rddir4_cache rc; /* readdir cache element */ 109 kmutex_t lock; /* lock protects count */ 110 uint_t count; /* reference count */ 111 avl_node_t tree; /* AVL tree link */ 112 } rddir4_cache_impl; 113 114 static int rddir4_cache_compar(const void *, const void *); 115 static void rddir4_cache_free(rddir4_cache_impl *); 116 static rddir4_cache *rddir4_cache_alloc(int); 117 static void rddir4_cache_hold(rddir4_cache *); 118 static int try_failover(enum clnt_stat); 119 120 static int nfs4_readdir_cache_hits = 0; 121 static int nfs4_readdir_cache_waits = 0; 122 static int nfs4_readdir_cache_misses = 0; 123 124 /* 125 * Shared nfs4 functions 126 */ 127 128 /* 129 * Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already 130 * be allocated. 131 */ 132 133 void 134 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to) 135 { 136 to->nfs_fh4_len = from->nfs_fh4_len; 137 bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len); 138 } 139 140 /* 141 * nfs4cmpfh - compare 2 filehandles. 142 * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is 143 * "less" than the second, +1 if the first is "greater" than the second. 
144 */ 145 146 int 147 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2) 148 { 149 const char *c1, *c2; 150 151 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len) 152 return (-1); 153 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len) 154 return (1); 155 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val; 156 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len; 157 c1++, c2++) { 158 if (*c1 < *c2) 159 return (-1); 160 if (*c1 > *c2) 161 return (1); 162 } 163 164 return (0); 165 } 166 167 /* 168 * Compare two v4 filehandles. Return zero if they're the same, non-zero 169 * if they're not. Like nfs4cmpfh(), but different filehandle 170 * representation, and doesn't provide information about greater than or 171 * less than. 172 */ 173 174 int 175 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2) 176 { 177 if (fh1->fh_len == fh2->fh_len) 178 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len)); 179 180 return (1); 181 } 182 183 int 184 stateid4_cmp(stateid4 *s1, stateid4 *s2) 185 { 186 if (bcmp(s1, s2, sizeof (stateid4)) == 0) 187 return (1); 188 else 189 return (0); 190 } 191 192 nfsstat4 193 puterrno4(int error) 194 { 195 switch (error) { 196 case 0: 197 return (NFS4_OK); 198 case EPERM: 199 return (NFS4ERR_PERM); 200 case ENOENT: 201 return (NFS4ERR_NOENT); 202 case EINTR: 203 return (NFS4ERR_IO); 204 case EIO: 205 return (NFS4ERR_IO); 206 case ENXIO: 207 return (NFS4ERR_NXIO); 208 case ENOMEM: 209 return (NFS4ERR_RESOURCE); 210 case EACCES: 211 return (NFS4ERR_ACCESS); 212 case EBUSY: 213 return (NFS4ERR_IO); 214 case EEXIST: 215 return (NFS4ERR_EXIST); 216 case EXDEV: 217 return (NFS4ERR_XDEV); 218 case ENODEV: 219 return (NFS4ERR_IO); 220 case ENOTDIR: 221 return (NFS4ERR_NOTDIR); 222 case EISDIR: 223 return (NFS4ERR_ISDIR); 224 case EINVAL: 225 return (NFS4ERR_INVAL); 226 case EMFILE: 227 return (NFS4ERR_RESOURCE); 228 case EFBIG: 229 return (NFS4ERR_FBIG); 230 case ENOSPC: 231 return (NFS4ERR_NOSPC); 232 case EROFS: 233 return (NFS4ERR_ROFS); 234 case EMLINK: 235 
return (NFS4ERR_MLINK); 236 case EDEADLK: 237 return (NFS4ERR_DEADLOCK); 238 case ENOLCK: 239 return (NFS4ERR_DENIED); 240 case EREMOTE: 241 return (NFS4ERR_SERVERFAULT); 242 case ENOTSUP: 243 return (NFS4ERR_NOTSUPP); 244 case EDQUOT: 245 return (NFS4ERR_DQUOT); 246 case ENAMETOOLONG: 247 return (NFS4ERR_NAMETOOLONG); 248 case EOVERFLOW: 249 return (NFS4ERR_INVAL); 250 case ENOSYS: 251 return (NFS4ERR_NOTSUPP); 252 case ENOTEMPTY: 253 return (NFS4ERR_NOTEMPTY); 254 case EOPNOTSUPP: 255 return (NFS4ERR_NOTSUPP); 256 case ESTALE: 257 return (NFS4ERR_STALE); 258 case EAGAIN: 259 if (curthread->t_flag & T_WOULDBLOCK) { 260 curthread->t_flag &= ~T_WOULDBLOCK; 261 return (NFS4ERR_DELAY); 262 } 263 return (NFS4ERR_LOCKED); 264 default: 265 return ((enum nfsstat4)error); 266 } 267 } 268 269 int 270 geterrno4(enum nfsstat4 status) 271 { 272 switch (status) { 273 case NFS4_OK: 274 return (0); 275 case NFS4ERR_PERM: 276 return (EPERM); 277 case NFS4ERR_NOENT: 278 return (ENOENT); 279 case NFS4ERR_IO: 280 return (EIO); 281 case NFS4ERR_NXIO: 282 return (ENXIO); 283 case NFS4ERR_ACCESS: 284 return (EACCES); 285 case NFS4ERR_EXIST: 286 return (EEXIST); 287 case NFS4ERR_XDEV: 288 return (EXDEV); 289 case NFS4ERR_NOTDIR: 290 return (ENOTDIR); 291 case NFS4ERR_ISDIR: 292 return (EISDIR); 293 case NFS4ERR_INVAL: 294 return (EINVAL); 295 case NFS4ERR_FBIG: 296 return (EFBIG); 297 case NFS4ERR_NOSPC: 298 return (ENOSPC); 299 case NFS4ERR_ROFS: 300 return (EROFS); 301 case NFS4ERR_MLINK: 302 return (EMLINK); 303 case NFS4ERR_NAMETOOLONG: 304 return (ENAMETOOLONG); 305 case NFS4ERR_NOTEMPTY: 306 return (ENOTEMPTY); 307 case NFS4ERR_DQUOT: 308 return (EDQUOT); 309 case NFS4ERR_STALE: 310 return (ESTALE); 311 case NFS4ERR_BADHANDLE: 312 return (ESTALE); 313 case NFS4ERR_BAD_COOKIE: 314 return (EINVAL); 315 case NFS4ERR_NOTSUPP: 316 return (EOPNOTSUPP); 317 case NFS4ERR_TOOSMALL: 318 return (EINVAL); 319 case NFS4ERR_SERVERFAULT: 320 return (EIO); 321 case NFS4ERR_BADTYPE: 322 return 
(EINVAL); 323 case NFS4ERR_DELAY: 324 return (ENXIO); 325 case NFS4ERR_SAME: 326 return (EPROTO); 327 case NFS4ERR_DENIED: 328 return (ENOLCK); 329 case NFS4ERR_EXPIRED: 330 return (EPROTO); 331 case NFS4ERR_LOCKED: 332 return (EACCES); 333 case NFS4ERR_GRACE: 334 return (EAGAIN); 335 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */ 336 return (ESTALE); 337 case NFS4ERR_SHARE_DENIED: 338 return (EACCES); 339 case NFS4ERR_WRONGSEC: 340 return (EPERM); 341 case NFS4ERR_CLID_INUSE: 342 return (EAGAIN); 343 case NFS4ERR_RESOURCE: 344 return (EAGAIN); 345 case NFS4ERR_MOVED: 346 return (EPROTO); 347 case NFS4ERR_NOFILEHANDLE: 348 return (EIO); 349 case NFS4ERR_MINOR_VERS_MISMATCH: 350 return (ENOTSUP); 351 case NFS4ERR_STALE_CLIENTID: 352 return (EIO); 353 case NFS4ERR_STALE_STATEID: 354 return (EIO); 355 case NFS4ERR_OLD_STATEID: 356 return (EIO); 357 case NFS4ERR_BAD_STATEID: 358 return (EIO); 359 case NFS4ERR_BAD_SEQID: 360 return (EIO); 361 case NFS4ERR_NOT_SAME: 362 return (EPROTO); 363 case NFS4ERR_LOCK_RANGE: 364 return (EPROTO); 365 case NFS4ERR_SYMLINK: 366 return (EPROTO); 367 case NFS4ERR_RESTOREFH: 368 return (EPROTO); 369 case NFS4ERR_LEASE_MOVED: 370 return (EPROTO); 371 case NFS4ERR_ATTRNOTSUPP: 372 return (ENOTSUP); 373 case NFS4ERR_NO_GRACE: 374 return (EPROTO); 375 case NFS4ERR_RECLAIM_BAD: 376 return (EPROTO); 377 case NFS4ERR_RECLAIM_CONFLICT: 378 return (EPROTO); 379 case NFS4ERR_BADXDR: 380 return (EINVAL); 381 case NFS4ERR_LOCKS_HELD: 382 return (EIO); 383 case NFS4ERR_OPENMODE: 384 return (EACCES); 385 case NFS4ERR_BADOWNER: 386 /* 387 * Client and server are in different DNS domains 388 * and the NFSMAPID_DOMAIN in /etc/default/nfs 389 * doesn't match. No good answer here. Return 390 * EACCESS, which translates to "permission denied". 
391 */ 392 return (EACCES); 393 case NFS4ERR_BADCHAR: 394 return (EINVAL); 395 case NFS4ERR_BADNAME: 396 return (EINVAL); 397 case NFS4ERR_BAD_RANGE: 398 return (EIO); 399 case NFS4ERR_LOCK_NOTSUPP: 400 return (ENOTSUP); 401 case NFS4ERR_OP_ILLEGAL: 402 return (EINVAL); 403 case NFS4ERR_DEADLOCK: 404 return (EDEADLK); 405 case NFS4ERR_FILE_OPEN: 406 return (EACCES); 407 case NFS4ERR_ADMIN_REVOKED: 408 return (EPROTO); 409 case NFS4ERR_CB_PATH_DOWN: 410 return (EPROTO); 411 default: 412 #ifdef DEBUG 413 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d", 414 status); 415 #endif 416 return ((int)status); 417 } 418 } 419 420 void 421 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op) 422 { 423 nfs4_server_t *server; 424 425 /* 426 * Return if already printed/queued a msg 427 * for this mount point. 428 */ 429 if (mi->mi_flags & MI4_BADOWNER_DEBUG) 430 return; 431 /* 432 * Happens once per client <-> server pair. 433 */ 434 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 435 mi->mi_flags & MI4_INT)) 436 return; 437 438 server = find_nfs4_server(mi); 439 if (server == NULL) { 440 nfs_rw_exit(&mi->mi_recovlock); 441 return; 442 } 443 444 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) { 445 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 446 "!NFSMAPID_DOMAIN does not match" 447 " the server: %s domain.\n" 448 "Please check configuration", 449 mi->mi_curr_serv->sv_hostname); 450 server->s_flags |= N4S_BADOWNER_DEBUG; 451 } 452 mutex_exit(&server->s_lock); 453 nfs4_server_rele(server); 454 nfs_rw_exit(&mi->mi_recovlock); 455 456 /* 457 * Happens once per mntinfo4_t. 458 * This error is deemed as one of the recovery facts "RF_BADOWNER", 459 * queue this in the mesg queue for this mount_info. This message 460 * is not printed, meaning its absent from id_to_dump_solo_fact() 461 * but its there for inspection if the queue is ever dumped/inspected. 
462 */ 463 mutex_enter(&mi->mi_lock); 464 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) { 465 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op, 466 FALSE, NULL, 0, NULL); 467 mi->mi_flags |= MI4_BADOWNER_DEBUG; 468 } 469 mutex_exit(&mi->mi_lock); 470 } 471 472 473 474 int 475 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime) 476 { 477 int64_t sec; 478 int32_t nsec; 479 480 /* 481 * Here check that the nfsv4 time is valid for the system. 482 * nfsv4 time value is a signed 64-bit, and the system time 483 * may be either int64_t or int32_t (depends on the kernel), 484 * so if the kernel is 32-bit, the nfsv4 time value may not fit. 485 */ 486 #ifndef _LP64 487 if (! NFS4_TIME_OK(ntime->seconds)) { 488 return (EOVERFLOW); 489 } 490 #endif 491 492 if (ntime->seconds < 0) { 493 sec = ntime->seconds + 1; 494 nsec = -1000000000 + ntime->nseconds; 495 } else { 496 sec = ntime->seconds; 497 nsec = ntime->nseconds; 498 } 499 500 vatime->tv_sec = sec; 501 vatime->tv_nsec = nsec; 502 503 return (0); 504 } 505 506 int 507 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime) 508 { 509 int64_t sec; 510 uint32_t nsec; 511 512 /* 513 * nfsv4 time value is a signed 64-bit, and the system time 514 * may be either int64_t or int32_t (depends on the kernel), 515 * so all system time values will fit. 516 */ 517 if (vatime->tv_nsec >= 0) { 518 sec = vatime->tv_sec; 519 nsec = vatime->tv_nsec; 520 } else { 521 sec = vatime->tv_sec - 1; 522 nsec = 1000000000 + vatime->tv_nsec; 523 } 524 ntime->seconds = sec; 525 ntime->nseconds = nsec; 526 527 return (0); 528 } 529 530 /* 531 * Converts a utf8 string to a valid null terminated filename string. 532 * 533 * XXX - Not actually translating the UTF-8 string as per RFC 2279. 534 * For now, just validate that the UTF-8 string off the wire 535 * does not have characters that will freak out UFS, and leave 536 * it at that. 
537 */ 538 char * 539 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s) 540 { 541 ASSERT(lenp != NULL); 542 543 if (u8s == NULL || u8s->utf8string_len <= 0 || 544 u8s->utf8string_val == NULL) 545 return (NULL); 546 547 /* 548 * Check for obvious illegal filename chars 549 */ 550 if (utf8_strchr(u8s, '/') != NULL) { 551 #ifdef DEBUG 552 if (nfs4_utf8_debug) { 553 char *path; 554 int len = u8s->utf8string_len; 555 556 path = kmem_alloc(len + 1, KM_SLEEP); 557 bcopy(u8s->utf8string_val, path, len); 558 path[len] = '\0'; 559 560 zcmn_err(getzoneid(), CE_WARN, 561 "Invalid UTF-8 filename: %s", path); 562 563 kmem_free(path, len + 1); 564 } 565 #endif 566 return (NULL); 567 } 568 569 return (utf8_to_str(u8s, lenp, s)); 570 } 571 572 /* 573 * Converts a utf8 string to a C string. 574 * kmem_allocs a new string if not supplied 575 */ 576 char * 577 utf8_to_str(utf8string *str, uint_t *lenp, char *s) 578 { 579 char *sp; 580 char *u8p; 581 int len; 582 int i; 583 584 ASSERT(lenp != NULL); 585 586 if (str == NULL) 587 return (NULL); 588 589 u8p = str->utf8string_val; 590 len = str->utf8string_len; 591 if (len <= 0 || u8p == NULL) { 592 if (s) 593 *s = '\0'; 594 return (NULL); 595 } 596 597 sp = s; 598 if (sp == NULL) 599 sp = kmem_alloc(len + 1, KM_SLEEP); 600 601 /* 602 * At least check for embedded nulls 603 */ 604 for (i = 0; i < len; i++) { 605 sp[i] = u8p[i]; 606 if (u8p[i] == '\0') { 607 #ifdef DEBUG 608 zcmn_err(getzoneid(), CE_WARN, 609 "Embedded NULL in UTF-8 string"); 610 #endif 611 if (s == NULL) 612 kmem_free(sp, len + 1); 613 return (NULL); 614 } 615 } 616 sp[len] = '\0'; 617 *lenp = len + 1; 618 619 return (sp); 620 } 621 622 /* 623 * str_to_utf8 - converts a null-terminated C string to a utf8 string 624 */ 625 utf8string * 626 str_to_utf8(char *nm, utf8string *str) 627 { 628 int len; 629 630 if (str == NULL) 631 return (NULL); 632 633 if (nm == NULL || *nm == '\0') { 634 str->utf8string_len = 0; 635 str->utf8string_val = NULL; 636 } 637 638 len = strlen(nm); 
639 640 str->utf8string_val = kmem_alloc(len, KM_SLEEP); 641 str->utf8string_len = len; 642 bcopy(nm, str->utf8string_val, len); 643 644 return (str); 645 } 646 647 utf8string * 648 utf8_copy(utf8string *src, utf8string *dest) 649 { 650 if (src == NULL) 651 return (NULL); 652 if (dest == NULL) 653 return (NULL); 654 655 if (src->utf8string_len > 0) { 656 dest->utf8string_val = kmem_alloc(src->utf8string_len, 657 KM_SLEEP); 658 bcopy(src->utf8string_val, dest->utf8string_val, 659 src->utf8string_len); 660 dest->utf8string_len = src->utf8string_len; 661 } else { 662 dest->utf8string_val = NULL; 663 dest->utf8string_len = 0; 664 } 665 666 return (dest); 667 } 668 669 int 670 utf8_compare(const utf8string *a, const utf8string *b) 671 { 672 int mlen, cmp; 673 int alen, blen; 674 char *aval, *bval; 675 676 if ((a == NULL) && (b == NULL)) 677 return (0); 678 else if (a == NULL) 679 return (-1); 680 else if (b == NULL) 681 return (1); 682 683 alen = a->utf8string_len; 684 blen = b->utf8string_len; 685 aval = a->utf8string_val; 686 bval = b->utf8string_val; 687 688 if (((alen == 0) || (aval == NULL)) && 689 ((blen == 0) || (bval == NULL))) 690 return (0); 691 else if ((alen == 0) || (aval == NULL)) 692 return (-1); 693 else if ((blen == 0) || (bval == NULL)) 694 return (1); 695 696 mlen = MIN(alen, blen); 697 cmp = strncmp(aval, bval, mlen); 698 699 if ((cmp == 0) && (alen == blen)) 700 return (0); 701 else if ((cmp == 0) && (alen < blen)) 702 return (-1); 703 else if (cmp == 0) 704 return (1); 705 else if (cmp < 0) 706 return (-1); 707 return (1); 708 } 709 710 /* 711 * utf8_dir_verify - checks that the utf8 string is valid 712 */ 713 int 714 utf8_dir_verify(utf8string *str) 715 { 716 char *nm; 717 int len; 718 719 if (str == NULL) 720 return (0); 721 722 nm = str->utf8string_val; 723 len = str->utf8string_len; 724 if (nm == NULL || len == 0) { 725 return (0); 726 } 727 728 if (len == 1 && nm[0] == '.') 729 return (0); 730 if (len == 2 && nm[0] == '.' 
&& nm[1] == '.') 731 return (0); 732 733 if (utf8_strchr(str, '/') != NULL) 734 return (0); 735 736 if (utf8_strchr(str, '\0') != NULL) 737 return (0); 738 739 return (1); 740 } 741 742 /* 743 * from rpcsec module (common/rpcsec) 744 */ 745 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **); 746 extern void sec_clnt_freeh(AUTH *); 747 extern void sec_clnt_freeinfo(struct sec_data *); 748 749 /* 750 * authget() gets an auth handle based on the security 751 * information from the servinfo in mountinfo. 752 * The auth handle is stored in ch_client->cl_auth. 753 * 754 * First security flavor of choice is to use sv_secdata 755 * which is initiated by the client. If that fails, get 756 * secinfo from the server and then select one from the 757 * server secinfo list . 758 * 759 * For RPCSEC_GSS flavor, upon success, a secure context is 760 * established between client and server. 761 */ 762 int 763 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr) 764 { 765 int error, i; 766 767 /* 768 * SV4_TRYSECINFO indicates to try the secinfo list from 769 * sv_secinfo until a successful one is reached. Point 770 * sv_currsec to the selected security mechanism for 771 * later sessions. 772 */ 773 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 774 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) { 775 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count; 776 i++) { 777 if (!(error = sec_clnt_geth(ch_client, 778 &svp->sv_secinfo->sdata[i], 779 cr, &ch_client->cl_auth))) { 780 781 svp->sv_currsec = &svp->sv_secinfo->sdata[i]; 782 svp->sv_secinfo->index = i; 783 /* done */ 784 svp->sv_flags &= ~SV4_TRYSECINFO; 785 break; 786 } 787 788 /* 789 * Allow the caller retry with the security flavor 790 * pointed by svp->sv_secinfo->index when 791 * ETIMEDOUT/ECONNRESET occurs. 
792 */ 793 if (error == ETIMEDOUT || error == ECONNRESET) { 794 svp->sv_secinfo->index = i; 795 break; 796 } 797 } 798 } else { 799 /* sv_currsec points to one of the entries in sv_secinfo */ 800 if (svp->sv_currsec) { 801 error = sec_clnt_geth(ch_client, svp->sv_currsec, cr, 802 &ch_client->cl_auth); 803 } else { 804 /* If it's null, use sv_secdata. */ 805 error = sec_clnt_geth(ch_client, svp->sv_secdata, cr, 806 &ch_client->cl_auth); 807 } 808 } 809 nfs_rw_exit(&svp->sv_lock); 810 811 return (error); 812 } 813 814 /* 815 * Common handle get program for NFS, NFS ACL, and NFS AUTH client. 816 */ 817 int 818 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 819 struct chtab **chp, struct nfs4_clnt *nfscl) 820 { 821 struct chhead *ch, *newch; 822 struct chhead **plistp; 823 struct chtab *cp; 824 int error; 825 k_sigset_t smask; 826 827 if (newcl == NULL || chp == NULL || ci == NULL) 828 return (EINVAL); 829 830 *newcl = NULL; 831 *chp = NULL; 832 833 /* 834 * Find an unused handle or create one 835 */ 836 newch = NULL; 837 nfscl->nfscl_stat.clgets.value.ui64++; 838 top: 839 /* 840 * Find the correct entry in the cache to check for free 841 * client handles. The search is based on the RPC program 842 * number, program version number, dev_t for the transport 843 * device, and the protocol family. 844 */ 845 mutex_enter(&nfscl->nfscl_chtable4_lock); 846 plistp = &nfscl->nfscl_chtable4; 847 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 848 if (ch->ch_prog == ci->cl_prog && 849 ch->ch_vers == ci->cl_vers && 850 ch->ch_dev == svp->sv_knconf->knc_rdev && 851 (strcmp(ch->ch_protofmly, 852 svp->sv_knconf->knc_protofmly) == 0)) 853 break; 854 plistp = &ch->ch_next; 855 } 856 857 /* 858 * If we didn't find a cache entry for this quadruple, then 859 * create one. If we don't have one already preallocated, 860 * then drop the cache lock, create one, and then start over. 
861 * If we did have a preallocated entry, then just add it to 862 * the front of the list. 863 */ 864 if (ch == NULL) { 865 if (newch == NULL) { 866 mutex_exit(&nfscl->nfscl_chtable4_lock); 867 newch = kmem_alloc(sizeof (*newch), KM_SLEEP); 868 newch->ch_timesused = 0; 869 newch->ch_prog = ci->cl_prog; 870 newch->ch_vers = ci->cl_vers; 871 newch->ch_dev = svp->sv_knconf->knc_rdev; 872 newch->ch_protofmly = kmem_alloc( 873 strlen(svp->sv_knconf->knc_protofmly) + 1, 874 KM_SLEEP); 875 (void) strcpy(newch->ch_protofmly, 876 svp->sv_knconf->knc_protofmly); 877 newch->ch_list = NULL; 878 goto top; 879 } 880 ch = newch; 881 newch = NULL; 882 ch->ch_next = nfscl->nfscl_chtable4; 883 nfscl->nfscl_chtable4 = ch; 884 /* 885 * We found a cache entry, but if it isn't on the front of the 886 * list, then move it to the front of the list to try to take 887 * advantage of locality of operations. 888 */ 889 } else if (ch != nfscl->nfscl_chtable4) { 890 *plistp = ch->ch_next; 891 ch->ch_next = nfscl->nfscl_chtable4; 892 nfscl->nfscl_chtable4 = ch; 893 } 894 895 /* 896 * If there was a free client handle cached, then remove it 897 * from the list, init it, and use it. 898 */ 899 if (ch->ch_list != NULL) { 900 cp = ch->ch_list; 901 ch->ch_list = cp->ch_list; 902 mutex_exit(&nfscl->nfscl_chtable4_lock); 903 if (newch != NULL) { 904 kmem_free(newch->ch_protofmly, 905 strlen(newch->ch_protofmly) + 1); 906 kmem_free(newch, sizeof (*newch)); 907 } 908 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf, 909 &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr); 910 911 /* 912 * Get an auth handle. 913 */ 914 error = authget(svp, cp->ch_client, cr); 915 if (error || cp->ch_client->cl_auth == NULL) { 916 CLNT_DESTROY(cp->ch_client); 917 kmem_cache_free(chtab4_cache, cp); 918 return ((error != 0) ? 
error : EINTR); 919 } 920 ch->ch_timesused++; 921 *newcl = cp->ch_client; 922 *chp = cp; 923 return (0); 924 } 925 926 /* 927 * There weren't any free client handles which fit, so allocate 928 * a new one and use that. 929 */ 930 #ifdef DEBUG 931 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1); 932 #endif 933 mutex_exit(&nfscl->nfscl_chtable4_lock); 934 935 nfscl->nfscl_stat.cltoomany.value.ui64++; 936 if (newch != NULL) { 937 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1); 938 kmem_free(newch, sizeof (*newch)); 939 } 940 941 cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP); 942 cp->ch_head = ch; 943 944 sigintr(&smask, (int)ci->cl_flags & MI4_INT); 945 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog, 946 ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client); 947 sigunintr(&smask); 948 949 if (error != 0) { 950 kmem_cache_free(chtab4_cache, cp); 951 #ifdef DEBUG 952 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 953 #endif 954 /* 955 * Warning is unnecessary if error is EINTR. 956 */ 957 if (error != EINTR) { 958 nfs_cmn_err(error, CE_WARN, 959 "clget: couldn't create handle: %m\n"); 960 } 961 return (error); 962 } 963 (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL); 964 auth_destroy(cp->ch_client->cl_auth); 965 966 /* 967 * Get an auth handle. 968 */ 969 error = authget(svp, cp->ch_client, cr); 970 if (error || cp->ch_client->cl_auth == NULL) { 971 CLNT_DESTROY(cp->ch_client); 972 kmem_cache_free(chtab4_cache, cp); 973 #ifdef DEBUG 974 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 975 #endif 976 return ((error != 0) ? 
error : EINTR); 977 } 978 ch->ch_timesused++; 979 *newcl = cp->ch_client; 980 ASSERT(cp->ch_client->cl_nosignal == FALSE); 981 *chp = cp; 982 return (0); 983 } 984 985 static int 986 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl, 987 struct chtab **chp, struct nfs4_clnt *nfscl) 988 { 989 clinfo_t ci; 990 bool_t is_recov; 991 int firstcall, error = 0; 992 993 /* 994 * Set read buffer size to rsize 995 * and add room for RPC headers. 996 */ 997 ci.cl_readsize = mi->mi_tsize; 998 if (ci.cl_readsize != 0) 999 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA); 1000 1001 /* 1002 * If soft mount and server is down just try once. 1003 * meaning: do not retransmit. 1004 */ 1005 if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN)) 1006 ci.cl_retrans = 0; 1007 else 1008 ci.cl_retrans = mi->mi_retrans; 1009 1010 ci.cl_prog = mi->mi_prog; 1011 ci.cl_vers = mi->mi_vers; 1012 ci.cl_flags = mi->mi_flags; 1013 1014 /* 1015 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS 1016 * security flavor, the client tries to establish a security context 1017 * by contacting the server. If the connection is timed out or reset, 1018 * e.g. server reboot, we will try again. 1019 */ 1020 is_recov = (curthread == mi->mi_recovthread); 1021 firstcall = 1; 1022 1023 do { 1024 error = clget4(&ci, svp, cr, newcl, chp, nfscl); 1025 1026 if (error == 0) 1027 break; 1028 1029 /* 1030 * For forced unmount and zone shutdown, bail out but 1031 * let the recovery thread do one more transmission. 
1032 */ 1033 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) && 1034 (!is_recov || !firstcall)) { 1035 error = EIO; 1036 break; 1037 } 1038 1039 /* do not retry for soft mount */ 1040 if (!(mi->mi_flags & MI4_HARD)) 1041 break; 1042 1043 /* let the caller deal with the failover case */ 1044 if (FAILOVER_MOUNT4(mi)) 1045 break; 1046 1047 firstcall = 0; 1048 1049 } while (error == ETIMEDOUT || error == ECONNRESET); 1050 1051 return (error); 1052 } 1053 1054 void 1055 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl) 1056 { 1057 if (cl->cl_auth != NULL) { 1058 sec_clnt_freeh(cl->cl_auth); 1059 cl->cl_auth = NULL; 1060 } 1061 1062 /* 1063 * Timestamp this cache entry so that we know when it was last 1064 * used. 1065 */ 1066 cp->ch_freed = gethrestime_sec(); 1067 1068 /* 1069 * Add the free client handle to the front of the list. 1070 * This way, the list will be sorted in youngest to oldest 1071 * order. 1072 */ 1073 mutex_enter(&nfscl->nfscl_chtable4_lock); 1074 cp->ch_list = cp->ch_head->ch_list; 1075 cp->ch_head->ch_list = cp; 1076 mutex_exit(&nfscl->nfscl_chtable4_lock); 1077 } 1078 1079 #define CL_HOLDTIME 60 /* time to hold client handles */ 1080 1081 static void 1082 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime) 1083 { 1084 struct chhead *ch; 1085 struct chtab *cp; /* list of objects that can be reclaimed */ 1086 struct chtab *cpe; 1087 struct chtab *cpl; 1088 struct chtab **cpp; 1089 #ifdef DEBUG 1090 int n = 0; 1091 clstat4_debug.clreclaim.value.ui64++; 1092 #endif 1093 1094 /* 1095 * Need to reclaim some memory, so step through the cache 1096 * looking through the lists for entries which can be freed. 1097 */ 1098 cp = NULL; 1099 1100 mutex_enter(&nfscl->nfscl_chtable4_lock); 1101 1102 /* 1103 * Here we step through each non-NULL quadruple and start to 1104 * construct the reclaim list pointed to by cp. Note that 1105 * cp will contain all eligible chtab entries. 
When this traversal 1106 * completes, chtab entries from the last quadruple will be at the 1107 * front of cp and entries from previously inspected quadruples have 1108 * been appended to the rear of cp. 1109 */ 1110 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) { 1111 if (ch->ch_list == NULL) 1112 continue; 1113 /* 1114 * Search each list for entries older then 1115 * cl_holdtime seconds. The lists are maintained 1116 * in youngest to oldest order so that when the 1117 * first entry is found which is old enough, then 1118 * all of the rest of the entries on the list will 1119 * be old enough as well. 1120 */ 1121 cpl = ch->ch_list; 1122 cpp = &ch->ch_list; 1123 while (cpl != NULL && 1124 cpl->ch_freed + cl_holdtime > gethrestime_sec()) { 1125 cpp = &cpl->ch_list; 1126 cpl = cpl->ch_list; 1127 } 1128 if (cpl != NULL) { 1129 *cpp = NULL; 1130 if (cp != NULL) { 1131 cpe = cpl; 1132 while (cpe->ch_list != NULL) 1133 cpe = cpe->ch_list; 1134 cpe->ch_list = cp; 1135 } 1136 cp = cpl; 1137 } 1138 } 1139 1140 mutex_exit(&nfscl->nfscl_chtable4_lock); 1141 1142 /* 1143 * If cp is empty, then there is nothing to reclaim here. 1144 */ 1145 if (cp == NULL) 1146 return; 1147 1148 /* 1149 * Step through the list of entries to free, destroying each client 1150 * handle and kmem_free'ing the memory for each entry. 1151 */ 1152 while (cp != NULL) { 1153 #ifdef DEBUG 1154 n++; 1155 #endif 1156 CLNT_DESTROY(cp->ch_client); 1157 cpl = cp->ch_list; 1158 kmem_cache_free(chtab4_cache, cp); 1159 cp = cpl; 1160 } 1161 1162 #ifdef DEBUG 1163 /* 1164 * Update clalloc so that nfsstat shows the current number 1165 * of allocated client handles. 1166 */ 1167 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n); 1168 #endif 1169 } 1170 1171 /* ARGSUSED */ 1172 static void 1173 clreclaim4(void *all) 1174 { 1175 struct nfs4_clnt *nfscl; 1176 1177 /* 1178 * The system is low on memory; go through and try to reclaim some from 1179 * every zone on the system. 
/*
 * Minimum time-out values indexed by call type.
 * These units are in "eighths" of a second to avoid multiplies.
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

#define	SHORTWAIT	(NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout.  MAXTIMO, the ceiling, is 20 * hz.
 *
 * backoff(tim) doubles a timeout that is still below MAXTIMO, clamping the
 * result to MAXTIMO; a timeout already at or above MAXTIMO is returned
 * unchanged.  Both macros evaluate their argument more than once.
 */
#define	MAXTIMO	(20 * hz)
#define	backoff(tim)	(((tim) >= MAXTIMO) ? (tim) : dobackoff(tim))
#define	dobackoff(tim)	((MAXTIMO < ((tim) << 1)) ? MAXTIMO : ((tim) << 1))
1241 */ 1242 svp = mi->mi_curr_serv; 1243 rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl); 1244 if (rpcerr.re_errno != 0) 1245 return (rpcerr.re_errno); 1246 1247 timeo = (mi->mi_timeo * hz) / 10; 1248 1249 /* 1250 * If hard mounted fs, retry call forever unless hard error 1251 * occurs. 1252 * 1253 * For forced unmount, let the recovery thread through but return 1254 * an error for all others. This is so that user processes can 1255 * exit quickly. The recovery thread bails out after one 1256 * transmission so that it can tell if it needs to continue. 1257 * 1258 * For zone shutdown, behave as above to encourage quick 1259 * process exit, but also fail quickly when servers have 1260 * timed out before and reduce the timeouts. 1261 */ 1262 is_recov = (curthread == mi->mi_recovthread); 1263 firstcall = 1; 1264 do { 1265 tryagain = FALSE; 1266 1267 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE, 1268 "nfs4_rfscall: vfs_flag=0x%x, %s", 1269 mi->mi_vfsp->vfs_flag, 1270 is_recov ? "recov thread" : "not recov thread")); 1271 1272 /* 1273 * It's possible while we're retrying the admin 1274 * decided to reboot. 1275 */ 1276 mutex_enter(&mi->mi_lock); 1277 if (mi->mi_flags & MI4_SHUTDOWN) { 1278 mutex_exit(&mi->mi_lock); 1279 clfree4(client, ch, nfscl); 1280 return (EIO); 1281 } 1282 mutex_exit(&mi->mi_lock); 1283 1284 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) && 1285 (!is_recov || !firstcall)) { 1286 clfree4(client, ch, nfscl); 1287 return (EIO); 1288 } 1289 1290 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 1291 mutex_enter(&mi->mi_lock); 1292 if ((mi->mi_flags & MI4_TIMEDOUT) || 1293 !is_recov || !firstcall) { 1294 mutex_exit(&mi->mi_lock); 1295 clfree4(client, ch, nfscl); 1296 return (EIO); 1297 } 1298 mutex_exit(&mi->mi_lock); 1299 timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10; 1300 } 1301 1302 firstcall = 0; 1303 TICK_TO_TIMEVAL(timeo, &wait); 1304 1305 /* 1306 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 1307 * and SIGTERM. 
(Preserving the existing masks). 1308 * Mask out SIGINT if mount option nointr is specified. 1309 */ 1310 sigintr(&smask, (int)mi->mi_flags & MI4_INT); 1311 if (!(mi->mi_flags & MI4_INT)) 1312 client->cl_nosignal = TRUE; 1313 1314 /* 1315 * If there is a current signal, then don't bother 1316 * even trying to send out the request because we 1317 * won't be able to block waiting for the response. 1318 * Simply assume RPC_INTR and get on with it. 1319 */ 1320 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 1321 status = RPC_INTR; 1322 else { 1323 status = CLNT_CALL(client, which, xdrargs, argsp, 1324 xdrres, resp, wait); 1325 } 1326 1327 if (!(mi->mi_flags & MI4_INT)) 1328 client->cl_nosignal = FALSE; 1329 /* 1330 * restore original signal mask 1331 */ 1332 sigunintr(&smask); 1333 1334 switch (status) { 1335 case RPC_SUCCESS: 1336 break; 1337 1338 case RPC_INTR: 1339 /* 1340 * There is no way to recover from this error, 1341 * even if mount option nointr is specified. 1342 * SIGKILL, for example, cannot be blocked. 1343 */ 1344 rpcerr.re_status = RPC_INTR; 1345 rpcerr.re_errno = EINTR; 1346 break; 1347 1348 case RPC_UDERROR: 1349 /* 1350 * If the NFS server is local (vold) and 1351 * it goes away then we get RPC_UDERROR. 1352 * This is a retryable error, so we would 1353 * loop, so check to see if the specific 1354 * error was ECONNRESET, indicating that 1355 * target did not exist at all. If so, 1356 * return with RPC_PROGUNAVAIL and 1357 * ECONNRESET to indicate why. 
1358 */ 1359 CLNT_GETERR(client, &rpcerr); 1360 if (rpcerr.re_errno == ECONNRESET) { 1361 rpcerr.re_status = RPC_PROGUNAVAIL; 1362 rpcerr.re_errno = ECONNRESET; 1363 break; 1364 } 1365 /*FALLTHROUGH*/ 1366 1367 default: /* probably RPC_TIMEDOUT */ 1368 1369 if (IS_UNRECOVERABLE_RPC(status)) 1370 break; 1371 1372 /* 1373 * increment server not responding count 1374 */ 1375 mutex_enter(&mi->mi_lock); 1376 mi->mi_noresponse++; 1377 mutex_exit(&mi->mi_lock); 1378 #ifdef DEBUG 1379 nfscl->nfscl_stat.noresponse.value.ui64++; 1380 #endif 1381 /* 1382 * On zone shutdown, mark server dead and move on. 1383 */ 1384 if (zone_status_get(curproc->p_zone) >= 1385 ZONE_IS_SHUTTING_DOWN) { 1386 mutex_enter(&mi->mi_lock); 1387 mi->mi_flags |= MI4_TIMEDOUT; 1388 mutex_exit(&mi->mi_lock); 1389 clfree4(client, ch, nfscl); 1390 return (EIO); 1391 } 1392 1393 /* 1394 * NFS client failover support: 1395 * return and let the caller take care of 1396 * failover. We only return for failover mounts 1397 * because otherwise we want the "not responding" 1398 * message, the timer updates, etc. 1399 */ 1400 if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) && 1401 (error = try_failover(status)) != 0) { 1402 clfree4(client, ch, nfscl); 1403 *rpc_statusp = status; 1404 return (error); 1405 } 1406 1407 if (flags & RFSCALL_SOFT) 1408 break; 1409 1410 tryagain = TRUE; 1411 1412 /* 1413 * The call is in progress (over COTS). 1414 * Try the CLNT_CALL again, but don't 1415 * print a noisy error message. 
1416 */ 1417 if (status == RPC_INPROGRESS) 1418 break; 1419 1420 timeo = backoff(timeo); 1421 mutex_enter(&mi->mi_lock); 1422 if (!(mi->mi_flags & MI4_PRINTED)) { 1423 mi->mi_flags |= MI4_PRINTED; 1424 mutex_exit(&mi->mi_lock); 1425 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 0, 0, 0, 1426 FALSE, NULL, 0, NULL); 1427 } else 1428 mutex_exit(&mi->mi_lock); 1429 1430 if (*doqueue && curproc->p_sessp->s_vp != NULL) { 1431 *doqueue = 0; 1432 if (!(mi->mi_flags & MI4_NOPRINT)) 1433 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 1434 0, 0, 0, FALSE, NULL, 0, NULL); 1435 } 1436 } 1437 } while (tryagain); 1438 1439 DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status, 1440 int, rpcerr.re_errno); 1441 1442 if (status != RPC_SUCCESS) { 1443 zoneid_t zoneid = mi->mi_zone->zone_id; 1444 1445 /* 1446 * Let soft mounts use the timed out message. 1447 */ 1448 if (status == RPC_INPROGRESS) 1449 status = RPC_TIMEDOUT; 1450 nfscl->nfscl_stat.badcalls.value.ui64++; 1451 if (status != RPC_INTR) { 1452 mutex_enter(&mi->mi_lock); 1453 mi->mi_flags |= MI4_DOWN; 1454 mutex_exit(&mi->mi_lock); 1455 CLNT_GETERR(client, &rpcerr); 1456 #ifdef DEBUG 1457 bufp = clnt_sperror(client, svp->sv_hostname); 1458 zprintf(zoneid, "NFS%d %s failed for %s\n", 1459 mi->mi_vers, mi->mi_rfsnames[which], bufp); 1460 if (curproc->p_sessp->s_vp != NULL) { 1461 if (!(mi->mi_flags & MI4_NOPRINT)) { 1462 uprintf("NFS%d %s failed for %s\n", 1463 mi->mi_vers, mi->mi_rfsnames[which], 1464 bufp); 1465 } 1466 } 1467 kmem_free(bufp, MAXPATHLEN); 1468 #else 1469 zprintf(zoneid, 1470 "NFS %s failed for server %s: error %d (%s)\n", 1471 mi->mi_rfsnames[which], svp->sv_hostname, 1472 status, clnt_sperrno(status)); 1473 if (curproc->p_sessp->s_vp != NULL) { 1474 if (!(mi->mi_flags & MI4_NOPRINT)) { 1475 uprintf( 1476 "NFS %s failed for server %s: error %d (%s)\n", 1477 mi->mi_rfsnames[which], 1478 svp->sv_hostname, status, 1479 clnt_sperrno(status)); 1480 } 1481 } 1482 #endif 1483 /* 1484 * when CLNT_CALL() fails with 
RPC_AUTHERROR, 1485 * re_errno is set appropriately depending on 1486 * the authentication error 1487 */ 1488 if (status == RPC_VERSMISMATCH || 1489 status == RPC_PROGVERSMISMATCH) 1490 rpcerr.re_errno = EIO; 1491 } 1492 } else { 1493 /* 1494 * Test the value of mi_down and mi_printed without 1495 * holding the mi_lock mutex. If they are both zero, 1496 * then it is okay to skip the down and printed 1497 * processing. This saves on a mutex_enter and 1498 * mutex_exit pair for a normal, successful RPC. 1499 * This was just complete overhead. 1500 */ 1501 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) { 1502 mutex_enter(&mi->mi_lock); 1503 mi->mi_flags &= ~MI4_DOWN; 1504 if (mi->mi_flags & MI4_PRINTED) { 1505 mi->mi_flags &= ~MI4_PRINTED; 1506 mutex_exit(&mi->mi_lock); 1507 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1508 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 1509 0, FALSE, NULL, 0, NULL); 1510 } else 1511 mutex_exit(&mi->mi_lock); 1512 } 1513 1514 if (*doqueue == 0) { 1515 if (!(mi->mi_flags & MI4_NOPRINT) && 1516 !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1517 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0, 1518 FALSE, NULL, 0, NULL); 1519 1520 *doqueue = 1; 1521 } 1522 } 1523 1524 clfree4(client, ch, nfscl); 1525 1526 ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 1527 1528 TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d", 1529 rpcerr.re_errno); 1530 1531 *rpc_statusp = status; 1532 return (rpcerr.re_errno); 1533 } 1534 1535 /* 1536 * rfs4call - general wrapper for RPC calls initiated by the client 1537 */ 1538 void 1539 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp, 1540 cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep) 1541 { 1542 int i, error; 1543 enum clnt_stat rpc_status = NFS4_OK; 1544 int num_resops; 1545 struct nfs4_clnt *nfscl; 1546 1547 ASSERT(curproc->p_zone == mi->mi_zone); 1548 nfscl = zone_getspecific(nfs4clnt_zone_key, curproc->p_zone); 1549 ASSERT(nfscl != NULL); 1550 1551 
nfscl->nfscl_stat.calls.value.ui64++; 1552 mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++; 1553 1554 /* Set up the results struct for XDR usage */ 1555 resp->argsp = argsp; 1556 resp->array = NULL; 1557 resp->status = 0; 1558 resp->decode_len = 0; 1559 1560 error = nfs4_rfscall(mi, NFSPROC4_COMPOUND, 1561 xdr_COMPOUND4args_clnt, (caddr_t)argsp, 1562 xdr_COMPOUND4res_clnt, (caddr_t)resp, cr, 1563 doqueue, &rpc_status, flags, nfscl); 1564 1565 /* Return now if it was an RPC error */ 1566 if (error) { 1567 ep->error = error; 1568 ep->stat = resp->status; 1569 ep->rpc_status = rpc_status; 1570 return; 1571 } 1572 1573 /* else we'll count the processed operations */ 1574 num_resops = resp->decode_len; 1575 for (i = 0; i < num_resops; i++) { 1576 /* 1577 * Count the individual operations 1578 * processed by the server. 1579 */ 1580 if (resp->array[i].resop >= NFSPROC4_NULL && 1581 resp->array[i].resop <= OP_WRITE) 1582 mi->mi_reqs[resp->array[i].resop].value.ui64++; 1583 } 1584 1585 ep->error = 0; 1586 ep->stat = resp->status; 1587 ep->rpc_status = rpc_status; 1588 } 1589 1590 /* 1591 * nfs4rename_update - updates stored state after a rename. Currently this 1592 * is the path of the object and anything under it, and the filehandle of 1593 * the renamed object. 1594 */ 1595 void 1596 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm) 1597 { 1598 sfh4_update(VTOR4(renvp)->r_fh, nfh4p); 1599 fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm); 1600 } 1601 1602 /* 1603 * Routine to look up the filehandle for the given path and rootvp. 1604 * 1605 * Return values: 1606 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is 1607 * updated. 1608 * - error: return value (errno value) and/or *statp is set appropriately. 
1609 */ 1610 #define RML_ORDINARY 1 1611 #define RML_NAMED_ATTR 2 1612 #define RML_ATTRDIR 3 1613 1614 static void 1615 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp, 1616 int filetype, cred_t *cr, 1617 nfs_fh4 *fhp, nfs4_ga_res_t *garp, /* fh, attrs for object */ 1618 nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp, /* fh, attrs for parent */ 1619 nfs4_error_t *ep) 1620 { 1621 COMPOUND4args_clnt args; 1622 COMPOUND4res_clnt res; 1623 nfs_argop4 *argop; 1624 nfs_resop4 *resop; 1625 int num_argops; 1626 lookup4_param_t lookuparg; 1627 nfs_fh4 *tmpfhp; 1628 int doqueue = 1; 1629 char *path; 1630 mntinfo4_t *mi; 1631 1632 ASSERT(fname != NULL); 1633 ASSERT(rootvp->v_type == VDIR); 1634 1635 mi = VTOMI4(rootvp); 1636 path = fn_path(fname); 1637 switch (filetype) { 1638 case RML_NAMED_ATTR: 1639 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR; 1640 args.ctag = TAG_REMAP_LOOKUP_NA; 1641 break; 1642 case RML_ATTRDIR: 1643 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR; 1644 args.ctag = TAG_REMAP_LOOKUP_AD; 1645 break; 1646 case RML_ORDINARY: 1647 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1648 args.ctag = TAG_REMAP_LOOKUP; 1649 break; 1650 default: 1651 ep->error = EINVAL; 1652 return; 1653 } 1654 lookuparg.argsp = &args; 1655 lookuparg.resp = &res; 1656 lookuparg.header_len = 1; /* Putfh */ 1657 lookuparg.trailer_len = 0; 1658 lookuparg.ga_bits = NFS4_VATTR_MASK; 1659 lookuparg.mi = VTOMI4(rootvp); 1660 1661 (void) nfs4lookup_setup(path, &lookuparg, 1); 1662 1663 /* 0: putfh directory */ 1664 argop = args.array; 1665 argop[0].argop = OP_CPUTFH; 1666 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh; 1667 1668 num_argops = args.array_len; 1669 1670 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1671 1672 if (ep->error || res.status != NFS4_OK) 1673 goto exit; 1674 1675 /* get the object filehandle */ 1676 resop = &res.array[res.array_len - 2]; 1677 if (resop->resop != OP_GETFH) { 1678 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1679 0, NULL, NULL, 0, NULL, 0, 
TAG_NONE, TAG_NONE, 0, 0); 1680 ep->stat = NFS4ERR_SERVERFAULT; 1681 goto exit; 1682 } 1683 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1684 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1685 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1686 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1687 TAG_NONE, 0, 0); 1688 ep->stat = NFS4ERR_SERVERFAULT; 1689 goto exit; 1690 } 1691 fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1692 nfs_fh4_copy(tmpfhp, fhp); 1693 1694 /* get the object attributes */ 1695 resop = &res.array[res.array_len - 1]; 1696 if (garp && resop->resop == OP_GETATTR) 1697 *garp = resop->nfs_resop4_u.opgetattr.ga_res; 1698 1699 /* See if there are enough fields in the response for parent info */ 1700 if ((int)res.array_len - 5 <= 0) 1701 goto exit; 1702 1703 /* get the parent filehandle */ 1704 resop = &res.array[res.array_len - 5]; 1705 if (resop->resop != OP_GETFH) { 1706 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL, 1707 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1708 ep->stat = NFS4ERR_SERVERFAULT; 1709 goto exit; 1710 } 1711 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1712 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) { 1713 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL, 1714 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE, 1715 TAG_NONE, 0, 0); 1716 ep->stat = NFS4ERR_SERVERFAULT; 1717 goto exit; 1718 } 1719 pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP); 1720 nfs_fh4_copy(tmpfhp, pfhp); 1721 1722 /* get the parent attributes */ 1723 resop = &res.array[res.array_len - 4]; 1724 if (pgarp && resop->resop == OP_GETATTR) 1725 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res; 1726 1727 exit: 1728 /* 1729 * It is too hard to remember where all the OP_LOOKUPs are 1730 */ 1731 nfs4args_lookup_free(argop, num_argops); 1732 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1733 1734 if (!ep->error) 1735 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1736 kmem_free(path, strlen(path)+1); 1737 } 1738 
1739 /* 1740 * NFS client failover / volatile filehandle support 1741 * 1742 * Recover the filehandle for the given rnode. 1743 * 1744 * Errors are returned via the nfs4_error_t parameter. 1745 */ 1746 1747 void 1748 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 1749 { 1750 rnode4_t *rp = VTOR4(vp); 1751 vnode_t *rootvp = NULL; 1752 vnode_t *dvp = NULL; 1753 cred_t *cr, *cred_otw; 1754 nfs4_ga_res_t gar, pgar; 1755 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 1756 int filetype = RML_ORDINARY; 1757 nfs4_recov_state_t recov = {NULL, 0, 0}; 1758 int badfhcount = 0; 1759 nfs4_open_stream_t *osp = NULL; 1760 bool_t first_time = TRUE; /* first time getting OTW cred */ 1761 bool_t last_time = FALSE; /* last time getting OTW cred */ 1762 1763 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1764 "nfs4_remap_file: remapping %s", rnode4info(rp))); 1765 ASSERT(nfs4_consistent_type(vp)); 1766 1767 if (vp->v_flag & VROOT) { 1768 nfs4_remap_root(mi, ep, flags); 1769 return; 1770 } 1771 1772 /* 1773 * Given the root fh, use the path stored in 1774 * the rnode to find the fh for the new server. 1775 */ 1776 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1777 if (ep->error != 0) 1778 return; 1779 1780 cr = curthread->t_cred; 1781 ASSERT(cr != NULL); 1782 get_remap_cred: 1783 /* 1784 * Releases the osp, if it is provided. 1785 * Puts a hold on the cred_otw and the new osp (if found). 
1786 */ 1787 cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp, 1788 &first_time, &last_time); 1789 ASSERT(cred_otw != NULL); 1790 1791 if (rp->r_flags & R4ISXATTR) { 1792 filetype = RML_NAMED_ATTR; 1793 (void) vtodv(vp, &dvp, cred_otw, FALSE); 1794 } 1795 1796 if (vp->v_flag & V_XATTRDIR) { 1797 filetype = RML_ATTRDIR; 1798 } 1799 1800 if (filetype == RML_ORDINARY && rootvp->v_type == VREG) { 1801 /* file mount, doesn't need a remap */ 1802 goto done; 1803 } 1804 1805 again: 1806 remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw, 1807 &newfh, &gar, &newpfh, &pgar, ep); 1808 1809 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1810 "nfs4_remap_file: remap_lookup returned %d/%d", 1811 ep->error, ep->stat)); 1812 1813 if (last_time == FALSE && ep->error == EACCES) { 1814 crfree(cred_otw); 1815 if (dvp != NULL) 1816 VN_RELE(dvp); 1817 goto get_remap_cred; 1818 } 1819 if (ep->error != 0) 1820 goto done; 1821 1822 switch (ep->stat) { 1823 case NFS4_OK: 1824 badfhcount = 0; 1825 if (recov.rs_flags & NFS4_RS_DELAY_MSG) { 1826 mutex_enter(&rp->r_statelock); 1827 rp->r_delay_interval = 0; 1828 mutex_exit(&rp->r_statelock); 1829 uprintf("NFS File Available..\n"); 1830 } 1831 break; 1832 case NFS4ERR_FHEXPIRED: 1833 case NFS4ERR_BADHANDLE: 1834 /* 1835 * If we ran into filehandle problems, we should try to 1836 * remap the root vnode first and hope life gets better. 1837 * But we need to avoid loops. 
1838 */ 1839 if (badfhcount++ > 0) 1840 goto done; 1841 if (newfh.nfs_fh4_len != 0) { 1842 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1843 newfh.nfs_fh4_len = 0; 1844 } 1845 if (newpfh.nfs_fh4_len != 0) { 1846 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1847 newpfh.nfs_fh4_len = 0; 1848 } 1849 /* relative path - remap rootvp then retry */ 1850 VN_RELE(rootvp); 1851 rootvp = NULL; 1852 nfs4_remap_root(mi, ep, flags); 1853 if (ep->error != 0 || ep->stat != NFS4_OK) 1854 goto done; 1855 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp); 1856 if (ep->error != 0) 1857 goto done; 1858 goto again; 1859 case NFS4ERR_DELAY: 1860 badfhcount = 0; 1861 nfs4_set_delay_wait(vp); 1862 ep->error = nfs4_wait_for_delay(vp, &recov); 1863 if (ep->error != 0) 1864 goto done; 1865 goto again; 1866 case NFS4ERR_ACCESS: 1867 /* get new cred, try again */ 1868 if (last_time == TRUE) 1869 goto done; 1870 if (dvp != NULL) 1871 VN_RELE(dvp); 1872 crfree(cred_otw); 1873 goto get_remap_cred; 1874 default: 1875 goto done; 1876 } 1877 1878 /* 1879 * Check on the new and old rnodes before updating; 1880 * if the vnode type or size changes, issue a warning 1881 * and mark the file dead. 1882 */ 1883 mutex_enter(&rp->r_statelock); 1884 if (flags & NFS4_REMAP_CKATTRS) { 1885 if (vp->v_type != gar.n4g_va.va_type || 1886 (vp->v_type != VDIR && 1887 rp->r_size != gar.n4g_va.va_size)) { 1888 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1889 "nfs4_remap_file: size %d vs. %d, type %d vs. 
%d", 1890 (int)rp->r_size, (int)gar.n4g_va.va_size, 1891 vp->v_type, gar.n4g_va.va_type)); 1892 mutex_exit(&rp->r_statelock); 1893 nfs4_queue_event(RE_FILE_DIFF, mi, 1894 rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0, 1895 TAG_NONE, TAG_NONE, 0, 0); 1896 nfs4_fail_recov(vp, NULL, 0, NFS4_OK); 1897 goto done; 1898 } 1899 } 1900 ASSERT(gar.n4g_va.va_type != VNON); 1901 rp->r_server = mi->mi_curr_serv; 1902 1903 if (gar.n4g_fsid_valid) { 1904 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0); 1905 rp->r_srv_fsid = gar.n4g_fsid; 1906 if (FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid)) 1907 rp->r_flags &= ~R4SRVSTUB; 1908 else 1909 rp->r_flags |= R4SRVSTUB; 1910 nfs_rw_exit(&rp->r_server->sv_lock); 1911 #ifdef DEBUG 1912 } else { 1913 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1914 "remap_file: fsid attr not provided by server. rp=%p", 1915 (void *)rp)); 1916 #endif 1917 } 1918 mutex_exit(&rp->r_statelock); 1919 nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */ 1920 sfh4_update(rp->r_fh, &newfh); 1921 ASSERT(nfs4_consistent_type(vp)); 1922 1923 /* 1924 * If we got parent info, use it to update the parent 1925 */ 1926 if (newpfh.nfs_fh4_len != 0) { 1927 if (rp->r_svnode.sv_dfh != NULL) 1928 sfh4_update(rp->r_svnode.sv_dfh, &newpfh); 1929 if (dvp != NULL) { 1930 /* force update of attrs */ 1931 nfs4_attrcache_noinval(dvp, &pgar, gethrtime()); 1932 } 1933 } 1934 done: 1935 if (newfh.nfs_fh4_len != 0) 1936 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 1937 if (newpfh.nfs_fh4_len != 0) 1938 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 1939 if (cred_otw != NULL) 1940 crfree(cred_otw); 1941 if (rootvp != NULL) 1942 VN_RELE(rootvp); 1943 if (dvp != NULL) 1944 VN_RELE(dvp); 1945 if (osp != NULL) 1946 open_stream_rele(osp, rp); 1947 } 1948 1949 /* 1950 * Client-side failover support: remap the filehandle for vp if it appears 1951 * necessary. 
errors are returned via the nfs4_error_t parameter; though, 1952 * if there is a problem, we will just try again later. 1953 */ 1954 1955 void 1956 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep) 1957 { 1958 if (vp == NULL) 1959 return; 1960 1961 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY)) 1962 return; 1963 1964 if (VTOR4(vp)->r_server == mi->mi_curr_serv) 1965 return; 1966 1967 nfs4_remap_file(mi, vp, flags, ep); 1968 } 1969 1970 /* 1971 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node. 1972 * 1973 * Our caller has a filehandle for ".." relative to a particular 1974 * directory object. We want to find or create a parent vnode 1975 * with that filehandle and return it. We can of course create 1976 * a vnode from this filehandle, but we need to also make sure 1977 * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR) 1978 * that we have a parent FH for future reopens as well. If 1979 * we have a remap failure, we won't be able to reopen this 1980 * file, but we won't treat that as fatal because a reopen 1981 * is at least unlikely. Someday nfs4_reopen() should look 1982 * for a missing parent FH and try a remap to recover from it. 1983 * 1984 * need_start_op argument indicates whether this function should 1985 * do a start_op before calling remap_lookup(). This should 1986 * be FALSE, if you are the recovery thread or in an op; otherwise, 1987 * set it to TRUE. 
1988 */ 1989 int 1990 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp, 1991 cred_t *cr, vnode_t **vpp, int need_start_op) 1992 { 1993 mntinfo4_t *mi = VTOMI4(dvp); 1994 nfs4_fname_t *np = NULL, *pnp = NULL; 1995 vnode_t *vp = NULL, *rootvp = NULL; 1996 rnode4_t *rp; 1997 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL}; 1998 nfs4_ga_res_t gar, pgar; 1999 vattr_t va, pva; 2000 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2001 nfs4_sharedfh_t *sfh = NULL, *psfh = NULL; 2002 nfs4_recov_state_t recov_state; 2003 2004 #ifdef DEBUG 2005 /* 2006 * ensure need_start_op is correct 2007 */ 2008 { 2009 int no_need_start_op = (tsd_get(nfs4_tsd_key) || 2010 (curthread == mi->mi_recovthread)); 2011 /* C needs a ^^ operator! */ 2012 ASSERT(((need_start_op) && (!no_need_start_op)) || 2013 ((! need_start_op) && (no_need_start_op))); 2014 } 2015 #endif 2016 ASSERT(VTOMI4(dvp)->mi_zone == curproc->p_zone); 2017 2018 NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE, 2019 "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp, 2020 rnode4info(VTOR4(dvp)))); 2021 2022 /* 2023 * rootvp might be needed eventually. Holding it now will 2024 * ensure that r4find_unlocked() will find it, if ".." is the root. 2025 */ 2026 e.error = VFS_ROOT(mi->mi_vfsp, &rootvp); 2027 if (e.error != 0) 2028 goto out; 2029 rp = r4find_unlocked(fhp, mi->mi_vfsp); 2030 if (rp != NULL) { 2031 *vpp = RTOV4(rp); 2032 VN_RELE(rootvp); 2033 return (0); 2034 } 2035 2036 /* 2037 * Since we don't have the rnode, we have to go over the wire. 2038 * remap_lookup() can get all of the filehandles and attributes 2039 * we need in one operation. 
2040 */ 2041 np = fn_parent(VTOSV(dvp)->sv_name); 2042 ASSERT(np != NULL); 2043 2044 recov_state.rs_flags = 0; 2045 recov_state.rs_num_retry_despite_err = 0; 2046 recov_retry: 2047 if (need_start_op) { 2048 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP, 2049 &recov_state, NULL); 2050 if (e.error != 0) { 2051 goto out; 2052 } 2053 } 2054 va.va_type = VNON; 2055 pva.va_type = VNON; 2056 remap_lookup(np, rootvp, RML_ORDINARY, cr, 2057 &newfh, &gar, &newpfh, &pgar, &e); 2058 if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) { 2059 if (need_start_op) { 2060 bool_t abort; 2061 2062 abort = nfs4_start_recovery(&e, mi, 2063 rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL); 2064 if (abort) { 2065 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2066 &recov_state, FALSE); 2067 if (e.error == 0) 2068 e.error = EIO; 2069 goto out; 2070 } 2071 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2072 &recov_state, TRUE); 2073 goto recov_retry; 2074 } 2075 if (e.error == 0) 2076 e.error = EIO; 2077 goto out; 2078 } 2079 2080 if (!e.error) { 2081 va = gar.n4g_va; 2082 pva = pgar.n4g_va; 2083 } 2084 2085 if ((e.error != 0) || 2086 (va.va_type != VDIR)) { 2087 if (e.error == 0) 2088 e.error = EIO; 2089 goto out; 2090 } 2091 2092 if (e.stat != NFS4_OK) { 2093 e.error = EIO; 2094 goto out; 2095 } 2096 2097 /* 2098 * It is possible for remap_lookup() to return with no error, 2099 * but without providing the parent filehandle and attrs. 2100 */ 2101 if (pva.va_type != VDIR) { 2102 /* 2103 * Call remap_lookup() again, this time with the 2104 * newpfh and pgar args in the first position. 
2105 */ 2106 pnp = fn_parent(np); 2107 if (pnp != NULL) { 2108 remap_lookup(pnp, rootvp, RML_ORDINARY, cr, 2109 &newpfh, &pgar, NULL, NULL, &e); 2110 if (nfs4_needs_recovery(&e, FALSE, 2111 mi->mi_vfsp)) { 2112 if (need_start_op) { 2113 bool_t abort; 2114 2115 abort = nfs4_start_recovery(&e, mi, 2116 rootvp, NULL, NULL, NULL, 2117 OP_LOOKUP, NULL); 2118 if (abort) { 2119 nfs4_end_fop(mi, rootvp, NULL, 2120 OH_LOOKUP, &recov_state, 2121 FALSE); 2122 if (e.error == 0) 2123 e.error = EIO; 2124 goto out; 2125 } 2126 nfs4_end_fop(mi, rootvp, NULL, 2127 OH_LOOKUP, &recov_state, TRUE); 2128 goto recov_retry; 2129 } 2130 if (e.error == 0) 2131 e.error = EIO; 2132 goto out; 2133 } 2134 2135 if (e.stat != NFS4_OK) { 2136 e.error = EIO; 2137 goto out; 2138 } 2139 } 2140 if ((pnp == NULL) || 2141 (e.error != 0) || 2142 (pva.va_type == VNON)) { 2143 if (need_start_op) 2144 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, 2145 &recov_state, FALSE); 2146 if (e.error == 0) 2147 e.error = EIO; 2148 goto out; 2149 } 2150 } 2151 ASSERT(newpfh.nfs_fh4_len != 0); 2152 if (need_start_op) 2153 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE); 2154 psfh = sfh4_get(&newpfh, mi); 2155 2156 sfh = sfh4_get(&newfh, mi); 2157 vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t); 2158 2159 out: 2160 if (np != NULL) 2161 fn_rele(&np); 2162 if (pnp != NULL) 2163 fn_rele(&pnp); 2164 if (newfh.nfs_fh4_len != 0) 2165 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len); 2166 if (newpfh.nfs_fh4_len != 0) 2167 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len); 2168 if (sfh != NULL) 2169 sfh4_rele(&sfh); 2170 if (psfh != NULL) 2171 sfh4_rele(&psfh); 2172 if (rootvp != NULL) 2173 VN_RELE(rootvp); 2174 *vpp = vp; 2175 return (e.error); 2176 } 2177 2178 #ifdef DEBUG 2179 size_t r_path_memuse = 0; 2180 #endif 2181 2182 /* 2183 * NFS client failover support 2184 * 2185 * sv4_free() frees the malloc'd portion of a "servinfo_t". 
2186 */ 2187 void 2188 sv4_free(servinfo4_t *svp) 2189 { 2190 servinfo4_t *next; 2191 struct knetconfig *knconf; 2192 2193 while (svp != NULL) { 2194 next = svp->sv_next; 2195 if (svp->sv_dhsec) 2196 sec_clnt_freeinfo(svp->sv_dhsec); 2197 if (svp->sv_secdata) 2198 sec_clnt_freeinfo(svp->sv_secdata); 2199 if (svp->sv_save_secinfo && 2200 svp->sv_save_secinfo != svp->sv_secinfo) 2201 secinfo_free(svp->sv_save_secinfo); 2202 if (svp->sv_secinfo) 2203 secinfo_free(svp->sv_secinfo); 2204 if (svp->sv_hostname && svp->sv_hostnamelen > 0) 2205 kmem_free(svp->sv_hostname, svp->sv_hostnamelen); 2206 knconf = svp->sv_knconf; 2207 if (knconf != NULL) { 2208 if (knconf->knc_protofmly != NULL) 2209 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2210 if (knconf->knc_proto != NULL) 2211 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2212 kmem_free(knconf, sizeof (*knconf)); 2213 } 2214 knconf = svp->sv_origknconf; 2215 if (knconf != NULL) { 2216 if (knconf->knc_protofmly != NULL) 2217 kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 2218 if (knconf->knc_proto != NULL) 2219 kmem_free(knconf->knc_proto, KNC_STRSIZE); 2220 kmem_free(knconf, sizeof (*knconf)); 2221 } 2222 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0) 2223 kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen); 2224 if (svp->sv_path != NULL) { 2225 kmem_free(svp->sv_path, svp->sv_pathlen); 2226 } 2227 nfs_rw_destroy(&svp->sv_lock); 2228 kmem_free(svp, sizeof (*svp)); 2229 svp = next; 2230 } 2231 } 2232 2233 void 2234 nfs4_printfhandle(nfs4_fhandle_t *fhp) 2235 { 2236 int *ip; 2237 char *buf; 2238 size_t bufsize; 2239 char *cp; 2240 2241 /* 2242 * 13 == "(file handle:" 2243 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times 2244 * 1 == ' ' 2245 * 8 == maximum strlen of "%x" 2246 * 3 == ")\n\0" 2247 */ 2248 bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3; 2249 buf = kmem_alloc(bufsize, KM_NOSLEEP); 2250 if (buf == NULL) 2251 return; 2252 2253 cp = buf; 2254 (void) strcpy(cp, "(file 
handle:"); 2255 while (*cp != '\0') 2256 cp++; 2257 for (ip = (int *)fhp->fh_buf; 2258 ip < (int *)&fhp->fh_buf[fhp->fh_len]; 2259 ip++) { 2260 (void) sprintf(cp, " %x", *ip); 2261 while (*cp != '\0') 2262 cp++; 2263 } 2264 (void) strcpy(cp, ")\n"); 2265 2266 zcmn_err(getzoneid(), CE_CONT, "%s", buf); 2267 2268 kmem_free(buf, bufsize); 2269 } 2270 2271 /* 2272 * The NFSv4 readdir cache subsystem. 2273 * 2274 * We provide a set of interfaces to allow the rest of the system to utilize 2275 * a caching mechanism while encapsulating the details of the actual 2276 * implementation. This should allow for better maintainability and 2277 * extensibilty by consolidating the implementation details in one location. 2278 */ 2279 2280 /* 2281 * Comparator used by AVL routines. 2282 */ 2283 static int 2284 rddir4_cache_compar(const void *x, const void *y) 2285 { 2286 rddir4_cache_impl *ai = (rddir4_cache_impl *)x; 2287 rddir4_cache_impl *bi = (rddir4_cache_impl *)y; 2288 rddir4_cache *a = &ai->rc; 2289 rddir4_cache *b = &bi->rc; 2290 2291 if (a->nfs4_cookie == b->nfs4_cookie) { 2292 if (a->buflen == b->buflen) 2293 return (0); 2294 if (a->buflen < b->buflen) 2295 return (-1); 2296 return (1); 2297 } 2298 2299 if (a->nfs4_cookie < b->nfs4_cookie) 2300 return (-1); 2301 2302 return (1); 2303 } 2304 2305 /* 2306 * Allocate an opaque handle for the readdir cache. 2307 */ 2308 void 2309 rddir4_cache_create(rnode4_t *rp) 2310 { 2311 ASSERT(rp->r_dir == NULL); 2312 2313 rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 2314 2315 avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl), 2316 offsetof(rddir4_cache_impl, tree)); 2317 } 2318 2319 /* 2320 * Purge the cache of all cached readdir responses. 
2321 */ 2322 void 2323 rddir4_cache_purge(rnode4_t *rp) 2324 { 2325 rddir4_cache_impl *rdip; 2326 rddir4_cache_impl *nrdip; 2327 2328 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2329 2330 if (rp->r_dir == NULL) 2331 return; 2332 2333 rdip = avl_first(rp->r_dir); 2334 2335 while (rdip != NULL) { 2336 nrdip = AVL_NEXT(rp->r_dir, rdip); 2337 avl_remove(rp->r_dir, rdip); 2338 rdip->rc.flags &= ~RDDIRCACHED; 2339 rddir4_cache_rele(rp, &rdip->rc); 2340 rdip = nrdip; 2341 } 2342 ASSERT(avl_numnodes(rp->r_dir) == 0); 2343 } 2344 2345 /* 2346 * Destroy the readdir cache. 2347 */ 2348 void 2349 rddir4_cache_destroy(rnode4_t *rp) 2350 { 2351 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2352 if (rp->r_dir == NULL) 2353 return; 2354 2355 rddir4_cache_purge(rp); 2356 avl_destroy(rp->r_dir); 2357 kmem_free(rp->r_dir, sizeof (avl_tree_t)); 2358 rp->r_dir = NULL; 2359 } 2360 2361 /* 2362 * Locate a readdir response from the readdir cache. 2363 * 2364 * Return values: 2365 * 2366 * NULL - If there is an unrecoverable situation like the operation may have 2367 * been interrupted. 2368 * 2369 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller. 2370 * The flags are set approprately, such that the caller knows 2371 * what state the entry is in. 2372 */ 2373 rddir4_cache * 2374 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count) 2375 { 2376 rddir4_cache_impl *rdip = NULL; 2377 rddir4_cache_impl srdip; 2378 rddir4_cache *srdc; 2379 rddir4_cache *rdc = NULL; 2380 rddir4_cache *nrdc = NULL; 2381 avl_index_t where; 2382 2383 top: 2384 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 2385 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2386 /* 2387 * Check to see if the readdir cache has been disabled. If so, then 2388 * simply allocate an rddir4_cache entry and return it, since caching 2389 * operations do not apply. 2390 */ 2391 if (rp->r_dir == NULL) { 2392 if (nrdc == NULL) { 2393 /* 2394 * Drop the lock because we are doing a sleeping 2395 * allocation. 
2396 */ 2397 mutex_exit(&rp->r_statelock); 2398 rdc = rddir4_cache_alloc(KM_SLEEP); 2399 rdc->nfs4_cookie = cookie; 2400 rdc->buflen = count; 2401 mutex_enter(&rp->r_statelock); 2402 return (rdc); 2403 } 2404 return (nrdc); 2405 } 2406 2407 srdc = &srdip.rc; 2408 srdc->nfs4_cookie = cookie; 2409 srdc->buflen = count; 2410 2411 rdip = avl_find(rp->r_dir, &srdip, &where); 2412 2413 /* 2414 * If we didn't find an entry then create one and insert it 2415 * into the cache. 2416 */ 2417 if (rdip == NULL) { 2418 /* 2419 * Check for the case where we have made a second pass through 2420 * the cache due to a lockless allocation. If we find that no 2421 * thread has already inserted this entry, do the insert now 2422 * and return. 2423 */ 2424 if (nrdc != NULL) { 2425 avl_insert(rp->r_dir, nrdc->data, where); 2426 nrdc->flags |= RDDIRCACHED; 2427 rddir4_cache_hold(nrdc); 2428 return (nrdc); 2429 } 2430 2431 #ifdef DEBUG 2432 nfs4_readdir_cache_misses++; 2433 #endif 2434 /* 2435 * First, try to allocate an entry without sleeping. If that 2436 * fails then drop the lock and do a sleeping allocation. 2437 */ 2438 nrdc = rddir4_cache_alloc(KM_NOSLEEP); 2439 if (nrdc != NULL) { 2440 nrdc->nfs4_cookie = cookie; 2441 nrdc->buflen = count; 2442 avl_insert(rp->r_dir, nrdc->data, where); 2443 nrdc->flags |= RDDIRCACHED; 2444 rddir4_cache_hold(nrdc); 2445 return (nrdc); 2446 } 2447 2448 /* 2449 * Drop the lock and do a sleeping allocation. We incur 2450 * additional overhead by having to search the cache again, 2451 * but this case should be rare. 2452 */ 2453 mutex_exit(&rp->r_statelock); 2454 nrdc = rddir4_cache_alloc(KM_SLEEP); 2455 nrdc->nfs4_cookie = cookie; 2456 nrdc->buflen = count; 2457 mutex_enter(&rp->r_statelock); 2458 /* 2459 * We need to take another pass through the cache 2460 * since we dropped our lock to perform the alloc. 2461 * Another thread may have come by and inserted the 2462 * entry we are interested in. 
2463 */ 2464 goto top; 2465 } 2466 2467 /* 2468 * Check to see if we need to free our entry. This can happen if 2469 * another thread came along beat us to the insert. We can 2470 * safely call rddir4_cache_free directly because no other thread 2471 * would have a reference to this entry. 2472 */ 2473 if (nrdc != NULL) 2474 rddir4_cache_free((rddir4_cache_impl *)nrdc->data); 2475 2476 #ifdef DEBUG 2477 nfs4_readdir_cache_hits++; 2478 #endif 2479 /* 2480 * Found something. Make sure it's ready to return. 2481 */ 2482 rdc = &rdip->rc; 2483 rddir4_cache_hold(rdc); 2484 /* 2485 * If the cache entry is in the process of being filled in, wait 2486 * until this completes. The RDDIRWAIT bit is set to indicate that 2487 * someone is waiting and when the thread currently filling the entry 2488 * is done, it should do a cv_broadcast to wakeup all of the threads 2489 * waiting for it to finish. If the thread wakes up to find that 2490 * someone new is now trying to complete the the entry, go back 2491 * to sleep. 2492 */ 2493 while (rdc->flags & RDDIR) { 2494 /* 2495 * The entry is not complete. 2496 */ 2497 nfs_rw_exit(&rp->r_rwlock); 2498 rdc->flags |= RDDIRWAIT; 2499 #ifdef DEBUG 2500 nfs4_readdir_cache_waits++; 2501 #endif 2502 while (rdc->flags & RDDIRWAIT) { 2503 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) { 2504 /* 2505 * We got interrupted, probably the user 2506 * typed ^C or an alarm fired. We free the 2507 * new entry if we allocated one. 2508 */ 2509 rddir4_cache_rele(rp, rdc); 2510 mutex_exit(&rp->r_statelock); 2511 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2512 RW_READER, FALSE); 2513 mutex_enter(&rp->r_statelock); 2514 return (NULL); 2515 } 2516 } 2517 mutex_exit(&rp->r_statelock); 2518 (void) nfs_rw_enter_sig(&rp->r_rwlock, 2519 RW_READER, FALSE); 2520 mutex_enter(&rp->r_statelock); 2521 } 2522 2523 /* 2524 * The entry we were waiting on may have been purged from 2525 * the cache and should no longer be used, release it and 2526 * start over. 
2527 */ 2528 if (!(rdc->flags & RDDIRCACHED)) { 2529 rddir4_cache_rele(rp, rdc); 2530 goto top; 2531 } 2532 2533 /* 2534 * The entry is completed. Return it. 2535 */ 2536 return (rdc); 2537 } 2538 2539 /* 2540 * Allocate a cache element and return it. Can return NULL if memory is 2541 * low. 2542 */ 2543 static rddir4_cache * 2544 rddir4_cache_alloc(int flags) 2545 { 2546 rddir4_cache_impl *rdip = NULL; 2547 rddir4_cache *rc = NULL; 2548 2549 rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags); 2550 2551 if (rdip != NULL) { 2552 rc = &rdip->rc; 2553 rc->data = (void *)rdip; 2554 rc->nfs4_cookie = 0; 2555 rc->nfs4_ncookie = 0; 2556 rc->entries = NULL; 2557 rc->eof = 0; 2558 rc->entlen = 0; 2559 rc->buflen = 0; 2560 rc->actlen = 0; 2561 /* 2562 * A readdir is required so set the flag. 2563 */ 2564 rc->flags = RDDIRREQ; 2565 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL); 2566 rc->error = 0; 2567 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL); 2568 rdip->count = 1; 2569 #ifdef DEBUG 2570 atomic_add_64(&clstat4_debug.dirent.value.ui64, 1); 2571 #endif 2572 } 2573 return (rc); 2574 } 2575 2576 /* 2577 * Increment the reference count to this cache element. 2578 */ 2579 static void 2580 rddir4_cache_hold(rddir4_cache *rc) 2581 { 2582 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data; 2583 2584 mutex_enter(&rdip->lock); 2585 rdip->count++; 2586 mutex_exit(&rdip->lock); 2587 } 2588 2589 /* 2590 * Release a reference to this cache element. If the count is zero then 2591 * free the element. 2592 */ 2593 void 2594 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc) 2595 { 2596 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data; 2597 2598 ASSERT(MUTEX_HELD(&rp->r_statelock)); 2599 2600 /* 2601 * Check to see if we have any waiters. If so, we can wake them 2602 * so that they can proceed. 
2603 */ 2604 if (rdc->flags & RDDIRWAIT) { 2605 rdc->flags &= ~RDDIRWAIT; 2606 cv_broadcast(&rdc->cv); 2607 } 2608 2609 mutex_enter(&rdip->lock); 2610 ASSERT(rdip->count > 0); 2611 if (--rdip->count == 0) { 2612 mutex_exit(&rdip->lock); 2613 rddir4_cache_free(rdip); 2614 } else 2615 mutex_exit(&rdip->lock); 2616 } 2617 2618 /* 2619 * Free a cache element. 2620 */ 2621 static void 2622 rddir4_cache_free(rddir4_cache_impl *rdip) 2623 { 2624 rddir4_cache *rc = &rdip->rc; 2625 2626 #ifdef DEBUG 2627 atomic_add_64(&clstat4_debug.dirent.value.ui64, -1); 2628 #endif 2629 if (rc->entries != NULL) 2630 kmem_free(rc->entries, rc->buflen); 2631 cv_destroy(&rc->cv); 2632 mutex_destroy(&rdip->lock); 2633 kmem_free(rdip, sizeof (*rdip)); 2634 } 2635 2636 /* 2637 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat 2638 * framework. 2639 */ 2640 static int 2641 cl4_snapshot(kstat_t *ksp, void *buf, int rw) 2642 { 2643 ksp->ks_snaptime = gethrtime(); 2644 if (rw == KSTAT_WRITE) { 2645 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl)); 2646 #ifdef DEBUG 2647 /* 2648 * Currently only the global zone can write to kstats, but we 2649 * add the check just for paranoia. 2650 */ 2651 if (INGLOBALZONE(curproc)) 2652 bcopy((char *)buf + sizeof (clstat4_tmpl), &clstat4_debug, 2653 sizeof (clstat4_debug)); 2654 #endif 2655 } else { 2656 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl)); 2657 #ifdef DEBUG 2658 /* 2659 * If we're displaying the "global" debug kstat values, we 2660 * display them as-is to all zones since in fact they apply to 2661 * the system as a whole. 
2662 */ 2663 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl), 2664 sizeof (clstat4_debug)); 2665 #endif 2666 } 2667 return (0); 2668 } 2669 2670 2671 2672 /* 2673 * Zone support 2674 */ 2675 static void * 2676 clinit4_zone(zoneid_t zoneid) 2677 { 2678 kstat_t *nfs4_client_kstat; 2679 struct nfs4_clnt *nfscl; 2680 uint_t ndata; 2681 2682 nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP); 2683 mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL); 2684 nfscl->nfscl_chtable4 = NULL; 2685 nfscl->nfscl_zoneid = zoneid; 2686 2687 bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl)); 2688 ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t); 2689 #ifdef DEBUG 2690 ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t); 2691 #endif 2692 if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client", 2693 "misc", KSTAT_TYPE_NAMED, ndata, 2694 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) { 2695 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat; 2696 nfs4_client_kstat->ks_snapshot = cl4_snapshot; 2697 kstat_install(nfs4_client_kstat); 2698 } 2699 mutex_enter(&nfs4_clnt_list_lock); 2700 list_insert_head(&nfs4_clnt_list, nfscl); 2701 mutex_exit(&nfs4_clnt_list_lock); 2702 return (nfscl); 2703 } 2704 2705 /*ARGSUSED*/ 2706 static void 2707 clfini4_zone(zoneid_t zoneid, void *arg) 2708 { 2709 struct nfs4_clnt *nfscl = arg; 2710 chhead_t *chp, *next; 2711 2712 if (nfscl == NULL) 2713 return; 2714 mutex_enter(&nfs4_clnt_list_lock); 2715 list_remove(&nfs4_clnt_list, nfscl); 2716 mutex_exit(&nfs4_clnt_list_lock); 2717 clreclaim4_zone(nfscl, 0); 2718 for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) { 2719 ASSERT(chp->ch_list == NULL); 2720 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1); 2721 next = chp->ch_next; 2722 kmem_free(chp, sizeof (*chp)); 2723 } 2724 kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid); 2725 mutex_destroy(&nfscl->nfscl_chtable4_lock); 2726 kmem_free(nfscl, sizeof (*nfscl)); 2727 
} 2728 2729 /* 2730 * Called by endpnt_destructor to make sure the client handles are 2731 * cleaned up before the RPC endpoints. This becomes a no-op if 2732 * clfini_zone (above) is called first. This function is needed 2733 * (rather than relying on clfini_zone to clean up) because the ZSD 2734 * callbacks have no ordering mechanism, so we have no way to ensure 2735 * that clfini_zone is called before endpnt_destructor. 2736 */ 2737 void 2738 clcleanup4_zone(zoneid_t zoneid) 2739 { 2740 struct nfs4_clnt *nfscl; 2741 2742 mutex_enter(&nfs4_clnt_list_lock); 2743 nfscl = list_head(&nfs4_clnt_list); 2744 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) { 2745 if (nfscl->nfscl_zoneid == zoneid) { 2746 clreclaim4_zone(nfscl, 0); 2747 break; 2748 } 2749 } 2750 mutex_exit(&nfs4_clnt_list_lock); 2751 } 2752 2753 int 2754 nfs4_subr_init(void) 2755 { 2756 /* 2757 * Allocate and initialize the client handle cache 2758 */ 2759 chtab4_cache = kmem_cache_create("client_handle4_cache", 2760 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL, 2761 NULL, 0); 2762 2763 /* 2764 * Initialize the list of per-zone client handles (and associated data). 2765 * This needs to be done before we call zone_key_create(). 2766 */ 2767 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt), 2768 offsetof(struct nfs4_clnt, nfscl_node)); 2769 2770 /* 2771 * Initialize the zone_key for per-zone client handle lists. 
2772 */ 2773 zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone); 2774 2775 if (nfs4err_delay_time == 0) 2776 nfs4err_delay_time = NFS4ERR_DELAY_TIME; 2777 2778 return (0); 2779 } 2780 2781 int 2782 nfs4_subr_fini(void) 2783 { 2784 /* 2785 * Deallocate the client handle cache 2786 */ 2787 kmem_cache_destroy(chtab4_cache); 2788 2789 /* 2790 * Destroy the zone_key 2791 */ 2792 (void) zone_key_delete(nfs4clnt_zone_key); 2793 2794 return (0); 2795 } 2796 /* 2797 * Set or Clear direct I/O flag 2798 * VOP_RWLOCK() is held for write access to prevent a race condition 2799 * which would occur if a process is in the middle of a write when 2800 * directio flag gets set. It is possible that all pages may not get flushed. 2801 * 2802 * This is a copy of nfs_directio, changes here may need to be made 2803 * there and vice versa. 2804 */ 2805 2806 int 2807 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr) 2808 { 2809 int error = 0; 2810 rnode4_t *rp; 2811 2812 rp = VTOR4(vp); 2813 2814 if (cmd == DIRECTIO_ON) { 2815 2816 if (rp->r_flags & R4DIRECTIO) 2817 return (0); 2818 2819 /* 2820 * Flush the page cache. 
2821 */ 2822 2823 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 2824 2825 if (rp->r_flags & R4DIRECTIO) { 2826 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2827 return (0); 2828 } 2829 2830 if (nfs4_has_pages(vp) && 2831 ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) { 2832 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 2833 B_INVAL, cr); 2834 if (error) { 2835 if (error == ENOSPC || error == EDQUOT) { 2836 mutex_enter(&rp->r_statelock); 2837 if (!rp->r_error) 2838 rp->r_error = error; 2839 mutex_exit(&rp->r_statelock); 2840 } 2841 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2842 return (error); 2843 } 2844 } 2845 2846 mutex_enter(&rp->r_statelock); 2847 rp->r_flags |= R4DIRECTIO; 2848 mutex_exit(&rp->r_statelock); 2849 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 2850 return (0); 2851 } 2852 2853 if (cmd == DIRECTIO_OFF) { 2854 mutex_enter(&rp->r_statelock); 2855 rp->r_flags &= ~R4DIRECTIO; /* disable direct mode */ 2856 mutex_exit(&rp->r_statelock); 2857 return (0); 2858 } 2859 2860 return (EINVAL); 2861 } 2862 2863 /* 2864 * Return TRUE if the file has any pages. Always go back to 2865 * the master vnode to check v_pages since none of the shadows 2866 * can have pages. 2867 */ 2868 2869 bool_t 2870 nfs4_has_pages(vnode_t *vp) 2871 { 2872 rnode4_t *rp; 2873 2874 rp = VTOR4(vp); 2875 if (IS_SHADOW(vp, rp)) 2876 vp = RTOV4(rp); /* RTOV4 always gives the master */ 2877 2878 return (vn_has_cached_data(vp)); 2879 } 2880 2881 /* 2882 * This table is used to determine whether the client should attempt 2883 * failover based on the clnt_stat value returned by CLNT_CALL. The 2884 * clnt_stat is used as an index into the table. If 2885 * the error value that corresponds to the clnt_stat value in the 2886 * table is non-zero, then that is the error to be returned AND 2887 * that signals that failover should be attempted. 
2888 * 2889 * Special note: If the RPC_ values change, then direct indexing of the 2890 * table is no longer valid, but having the RPC_ values in the table 2891 * allow the functions to detect the change and issue a warning. 2892 * In this case, the code will always attempt failover as a defensive 2893 * measure. 2894 */ 2895 2896 static struct try_failover_tab { 2897 enum clnt_stat cstat; 2898 int error; 2899 } try_failover_table [] = { 2900 2901 RPC_SUCCESS, 0, 2902 RPC_CANTENCODEARGS, 0, 2903 RPC_CANTDECODERES, 0, 2904 RPC_CANTSEND, ECOMM, 2905 RPC_CANTRECV, ECOMM, 2906 RPC_TIMEDOUT, ETIMEDOUT, 2907 RPC_VERSMISMATCH, 0, 2908 RPC_AUTHERROR, 0, 2909 RPC_PROGUNAVAIL, 0, 2910 RPC_PROGVERSMISMATCH, 0, 2911 RPC_PROCUNAVAIL, 0, 2912 RPC_CANTDECODEARGS, 0, 2913 RPC_SYSTEMERROR, ENOSR, 2914 RPC_UNKNOWNHOST, EHOSTUNREACH, 2915 RPC_RPCBFAILURE, ENETUNREACH, 2916 RPC_PROGNOTREGISTERED, ECONNREFUSED, 2917 RPC_FAILED, ETIMEDOUT, 2918 RPC_UNKNOWNPROTO, EHOSTUNREACH, 2919 RPC_INTR, 0, 2920 RPC_UNKNOWNADDR, EHOSTUNREACH, 2921 RPC_TLIERROR, 0, 2922 RPC_NOBROADCAST, EHOSTUNREACH, 2923 RPC_N2AXLATEFAILURE, ECONNREFUSED, 2924 RPC_UDERROR, 0, 2925 RPC_INPROGRESS, 0, 2926 RPC_STALERACHANDLE, EINVAL, 2927 RPC_CANTCONNECT, ECONNREFUSED, 2928 RPC_XPRTFAILED, ECONNABORTED, 2929 RPC_CANTCREATESTREAM, ECONNREFUSED, 2930 RPC_CANTSTORE, ENOBUFS 2931 }; 2932 2933 /* 2934 * nfs4_try_failover - determine whether the client should 2935 * attempt failover based on the values stored in the nfs4_error_t. 2936 */ 2937 int 2938 nfs4_try_failover(nfs4_error_t *ep) 2939 { 2940 if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE) 2941 return (TRUE); 2942 2943 if (ep->error && ep->rpc_status != RPC_SUCCESS) 2944 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE); 2945 2946 return (FALSE); 2947 } 2948 2949 /* 2950 * try_failover - internal version of nfs4_try_failover, called 2951 * only by rfscall and aclcall. 
Determine if failover is warranted 2952 * based on the clnt_stat and return the error number if it is. 2953 */ 2954 static int 2955 try_failover(enum clnt_stat rpc_status) 2956 { 2957 int err = 0; 2958 2959 if (rpc_status == RPC_SUCCESS) 2960 return (0); 2961 2962 #ifdef DEBUG 2963 if (rpc_status != 0 && nfs4_try_failover_any) { 2964 err = ETIMEDOUT; 2965 goto done; 2966 } 2967 #endif 2968 /* 2969 * The rpc status is used as an index into the table. 2970 * If the rpc status is outside of the range of the 2971 * table or if the rpc error numbers have been changed 2972 * since the table was constructed, then print a warning 2973 * (DEBUG only) and try failover anyway. Otherwise, just 2974 * grab the resulting error number out of the table. 2975 */ 2976 if (rpc_status < RPC_SUCCESS || rpc_status >= 2977 sizeof (try_failover_table)/sizeof (try_failover_table[0]) || 2978 try_failover_table[rpc_status].cstat != rpc_status) { 2979 2980 err = ETIMEDOUT; 2981 #ifdef DEBUG 2982 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d", 2983 rpc_status); 2984 #endif 2985 } else 2986 err = try_failover_table[rpc_status].error; 2987 2988 done: 2989 if (rpc_status) 2990 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 2991 "nfs4_try_failover: %strying failover on error %d", 2992 err ? "" : "NOT ", rpc_status)); 2993 2994 return (err); 2995 } 2996 2997 void 2998 nfs4_error_zinit(nfs4_error_t *ep) 2999 { 3000 ep->error = 0; 3001 ep->stat = NFS4_OK; 3002 ep->rpc_status = RPC_SUCCESS; 3003 } 3004 3005 void 3006 nfs4_error_init(nfs4_error_t *ep, int error) 3007 { 3008 ep->error = error; 3009 ep->stat = NFS4_OK; 3010 ep->rpc_status = RPC_SUCCESS; 3011 } 3012 3013 3014 #ifdef DEBUG 3015 3016 /* 3017 * Return a 16-bit hash for filehandle, stateid, clientid, owner. 3018 * use the same algorithm as for NFS v3. 
3019 * 3020 */ 3021 int 3022 hash16(void *p, int len) 3023 { 3024 int i, rem; 3025 uint_t *wp; 3026 uint_t key = 0; 3027 3028 /* protect against non word aligned */ 3029 if ((rem = len & 3) != 0) 3030 len &= ~3; 3031 3032 for (i = 0, wp = (uint_t *)p; i < len; i += 4, wp++) { 3033 key ^= (*wp >> 16) ^ *wp; 3034 } 3035 3036 /* hash left-over bytes */ 3037 for (i = 0; i < rem; i++) 3038 key ^= *((uchar_t *)p + i); 3039 3040 return (key & 0xffff); 3041 } 3042 3043 /* 3044 * rnode4info - return filehandle and path information for an rnode. 3045 * XXX MT issues: uses a single static buffer, no locking of path. 3046 */ 3047 char * 3048 rnode4info(rnode4_t *rp) 3049 { 3050 static char buf[80]; 3051 nfs4_fhandle_t fhandle; 3052 char *path; 3053 char *type; 3054 3055 if (rp == NULL) 3056 return ("null"); 3057 if (rp->r_flags & R4ISXATTR) 3058 type = "attr"; 3059 else if (RTOV4(rp)->v_flag & V_XATTRDIR) 3060 type = "attrdir"; 3061 else if (RTOV4(rp)->v_flag & VROOT) 3062 type = "root"; 3063 else if (RTOV4(rp)->v_type == VDIR) 3064 type = "dir"; 3065 else if (RTOV4(rp)->v_type == VREG) 3066 type = "file"; 3067 else 3068 type = "other"; 3069 sfh4_copyval(rp->r_fh, &fhandle); 3070 path = fn_path(rp->r_svnode.sv_name); 3071 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n", 3072 (void *)rp, path, type, rp->r_flags, 3073 hash16((void *)&fhandle.fh_buf, fhandle.fh_len)); 3074 kmem_free(path, strlen(path)+1); 3075 return (buf); 3076 } 3077 #endif 3078