/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All Rights Reserved
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/time.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/dnlc.h>
#include <sys/list.h>
#include <sys/mntent.h>
#include <sys/tsol/label.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <sys/fs/autofs.h>

#include <sys/sdt.h>


/*
 * Arguments passed to thread to free data structures from forced unmount.
 */

typedef struct {
	vfs_t *fm_vfsp;
	int fm_flag;
	cred_t *fm_cr;
} freemountargs_t;

static void async_free_mount(vfs_t *, int, cred_t *);
static void nfs4_free_mount(vfs_t *, int, cred_t *);
static void nfs4_free_mount_thread(freemountargs_t *);
static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);
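
/*
 * Forced-unmount cleanup is handed off to a separate thread: a
 * freemountargs_t captures the vfs pointer, unmount flag and credential,
 * and the static helpers declared above (async_free_mount,
 * nfs4_free_mount_thread, nfs4_free_mount) are defined later in this file
 * to perform the actual teardown.
 */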

/*
 * From rpcsec module (common/rpcsec).
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v4_tmpl in nfs_stats.c
 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access", "close", "commit", "create",
	"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
	"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
	"read", "readdir", "readlink", "remove", "rename", "renew",
	"restorefh", "savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};

/*
 * nfs4_max_mount_retry is the number of times the client will redrive
 * a mount compound before giving up and returning failure. The intent
 * is to redrive mount compounds which fail NFS4ERR_STALE so that
 * if a component of the server path being mounted goes stale, it can
 * "recover" by redriving the mount compound (LOOKUP ops). This recovery
 * code is needed outside of the recovery framework because mount is a
 * special case. The client doesn't create vnodes/rnodes for components
 * of the server path being mounted. The recovery code recovers real
 * client objects, not STALE FHs which map to components of the server
 * path being mounted.
 *
 * We could just fail the mount on the first time, but that would
 * instantly trigger failover (from nfs4_mount), and the client should
 * try to re-lookup the STALE FH before doing failover. The easiest
 * way to "re-lookup" is to simply redrive the mount compound.
 */
static int nfs4_max_mount_retry = 2;

/*
 * nfs4 vfs operations.
 */
int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int nfs4_unmount(vfs_t *, int, cred_t *);
static int nfs4_root(vfs_t *, vnode_t **);
static int nfs4_statvfs(vfs_t *, struct statvfs64 *);
static int nfs4_sync(vfs_t *, short, cred_t *);
static int nfs4_vget(vfs_t *, vnode_t **, fid_t *);
static int nfs4_mountroot(vfs_t *, whymountroot_t);
static void nfs4_freevfs(vfs_t *);

static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
    int, cred_t *, zone_t *);

vfsops_t *nfs4_vfsops;

int nfs4_vfsinit(void);
void nfs4_vfsfini(void);
static void nfs4setclientid_init(void);
static void nfs4setclientid_fini(void);
static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
    struct nfs4_server *, nfs4_error_t *, int *);
static void destroy_nfs4_server(nfs4_server_t *);
static void remove_mi(nfs4_server_t *, mntinfo4_t *);

extern void nfs4_ephemeral_init(void);
extern void nfs4_ephemeral_fini(void);

/* referral related routines */
static servinfo4_t *copy_svp(servinfo4_t *);
static void free_knconf_contents(struct knetconfig *k);
static char *extract_referral_point(const char *, int);
static void setup_newsvpath(servinfo4_t *, int);
static void update_servinfo4(servinfo4_t *, fs_location4 *,
    struct nfs_fsl_info *, char *, int);

/*
 * Initialize the vfs structure
 */

static int nfs4fstyp;


/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
 */
extern int rdma_debug;
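
/*
 * nfs4init() is the initialization routine for the "nfs4" filesystem
 * type: it registers the vfs operations template below, builds the
 * regular and trigger vnode ops from their templates, and records the
 * filesystem type index in nfs4fstyp.  On failure, any ops that were
 * already constructed are freed again before the error is returned.
 */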

int
nfs4init(int fstyp, char *name)
{
	static const fs_operation_def_t nfs4_vfsops_template[] = {
		VFSNAME_MOUNT, { .vfs_mount = nfs4_mount },
		VFSNAME_UNMOUNT, { .vfs_unmount = nfs4_unmount },
		VFSNAME_ROOT, { .vfs_root = nfs4_root },
		VFSNAME_STATVFS, { .vfs_statvfs = nfs4_statvfs },
		VFSNAME_SYNC, { .vfs_sync = nfs4_sync },
		VFSNAME_VGET, { .vfs_vget = nfs4_vget },
		VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs4_mountroot },
		VFSNAME_FREEVFS, { .vfs_freevfs = nfs4_freevfs },
		NULL, NULL
	};
	int error;

	nfs4_vfsops = NULL;
	nfs4_vnodeops = NULL;
	nfs4_trigger_vnodeops = NULL;

	error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
	if (error != 0) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vfs ops template");
		goto out;
	}

	error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
	if (error != 0) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vnode ops template");
		goto out;
	}

	error = vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template,
	    &nfs4_trigger_vnodeops);
	if (error != 0) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad trigger vnode ops template");
		goto out;
	}

	nfs4fstyp = fstyp;
	(void) nfs4_vfsinit();
	(void) nfs4_init_dot_entries();

out:
	if (error) {
		if (nfs4_trigger_vnodeops != NULL)
			vn_freevnodeops(nfs4_trigger_vnodeops);

		if (nfs4_vnodeops != NULL)
			vn_freevnodeops(nfs4_vnodeops);

		(void) vfs_freevfsops_by_type(fstyp);
	}

	return (error);
}

void
nfs4fini(void)
{
	(void) nfs4_destroy_dot_entries();
	nfs4_vfsfini();
}

/*
 * Create a new sec_data structure to store AUTH_DH related data:
 * netname, syncaddr, knetconfig. The AUTH_F_RPCTIMESYNC flag is not
 * set for NFS V4 since we avoid contacting the rpcbind daemon and use
 * the IP time service (IPPORT_TIMESERVER) instead.
 *
 * sec_data can be freed by sec_clnt_freeinfo().
 */
static struct sec_data *
create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
    struct knetconfig *knconf) {
	struct sec_data *secdata;
	dh_k4_clntdata_t *data;
	char *pf, *p;

	if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
		return (NULL);

	secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
	secdata->flags = 0;

	data = kmem_alloc(sizeof (*data), KM_SLEEP);

	data->syncaddr.maxlen = syncaddr->maxlen;
	data->syncaddr.len = syncaddr->len;
	data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
	bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);

	/*
	 * duplicate the knconf information for the
	 * new opaque data.
	 */
	data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
	*data->knconf = *knconf;
	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
	bcopy(knconf->knc_proto, p, KNC_STRSIZE);
	data->knconf->knc_protofmly = pf;
	data->knconf->knc_proto = p;

	/* move server netname to the sec_data structure */
	data->netname = kmem_alloc(nlen, KM_SLEEP);
	bcopy(netname, data->netname, nlen);
	data->netnamelen = (int)nlen;

	secdata->secmod = AUTH_DH;
	secdata->rpcflavor = AUTH_DH;
	secdata->data = (caddr_t)data;

	return (secdata);
}

/*
 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
 * is responsible for freeing.
 */
sec_data_t *
copy_sec_data(sec_data_t *fsecdata) {
	sec_data_t *tsecdata;

	if (fsecdata == NULL)
		return (NULL);

	if (fsecdata->rpcflavor == AUTH_DH) {
		dh_k4_clntdata_t *fdata = (dh_k4_clntdata_t *)fsecdata->data;

		if (fdata == NULL)
			return (NULL);

		tsecdata = (sec_data_t *)create_authdh_data(fdata->netname,
		    fdata->netnamelen, &fdata->syncaddr, fdata->knconf);

		return (tsecdata);
	}

	tsecdata = kmem_zalloc(sizeof (sec_data_t), KM_SLEEP);

	tsecdata->secmod = fsecdata->secmod;
	tsecdata->rpcflavor = fsecdata->rpcflavor;
	tsecdata->flags = fsecdata->flags;
	tsecdata->uid = fsecdata->uid;

	if (fsecdata->rpcflavor == RPCSEC_GSS) {
		gss_clntdata_t *gcd = (gss_clntdata_t *)fsecdata->data;

		tsecdata->data = (caddr_t)copy_sec_data_gss(gcd);
	} else {
		tsecdata->data = NULL;
	}

	return (tsecdata);
}

gss_clntdata_t *
copy_sec_data_gss(gss_clntdata_t *fdata)
{
	gss_clntdata_t *tdata;

	if (fdata == NULL)
		return (NULL);

	tdata = kmem_zalloc(sizeof (gss_clntdata_t), KM_SLEEP);

	tdata->mechanism.length = fdata->mechanism.length;
	tdata->mechanism.elements = kmem_zalloc(fdata->mechanism.length,
	    KM_SLEEP);
	bcopy(fdata->mechanism.elements, tdata->mechanism.elements,
	    fdata->mechanism.length);

	tdata->service = fdata->service;

	(void) strcpy(tdata->uname, fdata->uname);
	(void) strcpy(tdata->inst, fdata->inst);
	(void) strcpy(tdata->realm, fdata->realm);

	tdata->qop = fdata->qop;

	return (tdata);
}
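
/*
 * copy_sec_data() and copy_sec_data_gss() above are used when an
 * existing servinfo4_t has to be duplicated (see copy_svp() later in
 * this file), so that the copy owns its own security data and can be
 * freed independently of the original.
 */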

static int
nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
{
	servinfo4_t *si;

	/*
	 * Iterate over the servinfo4 list to make sure
	 * we do not have a duplicate. Skip any servinfo4
	 * that has been marked "NOT IN USE"
	 */
	for (si = svp_head; si; si = si->sv_next) {
		(void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
		if (si->sv_flags & SV4_NOTINUSE) {
			nfs_rw_exit(&si->sv_lock);
			continue;
		}
		nfs_rw_exit(&si->sv_lock);
		if (si == svp)
			continue;
		if (si->sv_addr.len == svp->sv_addr.len &&
		    strcmp(si->sv_knconf->knc_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0 &&
		    bcmp(si->sv_addr.buf, svp->sv_addr.buf,
		    si->sv_addr.len) == 0) {
			/* it's a duplicate */
			return (1);
		}
	}
	/* it's not a duplicate */
	return (0);
}

void
nfs4_free_args(struct nfs_args *nargs)
{
	if (nargs->knconf) {
		if (nargs->knconf->knc_protofmly)
			kmem_free(nargs->knconf->knc_protofmly,
			    KNC_STRSIZE);
		if (nargs->knconf->knc_proto)
			kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
		kmem_free(nargs->knconf, sizeof (*nargs->knconf));
		nargs->knconf = NULL;
	}

	if (nargs->fh) {
		kmem_free(nargs->fh, strlen(nargs->fh) + 1);
		nargs->fh = NULL;
	}

	if (nargs->hostname) {
		kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
		nargs->hostname = NULL;
	}

	if (nargs->addr) {
		if (nargs->addr->buf) {
			ASSERT(nargs->addr->len);
			kmem_free(nargs->addr->buf, nargs->addr->len);
		}
		kmem_free(nargs->addr, sizeof (struct netbuf));
		nargs->addr = NULL;
	}

	if (nargs->syncaddr) {
		ASSERT(nargs->syncaddr->len);
		if (nargs->syncaddr->buf) {
			ASSERT(nargs->syncaddr->len);
			kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
		}
		kmem_free(nargs->syncaddr, sizeof (struct netbuf));
		nargs->syncaddr = NULL;
	}

	if (nargs->netname) {
		kmem_free(nargs->netname, strlen(nargs->netname) + 1);
		nargs->netname = NULL;
	}

	if (nargs->nfs_ext_u.nfs_extA.secdata) {
		sec_clnt_freeinfo(
		    nargs->nfs_ext_u.nfs_extA.secdata);
		nargs->nfs_ext_u.nfs_extA.secdata = NULL;
	}
}

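/*
 * nfs4_copyin() copies the user-level nfs_args into a kernel-resident
 * struct nfs_args.  The STRUCT_DECL()/STRUCT_FGET() macros are used so
 * that a 32-bit application's arguments are interpreted correctly on a
 * 64-bit kernel.  Everything allocated here (knetconfig strings, server
 * address, hostname, netname, security data) is released again by
 * nfs4_free_args().
 */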

int
nfs4_copyin(char *data, int datalen, struct nfs_args *nargs)
{

	int error;
	size_t hlen;			/* length of hostname */
	size_t nlen;			/* length of netname */
	char netname[MAXNETNAMELEN+1];	/* server's netname */
	struct netbuf addr;		/* server's address */
	struct netbuf syncaddr;		/* AUTH_DES time sync addr */
	struct knetconfig *knconf;	/* transport structure */
	struct sec_data *secdata = NULL;	/* security data */
	STRUCT_DECL(nfs_args, args);	/* nfs mount arguments */
	STRUCT_DECL(knetconfig, knconf_tmp);
	STRUCT_DECL(netbuf, addr_tmp);
	int flags;
	char *p, *pf;
	struct pathname pn;
	char *userbufptr;


	bzero(nargs, sizeof (*nargs));

	STRUCT_INIT(args, get_udatamodel());
	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
	if (copyin(data, STRUCT_BUF(args), MIN(datalen,
	    STRUCT_SIZE(args))))
		return (EFAULT);

	nargs->wsize = STRUCT_FGET(args, wsize);
	nargs->rsize = STRUCT_FGET(args, rsize);
	nargs->timeo = STRUCT_FGET(args, timeo);
	nargs->retrans = STRUCT_FGET(args, retrans);
	nargs->acregmin = STRUCT_FGET(args, acregmin);
	nargs->acregmax = STRUCT_FGET(args, acregmax);
	nargs->acdirmin = STRUCT_FGET(args, acdirmin);
	nargs->acdirmax = STRUCT_FGET(args, acdirmax);

	flags = STRUCT_FGET(args, flags);
	nargs->flags = flags;

	addr.buf = NULL;
	syncaddr.buf = NULL;


	/*
	 * Allocate space for a knetconfig structure and
	 * its strings and copy in from user-land.
	 */
	knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
	STRUCT_INIT(knconf_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
	    STRUCT_SIZE(knconf_tmp))) {
		kmem_free(knconf, sizeof (*knconf));
		return (EFAULT);
	}

	knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
	knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
	knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
	if (get_udatamodel() != DATAMODEL_LP64) {
		knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
	} else {
		knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
	}

	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		kmem_free(knconf, sizeof (*knconf));
		return (error);
	}

	error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		kmem_free(knconf, sizeof (*knconf));
		return (error);
	}


	knconf->knc_protofmly = pf;
	knconf->knc_proto = p;

	nargs->knconf = knconf;

	/*
	 * Get server address
	 */
	STRUCT_INIT(addr_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
	    STRUCT_SIZE(addr_tmp))) {
		error = EFAULT;
		goto errout;
	}

	nargs->addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	userbufptr = STRUCT_FGETP(addr_tmp, buf);
	addr.len = STRUCT_FGET(addr_tmp, len);
	addr.buf = kmem_alloc(addr.len, KM_SLEEP);
	addr.maxlen = addr.len;
	if (copyin(userbufptr, addr.buf, addr.len)) {
		kmem_free(addr.buf, addr.len);
		error = EFAULT;
		goto errout;
	}
	bcopy(&addr, nargs->addr, sizeof (struct netbuf));

	/*
	 * Get the root fhandle
	 */
	error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);
	if (error)
		goto errout;

	/* Volatile fh: keep server paths, so use actual-size strings */
	nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
	bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen);
	nargs->fh[pn.pn_pathlen] = '\0';
	pn_free(&pn);


	/*
	 * Get server's hostname
	 */
	if (flags & NFSMNT_HOSTNAME) {
		error = copyinstr(STRUCT_FGETP(args, hostname),
		    netname, sizeof (netname), &hlen);
		if (error)
			goto errout;
		nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
		(void) strcpy(nargs->hostname, netname);

	} else {
		nargs->hostname = NULL;
	}


	/*
	 * If there are syncaddr and netname data, load them in. This is
	 * to support data needed for NFSV4 when AUTH_DH is the negotiated
	 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
	 */
	netname[0] = '\0';
	if (flags & NFSMNT_SECURE) {

		/* get syncaddr */
		STRUCT_INIT(addr_tmp, get_udatamodel());
		if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
		    STRUCT_SIZE(addr_tmp))) {
			error = EINVAL;
			goto errout;
		}
		userbufptr = STRUCT_FGETP(addr_tmp, buf);
		syncaddr.len = STRUCT_FGET(addr_tmp, len);
		syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
		syncaddr.maxlen = syncaddr.len;
		if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
			kmem_free(syncaddr.buf, syncaddr.len);
			error = EFAULT;
			goto errout;
		}

		nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
		bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));

		/* get server's netname */
		if (copyinstr(STRUCT_FGETP(args, netname), netname,
		    sizeof (netname), &nlen)) {
			error = EFAULT;
			goto errout;
		}

		netname[nlen] = '\0';
		nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
		(void) strcpy(nargs->netname, netname);
	}

	/*
	 * Get the extension data which has the security data structure.
	 * This includes data for AUTH_SYS as well.
	 */
	if (flags & NFSMNT_NEWARGS) {
		nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
		if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
		    nargs->nfs_args_ext == NFS_ARGS_EXTB) {
			/*
			 * Indicating the application is using the new
			 * sec_data structure to pass in the security
			 * data.
			 */
			if (STRUCT_FGETP(args,
			    nfs_ext_u.nfs_extA.secdata) != NULL) {
				error = sec_clnt_loadinfo(
				    (struct sec_data *)STRUCT_FGETP(args,
				    nfs_ext_u.nfs_extA.secdata),
				    &secdata, get_udatamodel());
			}
			nargs->nfs_ext_u.nfs_extA.secdata = secdata;
		}
	}

	if (error)
		goto errout;

	/*
	 * Failover support:
	 *
	 * We may have a linked list of nfs_args structures,
	 * which means the user is looking for failover. If
	 * the mount is not read-only, or is soft, we want to
	 * bail out with EINVAL.
	 */
	if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
		nargs->nfs_ext_u.nfs_extB.next =
		    STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);

errout:
	if (error)
		nfs4_free_args(nargs);

	return (error);
}

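/*
 * Failover note: userland may hand down a chain of nfs_args structures
 * linked through nfs_ext_u.nfs_extB.next, one per server.  nfs4_mount()
 * below walks that chain (the "more:" label), building one servinfo4_t
 * per entry, and rejects the chain unless the mount is read-only and
 * not soft.
 */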

/*
 * nfs mount vfsop
 * Set up mount info record and attach it to vfs struct.
 */
int
nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char *data = uap->dataptr;
	int error;
	vnode_t *rtvp;			/* the server's root */
	mntinfo4_t *mi;			/* mount info, pointed at by vfs */
	struct knetconfig *rdma_knconf;	/* rdma transport structure */
	rnode4_t *rp;
	struct servinfo4 *svp;		/* nfs server info */
	struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
	struct servinfo4 *svp_head;	/* first nfs server info */
	struct servinfo4 *svp_2ndlast;	/* 2nd last in server info list */
	struct sec_data *secdata;	/* security data */
	struct nfs_args *args = NULL;
	int flags, addr_type, removed;
	zone_t *zone = nfs_zone();
	nfs4_error_t n4e;
	zone_t *mntzone = NULL;

	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
		return (EPERM);
	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * get arguments
	 *
	 * nfs_args is now versioned and is extensible, so
	 * uap->datalen might be different from sizeof (args)
	 * in a compatible situation.
	 */
more:
	if (!(uap->flags & MS_SYSSPACE)) {
		if (args == NULL)
			args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
		else
			nfs4_free_args(args);
		error = nfs4_copyin(data, uap->datalen, args);
		if (error) {
			if (args) {
				kmem_free(args, sizeof (*args));
			}
			return (error);
		}
	} else {
		args = (struct nfs_args *)data;
	}

	flags = args->flags;

	/*
	 * If the request changes the locking type, disallow the remount,
	 * because it's questionable whether we can transfer the
	 * locking state correctly.
	 */
	if (uap->flags & MS_REMOUNT) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		if ((mi = VFTOMI4(vfsp)) != NULL) {
			uint_t new_mi_llock;
			uint_t old_mi_llock;
			new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
			old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
			if (old_mi_llock != new_mi_llock)
				return (EBUSY);
		}
		return (0);
	}

	/*
	 * For ephemeral mount trigger stub vnodes, we have two problems
	 * to solve: racing threads will likely fail the v_count check, and
	 * we want only one to proceed with the mount.
	 *
	 * For stubs, if the mount has already occurred (via a racing thread),
	 * just return success. If not, skip the v_count check and proceed.
	 * Note that we are already serialised at this point.
	 */
	mutex_enter(&mvp->v_lock);
	if (vn_matchops(mvp, nfs4_trigger_vnodeops)) {
		/* mntpt is a v4 stub vnode */
		ASSERT(RP_ISSTUB(VTOR4(mvp)));
		ASSERT(!(uap->flags & MS_OVERLAY));
		ASSERT(!(mvp->v_flag & VROOT));
		if (vn_mountedvfs(mvp) != NULL) {
			/* ephemeral mount has already occurred */
			ASSERT(uap->flags & MS_SYSSPACE);
			mutex_exit(&mvp->v_lock);
			return (0);
		}
	} else {
		/* mntpt is a non-v4 or v4 non-stub vnode */
		if (!(uap->flags & MS_OVERLAY) &&
		    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
			mutex_exit(&mvp->v_lock);
			if (!(uap->flags & MS_SYSSPACE)) {
				nfs4_free_args(args);
				kmem_free(args, sizeof (*args));
			}
			return (EBUSY);
		}
	}
	mutex_exit(&mvp->v_lock);

	/* make sure things are zeroed for errout: */
	rtvp = NULL;
	mi = NULL;
	secdata = NULL;

	/*
	 * A valid knetconfig structure is required.
	 */
	if (!(flags & NFSMNT_KNCONF) ||
	    args->knconf == NULL || args->knconf->knc_protofmly == NULL ||
	    args->knconf->knc_proto == NULL ||
	    (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		return (EINVAL);
	}

	if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
	    (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		return (EINVAL);
	}

	/*
	 * Allocate a servinfo4 struct.
	 */
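	/*
	 * The servinfo4 entries are kept on a singly linked list headed by
	 * svp_head.  svp_tail tracks the last entry and svp_2ndlast the one
	 * before it, so that a server which turns out to be unusable (for
	 * example, no RDMA path when proto=rdma was requested) can be
	 * unlinked again before moving on to the next nfs_args in the chain.
	 */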
	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
	if (svp_tail) {
		svp_2ndlast = svp_tail;
		svp_tail->sv_next = svp;
	} else {
		svp_head = svp;
		svp_2ndlast = svp;
	}

	svp_tail = svp;
	svp->sv_knconf = args->knconf;
	args->knconf = NULL;

	/*
	 * Get server address
	 */
	if (args->addr == NULL || args->addr->buf == NULL) {
		error = EINVAL;
		goto errout;
	}

	svp->sv_addr.maxlen = args->addr->maxlen;
	svp->sv_addr.len = args->addr->len;
	svp->sv_addr.buf = args->addr->buf;
	args->addr->buf = NULL;

	/*
	 * Get the root fhandle
	 */
	if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) {
		error = EINVAL;
		goto errout;
	}

	svp->sv_path = args->fh;
	svp->sv_pathlen = strlen(args->fh) + 1;
	args->fh = NULL;

	/*
	 * Get server's hostname
	 */
	if (flags & NFSMNT_HOSTNAME) {
		if (args->hostname == NULL || (strlen(args->hostname) >
		    MAXNETNAMELEN)) {
			error = EINVAL;
			goto errout;
		}
		svp->sv_hostnamelen = strlen(args->hostname) + 1;
		svp->sv_hostname = args->hostname;
		args->hostname = NULL;
	} else {
		char *p = "unknown-host";
		svp->sv_hostnamelen = strlen(p) + 1;
		svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
		(void) strcpy(svp->sv_hostname, p);
	}

	/*
	 * RDMA MOUNT SUPPORT FOR NFS v4.
	 * Determine whether it is possible to use RDMA; if so, overload the
	 * knconf with an RDMA-specific knconf and free the original knconf.
	 */
	if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
		/*
		 * Determine the addr type for RDMA, IPv4 or v6.
		 */
		if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
			addr_type = AF_INET;
		else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
			addr_type = AF_INET6;

		if (rdma_reachable(addr_type, &svp->sv_addr,
		    &rdma_knconf) == 0) {
			/*
			 * If successful, hijack the original knconf and
			 * replace with the new one, depending on the flags.
			 */
			svp->sv_origknconf = svp->sv_knconf;
			svp->sv_knconf = rdma_knconf;
		} else {
			if (flags & NFSMNT_TRYRDMA) {
#ifdef DEBUG
				if (rdma_debug)
					zcmn_err(getzoneid(), CE_WARN,
					    "no RDMA onboard, revert\n");
#endif
			}

			if (flags & NFSMNT_DORDMA) {
				/*
				 * If proto=rdma is specified and no RDMA
				 * path to this server is available then
				 * ditch this server.
				 * This is not included in the mountable
				 * server list or the replica list.
				 * Check if more servers are specified
				 * (the failover case); otherwise bail out
				 * of the mount.
				 */
				if (args->nfs_args_ext == NFS_ARGS_EXTB &&
				    args->nfs_ext_u.nfs_extB.next != NULL) {
					data = (char *)
					    args->nfs_ext_u.nfs_extB.next;
					if (uap->flags & MS_RDONLY &&
					    !(flags & NFSMNT_SOFT)) {
						if (svp_head->sv_next == NULL) {
							svp_tail = NULL;
							svp_2ndlast = NULL;
							sv4_free(svp_head);
							goto more;
						} else {
							svp_tail = svp_2ndlast;
							svp_2ndlast->sv_next =
							    NULL;
							sv4_free(svp);
							goto more;
						}
					}
				} else {
					/*
					 * This is the last server specified
					 * in the nfs_args list passed down
					 * and it's not RDMA capable.
					 */
					if (svp_head->sv_next == NULL) {
						/*
						 * Is this the only one?
						 */
						error = EINVAL;
#ifdef DEBUG
						if (rdma_debug)
							zcmn_err(getzoneid(),
							    CE_WARN,
							    "No RDMA srv");
#endif
						goto errout;
					} else {
						/*
						 * There is a list, since some
						 * servers specified before
						 * this one passed all
						 * requirements.
						 */
						svp_tail = svp_2ndlast;
						svp_2ndlast->sv_next = NULL;
						sv4_free(svp);
						goto proceed;
					}
				}
			}
		}
	}

	/*
	 * If there are syncaddr and netname data, load them in. This is
	 * to support data needed for NFSV4 when AUTH_DH is the negotiated
	 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
	 */
	if (args->flags & NFSMNT_SECURE) {
		svp->sv_dhsec = create_authdh_data(args->netname,
		    strlen(args->netname),
		    args->syncaddr, svp->sv_knconf);
	}

	/*
	 * Get the extension data which has the security data structure.
	 * This includes data for AUTH_SYS as well.
	 */
	if (flags & NFSMNT_NEWARGS) {
		switch (args->nfs_args_ext) {
		case NFS_ARGS_EXTA:
		case NFS_ARGS_EXTB:
			/*
			 * Indicating the application is using the new
			 * sec_data structure to pass in the security
			 * data.
			 */
			secdata = args->nfs_ext_u.nfs_extA.secdata;
			if (secdata == NULL) {
				error = EINVAL;
			} else if (uap->flags & MS_SYSSPACE) {
				/*
				 * Need to validate the flavor here if
				 * called from sysspace; userspace was
				 * already validated in nfs4_copyin().
				 */
				switch (secdata->rpcflavor) {
				case AUTH_NONE:
				case AUTH_UNIX:
				case AUTH_LOOPBACK:
				case AUTH_DES:
				case RPCSEC_GSS:
					break;
				default:
					error = EINVAL;
					goto errout;
				}
			}
			args->nfs_ext_u.nfs_extA.secdata = NULL;
			break;

		default:
			error = EINVAL;
			break;
		}

	} else if (flags & NFSMNT_SECURE) {
		/*
		 * NFSMNT_SECURE is deprecated but we keep it
		 * to support the rogue user-generated application
		 * that may use this undocumented interface to do
		 * AUTH_DH security, e.g. our own rexd.
		 *
		 * Also note that NFSMNT_SECURE is used for passing
		 * AUTH_DH info to be used in negotiation.
		 */
		secdata = create_authdh_data(args->netname,
		    strlen(args->netname), args->syncaddr, svp->sv_knconf);

	} else {
		secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	}

	svp->sv_secdata = secdata;

	/*
	 * The user does not explicitly specify a flavor, and a user-defined
	 * default flavor is passed down.
	 */
	if (flags & NFSMNT_SECDEFAULT) {
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
		svp->sv_flags |= SV4_TRYSECDEFAULT;
		nfs_rw_exit(&svp->sv_lock);
	}

	/*
	 * Failover support:
	 *
	 * We may have a linked list of nfs_args structures,
	 * which means the user is looking for failover. If
	 * the mount is not read-only, or is soft, we want to
	 * bail out with EINVAL.
	 */
	if (args->nfs_args_ext == NFS_ARGS_EXTB &&
	    args->nfs_ext_u.nfs_extB.next != NULL) {
		if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
			data = (char *)args->nfs_ext_u.nfs_extB.next;
			goto more;
		}
		error = EINVAL;
		goto errout;
	}

	/*
	 * Determine the zone we're being mounted into.
	 */
	zone_hold(mntzone = zone);		/* start with this assumption */
	if (getzoneid() == GLOBAL_ZONEID) {
		zone_rele(mntzone);
		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
		ASSERT(mntzone != NULL);
		if (mntzone != zone) {
			error = EBUSY;
			goto errout;
		}
	}

	if (is_system_labeled()) {
		error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
		    svp->sv_knconf, cr);

		if (error > 0)
			goto errout;

		if (error == -1) {
			/* change mount to read-only to prevent write-down */
			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
		}
	}

	/*
	 * Stop the mount from going any further if the zone is going away.
	 */
	if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
		error = EBUSY;
		goto errout;
	}

	/*
	 * Get root vnode.
	 */
proceed:
	error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
	if (error) {
		/* if nfs4rootvp failed, it will free svp_head */
		svp_head = NULL;
		goto errout;
	}

	mi = VTOMI4(rtvp);

	/*
	 * Send client id to the server, if necessary
	 */
	nfs4_error_zinit(&n4e);
	nfs4setclientid(mi, cr, FALSE, &n4e);

	error = n4e.error;

	if (error)
		goto errout;

	/*
	 * Set option fields in the mount info record
	 */

	if (svp_head->sv_next) {
		mutex_enter(&mi->mi_lock);
		mi->mi_flags |= MI4_LLOCK;
		mutex_exit(&mi->mi_lock);
	}
	error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args);
	if (error)
		goto errout;

	/*
	 * Time to tie in the mirror mount info at last!
	 */
	if (flags & NFSMNT_EPHEMERAL)
		error = nfs4_record_ephemeral_mount(mi, mvp);

errout:
	if (error) {
		if (rtvp != NULL) {
			rp = VTOR4(rtvp);
			if (rp->r_flags & R4HASHED)
				rp4_rmhash(rp);
		}
		if (mi != NULL) {
			nfs4_async_stop(vfsp);
			nfs4_async_manager_stop(vfsp);
			nfs4_remove_mi_from_server(mi, NULL);
			if (rtvp != NULL)
				VN_RELE(rtvp);
			if (mntzone != NULL)
				zone_rele(mntzone);
			/* need to remove it from the zone */
			removed = nfs4_mi_zonelist_remove(mi);
			if (removed)
				zone_rele(mi->mi_zone);
			MI4_RELE(mi);
			if (!(uap->flags & MS_SYSSPACE) && args) {
				nfs4_free_args(args);
				kmem_free(args, sizeof (*args));
			}
			return (error);
		}
		if (svp_head)
			sv4_free(svp_head);
	}

	if (!(uap->flags & MS_SYSSPACE) && args) {
		nfs4_free_args(args);
		kmem_free(args, sizeof (*args));
	}
	if (rtvp != NULL)
		VN_RELE(rtvp);

	if (mntzone != NULL)
		zone_rele(mntzone);

	return (error);
}

#ifdef DEBUG
#define	VERS_MSG	"NFS4 server "
#else
#define	VERS_MSG	"NFS server "
#endif

#define	READ_MSG \
	VERS_MSG "%s returned 0 for read transfer size"
#define	WRITE_MSG \
	VERS_MSG "%s returned 0 for write transfer size"
#define	SIZE_MSG \
	VERS_MSG "%s returned 0 for maximum file size"

/*
 * Get the symbolic link text from the server for a given filehandle
 * of that symlink.
 *
 *	(get symlink text) PUTFH READLINK
 */
static int
getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
    int flags)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	nfs_argop4 argop[2];
	nfs_resop4 *resop;
	READLINK4res *lr_res;
	uint_t len;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	nfs4_sharedfh_t *sfh;
	nfs4_error_t e;
	int num_retry = nfs4_max_mount_retry;
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	sfh = sfh4_get(fh, mi);
	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	nfs4_error_zinit(&e);

	args.array_len = 2;
	args.array = argop;
	args.ctag = TAG_GET_SYMLINK;

	if (! recovery) {
		e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
		if (e.error) {
			sfh4_rele(&sfh);
			return (e.error);
		}
	}

	/* 0. putfh symlink fh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. readlink */
	argop[1].argop = OP_READLINK;

	doqueue = 1;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);

	if (needrecov && !recovery && num_retry-- > 0) {

		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
		    "getlinktext_otw: initiating recovery\n"));

		if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
		    OP_READLINK, NULL, NULL, NULL) == FALSE) {
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
			if (!e.error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			goto recov_retry;
		}
	}

	/*
	 * If we got a non-NFS4 protocol error and/or we weren't able to
	 * recover.
	 */
	if (e.error != 0) {
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	if (res.status) {
		e.error = geterrno4(res.status);
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	/* res.status == NFS4_OK */
	ASSERT(res.status == NFS4_OK);

	resop = &res.array[1];	/* readlink res */
	lr_res = &resop->nfs_resop4_u.opreadlink;

	/* treat symlink name as data */
	*linktextp = utf8_to_str(&lr_res->link, &len, NULL);

	if (! recovery)
		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
	sfh4_rele(&sfh);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	return (0);
}

/*
 * Skip over consecutive slashes and "/./" in a pathname.
 */
void
pathname_skipslashdot(struct pathname *pnp)
{
	char *c1, *c2;

	while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {

		c1 = pnp->pn_path + 1;
		c2 = pnp->pn_path + 2;

		if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
			pnp->pn_path = pnp->pn_path + 2;	/* skip "/." */
			pnp->pn_pathlen = pnp->pn_pathlen - 2;
		} else {
			pnp->pn_path++;
			pnp->pn_pathlen--;
		}
	}
}

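/*
 * For illustration: given pn_path = "//./export/home",
 * pathname_skipslashdot() leaves pn_path pointing at "export/home",
 * ready for pn_getcomponent().
 */
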
/*
 * Resolve a symbolic link path. The symlink is in the nth component of
 * svp->sv_path and has an nfs4 file handle "fh".
 * Upon return, the sv_path will point to the new path that has the nth
 * component resolved to its symlink text.
 */
int
resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
    cred_t *cr, int flags)
{
	char *oldpath;
	char *symlink, *newpath;
	struct pathname oldpn, newpn;
	char component[MAXNAMELEN];
	int i, addlen, error = 0;
	int oldpathlen;

	/* Get the symbolic link text over the wire. */
	error = getlinktext_otw(mi, fh, &symlink, cr, flags);

	if (error || symlink == NULL || strlen(symlink) == 0)
		return (error);

	/*
	 * Compose the new pathname.
	 * Note:
	 *    - only the nth component is resolved for the pathname.
	 *    - pathname.pn_pathlen does not count the ending null byte.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	oldpath = svp->sv_path;
	oldpathlen = svp->sv_pathlen;
	if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
		nfs_rw_exit(&svp->sv_lock);
		kmem_free(symlink, strlen(symlink) + 1);
		return (error);
	}
	nfs_rw_exit(&svp->sv_lock);
	pn_alloc(&newpn);

	/*
	 * Skip over previous components from the oldpath so that the
	 * oldpn.pn_path will point to the symlink component. Skip
	 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
	 * pn_getcomponent can get the component.
	 */
	for (i = 1; i < nth; i++) {
		pathname_skipslashdot(&oldpn);
		error = pn_getcomponent(&oldpn, component);
		if (error)
			goto out;
	}

	/*
	 * Copy the old path up to the component right before the symlink
	 * if the symlink is not an absolute path.
	 */
	if (symlink[0] != '/') {
		addlen = oldpn.pn_path - oldpn.pn_buf;
		bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
		newpn.pn_path += addlen;
		newpn.pn_buf[newpn.pn_pathlen] = '/';
		newpn.pn_pathlen++;
		newpn.pn_path++;
	}

	/* copy the resolved symbolic link text */
	addlen = strlen(symlink);
	if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
		error = ENAMETOOLONG;
		goto out;
	}
	bcopy(symlink, newpn.pn_path, addlen);
	newpn.pn_pathlen += addlen;
	newpn.pn_path += addlen;

	/*
	 * Check if there is any remaining path after the symlink component.
	 * First, skip the symlink component.
	 */
	pathname_skipslashdot(&oldpn);
	if (error = pn_getcomponent(&oldpn, component))
		goto out;

	addlen = pn_pathleft(&oldpn); /* includes counting the slash */

	/*
	 * Copy the remaining path to the new pathname if there is any.
	 */
	if (addlen > 0) {
		if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
			error = ENAMETOOLONG;
			goto out;
		}
		bcopy(oldpn.pn_path, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
	}
	newpn.pn_buf[newpn.pn_pathlen] = '\0';

	/* get the newpath and store it in the servinfo4_t */
	newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
	bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
	newpath[newpn.pn_pathlen] = '\0';

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	svp->sv_path = newpath;
	svp->sv_pathlen = strlen(newpath) + 1;
	nfs_rw_exit(&svp->sv_lock);

	kmem_free(oldpath, oldpathlen);
out:
	kmem_free(symlink, strlen(symlink) + 1);
	pn_free(&newpn);
	pn_free(&oldpn);

	return (error);
}
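
/*
 * Example (for illustration only): with sv_path "/export/stuff/data",
 * nth == 2 and "stuff" being a symlink whose text is "pool/stuff",
 * resolve_sympath() rewrites sv_path to "/export/pool/stuff/data".
 * An absolute symlink text would instead replace the leading components
 * entirely.
 */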

/*
 * This routine updates the servinfo4 structure with the new referred-server
 * info.
 * nfsfsloc has the location-related information.
 * fsp has the hostname and pathname info.
 * new path = pathname from referral + part of orig pathname (based on nth).
 */
static void
update_servinfo4(servinfo4_t *svp, fs_location4 *fsp,
    struct nfs_fsl_info *nfsfsloc, char *orig_path, int nth)
{
	struct knetconfig *knconf, *svknconf;
	struct netbuf *saddr;
	sec_data_t *secdata;
	utf8string *host;
	int i = 0, num_slashes = 0;
	char *p, *spath, *op, *new_path;

	/* Update knconf */
	knconf = svp->sv_knconf;
	free_knconf_contents(knconf);
	bzero(knconf, sizeof (struct knetconfig));
	svknconf = nfsfsloc->knconf;
	knconf->knc_semantics = svknconf->knc_semantics;
	knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	knconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	knconf->knc_rdev = svknconf->knc_rdev;
	bcopy(svknconf->knc_protofmly, knconf->knc_protofmly, KNC_STRSIZE);
	bcopy(svknconf->knc_proto, knconf->knc_proto, KNC_STRSIZE);

	/* Update server address */
	saddr = &svp->sv_addr;
	if (saddr->buf != NULL)
		kmem_free(saddr->buf, saddr->maxlen);
	saddr->buf = kmem_alloc(nfsfsloc->addr->maxlen, KM_SLEEP);
	saddr->len = nfsfsloc->addr->len;
	saddr->maxlen = nfsfsloc->addr->maxlen;
	bcopy(nfsfsloc->addr->buf, saddr->buf, nfsfsloc->addr->len);

	/* Update server name */
	host = fsp->server_val;
	kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
	svp->sv_hostname = kmem_zalloc(host->utf8string_len + 1, KM_SLEEP);
	bcopy(host->utf8string_val, svp->sv_hostname, host->utf8string_len);
	svp->sv_hostname[host->utf8string_len] = '\0';
	svp->sv_hostnamelen = host->utf8string_len + 1;

	/*
	 * Update server path.
	 * We need to setup proper path here.
	 * For example, if we got a pathname serv1:/rp/aaa/bbb,
	 * where aaa is a referral that points to serv2:/rpool/aa,
	 * we need to set the path to serv2:/rpool/aa/bbb.
	 * The first part of the code below generates /rpool/aa
	 * and the second part appends /bbb to the server path.
	 */
	spath = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	*p++ = '/';
	for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
		component4 *comp;

		comp = &fsp->rootpath.pathname4_val[i];
		/* If no space, null the string and bail */
		if ((p - spath) + comp->utf8string_len + 1 > MAXPATHLEN) {
			p = spath + MAXPATHLEN - 1;
			spath[0] = '\0';
			break;
		}
		bcopy(comp->utf8string_val, p, comp->utf8string_len);
		p += comp->utf8string_len;
		*p++ = '/';
	}
	if (fsp->rootpath.pathname4_len != 0)
		*(p - 1) = '\0';
	else
		*p = '\0';
	p = spath;

	new_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) strlcpy(new_path, p, MAXPATHLEN);
	kmem_free(p, MAXPATHLEN);
	i = strlen(new_path);

	for (op = orig_path; *op; op++) {
		if (*op == '/')
			num_slashes++;
		if (num_slashes == nth + 2) {
			while (*op != '\0') {
				new_path[i] = *op;
				i++;
				op++;
			}
			break;
		}
	}
	new_path[i] = '\0';

	kmem_free(svp->sv_path, svp->sv_pathlen);
	svp->sv_pathlen = strlen(new_path) + 1;
	svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
	bcopy(new_path, svp->sv_path, svp->sv_pathlen);
	kmem_free(new_path, MAXPATHLEN);

	/*
	 * All the security data is specific to the old server.
	 * Clean it up except secdata, which deals with mount options.
	 * We need to inherit that data. Copy secdata into our new servinfo4.
	 */
	if (svp->sv_dhsec) {
		sec_clnt_freeinfo(svp->sv_dhsec);
		svp->sv_dhsec = NULL;
	}
	if (svp->sv_save_secinfo &&
	    svp->sv_save_secinfo != svp->sv_secinfo) {
		secinfo_free(svp->sv_save_secinfo);
		svp->sv_save_secinfo = NULL;
	}
	if (svp->sv_secinfo) {
		secinfo_free(svp->sv_secinfo);
		svp->sv_secinfo = NULL;
	}
	svp->sv_currsec = NULL;

	secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
	*secdata = *svp->sv_secdata;
	secdata->data = NULL;
	if (svp->sv_secdata) {
		sec_clnt_freeinfo(svp->sv_secdata);
		svp->sv_secdata = NULL;
	}
	svp->sv_secdata = secdata;
}

/*
 * Resolve a referral. The referral is in the n+1th component of
 * svp->sv_path and has a parent nfs4 file handle "fh".
 * Upon return, the sv_path will point to the new path that has the
 * referral component resolved to its referred path plus part of the
 * original path.
 * Hostname and other address information is also updated.
 */
int
resolve_referral(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, int nth,
    nfs_fh4 *fh)
{
	nfs4_sharedfh_t *sfh;
	struct nfs_fsl_info nfsfsloc;
	nfs4_ga_res_t garp;
	COMPOUND4res_clnt callres;
	fs_location4 *fsp;
	char *nm, *orig_path;
	int orig_pathlen = 0, ret = -1, index;

	if (svp->sv_pathlen <= 0)
		return (ret);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	orig_pathlen = svp->sv_pathlen;
	orig_path = kmem_alloc(orig_pathlen, KM_SLEEP);
	bcopy(svp->sv_path, orig_path, orig_pathlen);
	nm = extract_referral_point(svp->sv_path, nth);
	setup_newsvpath(svp, nth);
	nfs_rw_exit(&svp->sv_lock);

	sfh = sfh4_get(fh, mi);
	index = nfs4_process_referral(mi, sfh, nm, cr,
	    &garp, &callres, &nfsfsloc);
	sfh4_rele(&sfh);
	kmem_free(nm, MAXPATHLEN);
	if (index < 0) {
		kmem_free(orig_path, orig_pathlen);
		return (index);
	}

	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	update_servinfo4(svp, fsp, &nfsfsloc, orig_path, nth);
	nfs_rw_exit(&svp->sv_lock);

	mutex_enter(&mi->mi_lock);
	mi->mi_vfs_referral_loop_cnt++;
	mutex_exit(&mi->mi_lock);

	ret = 0;
bad:
	/* Free up XDR memory allocated in nfs4_process_referral() */
	xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
	kmem_free(orig_path, orig_pathlen);

	return (ret);
}
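
/*
 * A sketch of the compound built by nfs4getfh_otw() below, assuming a
 * two-component server path and the root (not public) filehandle:
 *
 *	PUTROOTFH, GETFH,
 *	LOOKUP comp1, GETFH, GETATTR,
 *	LOOKUP comp2, GETFH, GETATTR
 *
 * The two-op header (PUTROOTFH/PUTPUBFH + GETFH) is fixed;
 * nfs4lookup_setup() appends the per-component LOOKUP/GETFH/GETATTR
 * triples, which is why the result-array arithmetic later in the
 * function divides by 3.
 */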

/*
 * Get the root filehandle for the given filesystem and server, and update
 * svp.
 *
 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
 * to coordinate with recovery. Otherwise, the caller is assumed to be
 * the recovery thread or have already done a start_fop.
 *
 * Errors are returned by the nfs4_error_t parameter.
 */
static void
nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
    int flags, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue = 1;
	nfs_argop4 *argop;
	nfs_resop4 *resop;
	nfs4_ga_res_t *garp;
	int num_argops;
	lookup4_param_t lookuparg;
	nfs_fh4 *tmpfhp;
	nfs_fh4 *resfhp;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	int llndx;
	int nthcomp;
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	if (svp->sv_path[0] == '\0') {
		nfs_rw_exit(&svp->sv_lock);
		nfs4_error_init(ep, EINVAL);
		return;
	}
	nfs_rw_exit(&svp->sv_lock);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) {
		DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *,
		    mi, servinfo4_t *, svp, char *, "nfs4getfh_otw");
		nfs4_error_init(ep, EINVAL);
		return;
	}
	nfs4_error_zinit(ep);

	if (!recovery) {
		ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
		    &recov_state, NULL);

		/*
		 * If recovery has been started and this request was
		 * initiated by a mount, then we must wait for recovery
		 * to finish before proceeding; otherwise, the error
		 * cleanup would remove data structures needed by the
		 * recovery thread.
		 */
		if (ep->error) {
			mutex_enter(&mi->mi_lock);
			if (mi->mi_flags & MI4_MOUNTING) {
				mi->mi_flags |= MI4_RECOV_FAIL;
				mi->mi_error = EIO;

				NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
				    "nfs4getfh_otw: waiting 4 recovery\n"));

				while (mi->mi_flags & MI4_RECOV_ACTIV)
					cv_wait(&mi->mi_failover_cv,
					    &mi->mi_lock);
			}
			mutex_exit(&mi->mi_lock);
			return;
		}

		/*
		 * If the client does not specify a specific flavor to use
		 * and has not gotten a secinfo list from the server yet,
		 * retrieve the secinfo list from the server and use a
		 * flavor from the list to mount.
		 *
		 * If we fail to get the secinfo list from the server, then
		 * try the default flavor.
		 */
		if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
		    svp->sv_secinfo == NULL) {
			(void) nfs4_secinfo_path(mi, cr, FALSE);
		}
	}

	if (recovery)
		args.ctag = TAG_REMAP_MOUNT;
	else
		args.ctag = TAG_MOUNT;

	lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
	lookuparg.argsp = &args;
	lookuparg.resp = &res;
	lookuparg.header_len = 2;	/* Putrootfh, getfh */
	lookuparg.trailer_len = 0;
	lookuparg.ga_bits = FATTR4_FSINFO_MASK;
	lookuparg.mi = mi;

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
	nfs_rw_exit(&svp->sv_lock);

	argop = args.array;
	num_argops = args.array_len;

	/* choose public or root filehandle */
	if (flags & NFS4_GETFH_PUBLIC)
		argop[0].argop = OP_PUTPUBFH;
	else
		argop[0].argop = OP_PUTROOTFH;

	/* get fh */
	argop[1].argop = OP_GETFH;

	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
	    "nfs4getfh_otw: %s call, mi 0x%p",
	    needrecov ? "recov" : "first", (void *)mi));

	rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);

	needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);

	if (needrecov) {
		bool_t abort;

		if (recovery) {
			nfs4args_lookup_free(argop, num_argops);
			kmem_free(argop,
			    lookuparg.arglen * sizeof (nfs_argop4));
			if (!ep->error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			return;
		}

		NFS4_DEBUG(nfs4_client_recov_debug,
		    (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));

		abort = nfs4_start_recovery(ep, mi, NULL,
		    NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL);
		if (!ep->error) {
			ep->error = geterrno4(res.status);
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
		/* have another go? */
		if (abort == FALSE)
			goto recov_retry;
		return;
	}

	/*
	 * No recovery, but check if error is set.
	 */
	if (ep->error) {
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		return;
	}

is_link_err:

	/* for non-recovery errors */
	if (res.status && res.status != NFS4ERR_SYMLINK &&
	    res.status != NFS4ERR_MOVED) {
		if (!recovery) {
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	/*
	 * If any intermediate component in the path is a symbolic link,
	 * resolve the symlink, then try mount again using the new path.
	 */
	if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) {
		int where;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		/*
		 * This must be from OP_LOOKUP failure. The (cfh) for this
		 * OP_LOOKUP is a symlink node. Find out where the
		 * OP_GETFH is for the (cfh) that is a symlink node.
		 *
		 * Example:
		 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
		 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
		 *
		 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
		 * In this case, where = 7, nthcomp = 2.
		 */
		where = res.array_len - 2;
		ASSERT(where > 0);

		if (res.status == NFS4ERR_SYMLINK) {

			resop = &res.array[where - 1];
			ASSERT(resop->resop == OP_GETFH);
			tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
			nthcomp = res.array_len/3 - 1;
			ep->error = resolve_sympath(mi, svp, nthcomp,
			    tmpfhp, cr, flags);

		} else if (res.status == NFS4ERR_MOVED) {

			resop = &res.array[where - 2];
			ASSERT(resop->resop == OP_GETFH);
			tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
			nthcomp = res.array_len/3 - 1;
			ep->error = resolve_referral(mi, svp, cr, nthcomp,
			    tmpfhp);
		}

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		goto recov_retry;
	}

	/* getfh */
	resop = &res.array[res.array_len - 2];
	ASSERT(resop->resop == OP_GETFH);
	resfhp = &resop->nfs_resop4_u.opgetfh.object;

	/* getattr fsinfo res */
	resop++;
	garp = &resop->nfs_resop4_u.opgetattr.ga_res;

	*vtp = garp->n4g_va.va_type;

	mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;

	mutex_enter(&mi->mi_lock);
	if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
		mi->mi_flags |= MI4_LINK;
	if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
		mi->mi_flags |= MI4_SYMLINK;
	if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
		mi->mi_flags |= MI4_ACL;
	mutex_exit(&mi->mi_lock);

	if (garp->n4g_ext_res->n4g_maxread == 0)
		mi->mi_tsize =
		    MIN(MAXBSIZE, mi->mi_tsize);
	else
		mi->mi_tsize =
		    MIN(garp->n4g_ext_res->n4g_maxread,
		    mi->mi_tsize);

	if (garp->n4g_ext_res->n4g_maxwrite == 0)
		mi->mi_stsize =
		    MIN(MAXBSIZE, mi->mi_stsize);
	else
		mi->mi_stsize =
		    MIN(garp->n4g_ext_res->n4g_maxwrite,
		    mi->mi_stsize);

	if (garp->n4g_ext_res->n4g_maxfilesize != 0)
		mi->mi_maxfilesize =
		    MIN(garp->n4g_ext_res->n4g_maxfilesize,
		    mi->mi_maxfilesize);

	/*
	 * If the final component is a symbolic link, resolve the symlink,
	 * then try mount again using the new path.
	 *
	 * Assume no symbolic link for root filesystem "/".
	 */
	if (*vtp == VLNK) {
		/*
		 * nthcomp is the total result length minus
		 * the 1st 2 OPs (PUTROOTFH, GETFH),
		 * then divided by 3 (LOOKUP,GETFH,GETATTR)
		 *
		 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
		 *	LOOKUP 2nd-comp GETFH GETATTR
		 *
		 *	(8 - 2)/3 = 2
		 */
		nthcomp = (res.array_len - 2)/3;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock. See RFE 4777612.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
		    flags);

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		goto recov_retry;
	}

	/*
	 * We need to figure out where in the compound the getfh
	 * for the parent directory is. If the object to be mounted is
	 * the root, then there is no lookup at all:
	 * PUTROOTFH, GETFH.
1997 * If the object to be mounted is in the root, then the compound is: 1998 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1999 * In either of these cases, the index of the GETFH is 1. 2000 * If it is not at the root, then it's something like: 2001 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 2002 * LOOKUP, GETFH, GETATTR 2003 * In this case, the index is llndx (last lookup index) - 2. 2004 */ 2005 if (llndx == -1 || llndx == 2) 2006 resop = &res.array[1]; 2007 else { 2008 ASSERT(llndx > 2); 2009 resop = &res.array[llndx-2]; 2010 } 2011 2012 ASSERT(resop->resop == OP_GETFH); 2013 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 2014 2015 /* save the filehandles for the replica */ 2016 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2017 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 2018 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 2019 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 2020 tmpfhp->nfs_fh4_len); 2021 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 2022 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 2023 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 2024 2025 /* initialize fsid and supp_attrs for server fs */ 2026 svp->sv_fsid = garp->n4g_fsid; 2027 svp->sv_supp_attrs = 2028 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 2029 2030 nfs_rw_exit(&svp->sv_lock); 2031 nfs4args_lookup_free(argop, num_argops); 2032 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 2033 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2034 if (!recovery) 2035 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 2036 } 2037 2038 /* 2039 * Save a copy of the servinfo4_t structure. 2040 * We might need it to replace the servinfo4 struct and try again 2041 * if getting the file handle fails in the referral case.
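 *
 * Illustrative usage sketch (editor's addition): this mirrors the
 * retry loop in nfs4_remap_root() and nfs4rootvp() below, which is
 * the intended use of the copy:
 *
 *	origsvp = copy_svp(svp);
 *	num_retry = nfs4_max_mount_retry;
 *	do {
 *		nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep);
 *		if (ep->error == 0 && ep->stat == NFS4_OK)
 *			break;
 *		svp = restore_svp(mi, svp, origsvp);
 *	} while (num_retry-- > 0);
 *	sv4_free(origsvp);
 *
 * The copy is deep for the fields that nfs4getfh_otw() may rewrite on
 * a referral or symlink (sv_path, sv_hostname, address, knetconfig);
 * the remaining security state is rebuilt from secdata/dhsec, as
 * copy_svp() itself notes near its end.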
2042 */ 2043 static struct servinfo4 * 2044 copy_svp(servinfo4_t *nsvp) 2045 { 2046 servinfo4_t *svp = NULL; 2047 struct knetconfig *sknconf, *tknconf; 2048 struct netbuf *saddr, *taddr; 2049 2050 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2051 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2052 svp->sv_flags = nsvp->sv_flags; 2053 svp->sv_fsid = nsvp->sv_fsid; 2054 svp->sv_hostnamelen = nsvp->sv_hostnamelen; 2055 svp->sv_pathlen = nsvp->sv_pathlen; 2056 svp->sv_supp_attrs = nsvp->sv_supp_attrs; 2057 2058 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2059 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2060 bcopy(nsvp->sv_hostname, svp->sv_hostname, svp->sv_hostnamelen); 2061 bcopy(nsvp->sv_path, svp->sv_path, svp->sv_pathlen); 2062 2063 saddr = &nsvp->sv_addr; 2064 taddr = &svp->sv_addr; 2065 taddr->maxlen = saddr->maxlen; 2066 taddr->len = saddr->len; 2067 if (saddr->len > 0) { 2068 taddr->buf = kmem_zalloc(saddr->maxlen, KM_SLEEP); 2069 bcopy(saddr->buf, taddr->buf, saddr->len); 2070 } 2071 2072 svp->sv_knconf = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP); 2073 sknconf = nsvp->sv_knconf; 2074 tknconf = svp->sv_knconf; 2075 tknconf->knc_semantics = sknconf->knc_semantics; 2076 tknconf->knc_rdev = sknconf->knc_rdev; 2077 if (sknconf->knc_proto != NULL) { 2078 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 2079 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto, 2080 KNC_STRSIZE); 2081 } 2082 if (sknconf->knc_protofmly != NULL) { 2083 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 2084 bcopy(sknconf->knc_protofmly, (char *)tknconf->knc_protofmly, 2085 KNC_STRSIZE); 2086 } 2087 2088 if (nsvp->sv_origknconf != NULL) { 2089 svp->sv_origknconf = kmem_zalloc(sizeof (struct knetconfig), 2090 KM_SLEEP); 2091 sknconf = nsvp->sv_origknconf; 2092 tknconf = svp->sv_origknconf; 2093 tknconf->knc_semantics = sknconf->knc_semantics; 2094 tknconf->knc_rdev = sknconf->knc_rdev; 2095 if (sknconf->knc_proto != NULL) { 2096 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 2097 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto, 2098 KNC_STRSIZE); 2099 } 2100 if (sknconf->knc_protofmly != NULL) { 2101 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, 2102 KM_SLEEP); 2103 bcopy(sknconf->knc_protofmly, 2104 (char *)tknconf->knc_protofmly, KNC_STRSIZE); 2105 } 2106 } 2107 2108 svp->sv_secdata = copy_sec_data(nsvp->sv_secdata); 2109 svp->sv_dhsec = copy_sec_data(svp->sv_dhsec); 2110 /* 2111 * Rest of the security information is not copied as they are built 2112 * with the information available from secdata and dhsec. 2113 */ 2114 svp->sv_next = NULL; 2115 2116 return (svp); 2117 } 2118 2119 servinfo4_t * 2120 restore_svp(mntinfo4_t *mi, servinfo4_t *svp, servinfo4_t *origsvp) 2121 { 2122 servinfo4_t *srvnext, *tmpsrv; 2123 2124 if (strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) { 2125 /* 2126 * Since the hostname changed, we must be dealing 2127 * with a referral, and the lookup failed. We will 2128 * restore the whole servinfo4_t to what it was before. 
2129 */ 2130 srvnext = svp->sv_next; 2131 svp->sv_next = NULL; 2132 tmpsrv = copy_svp(origsvp); 2133 sv4_free(svp); 2134 svp = tmpsrv; 2135 svp->sv_next = srvnext; 2136 mutex_enter(&mi->mi_lock); 2137 mi->mi_servers = svp; 2138 mi->mi_curr_serv = svp; 2139 mutex_exit(&mi->mi_lock); 2140 2141 } else if (origsvp->sv_pathlen != svp->sv_pathlen) { 2142 2143 /* 2144 * For symlink case: restore original path because 2145 * it might have contained symlinks that were 2146 * expanded by nfsgetfh_otw before the failure occurred. 2147 */ 2148 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2149 kmem_free(svp->sv_path, svp->sv_pathlen); 2150 svp->sv_path = 2151 kmem_alloc(origsvp->sv_pathlen, KM_SLEEP); 2152 svp->sv_pathlen = origsvp->sv_pathlen; 2153 bcopy(origsvp->sv_path, svp->sv_path, 2154 origsvp->sv_pathlen); 2155 nfs_rw_exit(&svp->sv_lock); 2156 } 2157 return (svp); 2158 } 2159 2160 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 2161 uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 2162 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 2163 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 2164 2165 /* 2166 * Remap the root filehandle for the given filesystem. 2167 * 2168 * results returned via the nfs4_error_t parameter. 2169 */ 2170 void 2171 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 2172 { 2173 struct servinfo4 *svp, *origsvp; 2174 vtype_t vtype; 2175 nfs_fh4 rootfh; 2176 int getfh_flags; 2177 int num_retry; 2178 2179 mutex_enter(&mi->mi_lock); 2180 2181 remap_retry: 2182 svp = mi->mi_curr_serv; 2183 getfh_flags = 2184 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 2185 getfh_flags |= 2186 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 2187 mutex_exit(&mi->mi_lock); 2188 2189 /* 2190 * Just in case server path being mounted contains 2191 * symlinks and fails w/STALE, save the initial sv_path 2192 * so we can redrive the initial mount compound with the 2193 * initial sv_path -- not a symlink-expanded version. 2194 * 2195 * This could only happen if a symlink was expanded 2196 * and the expanded mount compound failed stale. Because 2197 * it could be the case that the symlink was removed at 2198 * the server (and replaced with another symlink/dir, 2199 * we need to use the initial sv_path when attempting 2200 * to re-lookup everything and recover. 2201 */ 2202 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2203 origsvp = copy_svp(svp); 2204 nfs_rw_exit(&svp->sv_lock); 2205 2206 num_retry = nfs4_max_mount_retry; 2207 2208 do { 2209 /* 2210 * Get the root fh from the server. Retry nfs4_max_mount_retry 2211 * (2) times if it fails with STALE since the recovery 2212 * infrastructure doesn't do STALE recovery for components 2213 * of the server path to the object being mounted. 2214 */ 2215 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 2216 2217 if (ep->error == 0 && ep->stat == NFS4_OK) 2218 break; 2219 2220 /* 2221 * For some reason, the mount compound failed. Before 2222 * retrying, we need to restore original conditions. 
2223 */ 2224 svp = restore_svp(mi, svp, origsvp); 2225 2226 } while (num_retry-- > 0); 2227 2228 sv4_free(origsvp); 2229 2230 if (ep->error != 0 || ep->stat != 0) { 2231 return; 2232 } 2233 2234 if (vtype != VNON && vtype != mi->mi_type) { 2235 /* shouldn't happen */ 2236 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2237 "nfs4_remap_root: server root vnode type (%d) doesn't " 2238 "match mount info (%d)", vtype, mi->mi_type); 2239 } 2240 2241 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2242 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2243 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2244 nfs_rw_exit(&svp->sv_lock); 2245 sfh4_update(mi->mi_rootfh, &rootfh); 2246 2247 /* 2248 * It's possible that recovery took place on the filesystem 2249 * and the server has been updated between the time we did 2250 * the nfs4getfh_otw and now. Re-drive the otw operation 2251 * to make sure we have a good fh. 2252 */ 2253 mutex_enter(&mi->mi_lock); 2254 if (mi->mi_curr_serv != svp) 2255 goto remap_retry; 2256 2257 mutex_exit(&mi->mi_lock); 2258 } 2259 2260 static int 2261 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 2262 int flags, cred_t *cr, zone_t *zone) 2263 { 2264 vnode_t *rtvp = NULL; 2265 mntinfo4_t *mi; 2266 dev_t nfs_dev; 2267 int error = 0; 2268 rnode4_t *rp; 2269 int i, len; 2270 struct vattr va; 2271 vtype_t vtype = VNON; 2272 vtype_t tmp_vtype = VNON; 2273 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 2274 nfs4_oo_hash_bucket_t *bucketp; 2275 nfs_fh4 fh; 2276 char *droptext = ""; 2277 struct nfs_stats *nfsstatsp; 2278 nfs4_fname_t *mfname; 2279 nfs4_error_t e; 2280 int num_retry, removed; 2281 cred_t *lcr = NULL, *tcr = cr; 2282 struct servinfo4 *origsvp; 2283 char *resource; 2284 2285 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 2286 ASSERT(nfsstatsp != NULL); 2287 2288 ASSERT(nfs_zone() == zone); 2289 ASSERT(crgetref(cr)); 2290 2291 /* 2292 * Create a mount record and link it to the vfs struct. 
2293 */ 2294 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 2295 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 2296 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 2297 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 2298 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 2299 2300 if (!(flags & NFSMNT_SOFT)) 2301 mi->mi_flags |= MI4_HARD; 2302 if ((flags & NFSMNT_NOPRINT)) 2303 mi->mi_flags |= MI4_NOPRINT; 2304 if (flags & NFSMNT_INT) 2305 mi->mi_flags |= MI4_INT; 2306 if (flags & NFSMNT_PUBLIC) 2307 mi->mi_flags |= MI4_PUBLIC; 2308 if (flags & NFSMNT_MIRRORMOUNT) 2309 mi->mi_flags |= MI4_MIRRORMOUNT; 2310 if (flags & NFSMNT_REFERRAL) 2311 mi->mi_flags |= MI4_REFERRAL; 2312 mi->mi_retrans = NFS_RETRIES; 2313 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 2314 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 2315 mi->mi_timeo = nfs4_cots_timeo; 2316 else 2317 mi->mi_timeo = NFS_TIMEO; 2318 mi->mi_prog = NFS_PROGRAM; 2319 mi->mi_vers = NFS_V4; 2320 mi->mi_rfsnames = rfsnames_v4; 2321 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 2322 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 2323 mi->mi_servers = svp; 2324 mi->mi_curr_serv = svp; 2325 mi->mi_acregmin = SEC2HR(ACREGMIN); 2326 mi->mi_acregmax = SEC2HR(ACREGMAX); 2327 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 2328 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 2329 mi->mi_fh_expire_type = FH4_PERSISTENT; 2330 mi->mi_clientid_next = NULL; 2331 mi->mi_clientid_prev = NULL; 2332 mi->mi_srv = NULL; 2333 mi->mi_grace_wait = 0; 2334 mi->mi_error = 0; 2335 mi->mi_srvsettime = 0; 2336 mi->mi_srvset_cnt = 0; 2337 2338 mi->mi_count = 1; 2339 2340 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 2341 mi->mi_stsize = mi->mi_tsize; 2342 2343 if (flags & NFSMNT_DIRECTIO) 2344 mi->mi_flags |= MI4_DIRECTIO; 2345 2346 mi->mi_flags |= MI4_MOUNTING; 2347 2348 /* 2349 * Make a vfs struct for nfs. We do this here instead of below 2350 * because rtvp needs a vfs before we can do a getattr on it. 2351 * 2352 * Assign a unique device id to the mount 2353 */ 2354 mutex_enter(&nfs_minor_lock); 2355 do { 2356 nfs_minor = (nfs_minor + 1) & MAXMIN32; 2357 nfs_dev = makedevice(nfs_major, nfs_minor); 2358 } while (vfs_devismounted(nfs_dev)); 2359 mutex_exit(&nfs_minor_lock); 2360 2361 vfsp->vfs_dev = nfs_dev; 2362 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 2363 vfsp->vfs_data = (caddr_t)mi; 2364 vfsp->vfs_fstype = nfsfstyp; 2365 vfsp->vfs_bsize = nfs4_bsize; 2366 2367 /* 2368 * Initialize fields used to support async putpage operations. 2369 */ 2370 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 2371 mi->mi_async_clusters[i] = nfs4_async_clusters; 2372 mi->mi_async_init_clusters = nfs4_async_clusters; 2373 mi->mi_async_curr[NFS4_ASYNC_QUEUE] = 2374 mi->mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0]; 2375 mi->mi_max_threads = nfs4_max_threads; 2376 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 2377 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 2378 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE], NULL, CV_DEFAULT, 2379 NULL); 2380 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE], NULL, 2381 CV_DEFAULT, NULL); 2382 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 2383 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 2384 2385 mi->mi_vfsp = vfsp; 2386 zone_hold(mi->mi_zone = zone); 2387 nfs4_mi_zonelist_add(mi); 2388 2389 /* 2390 * Initialize the <open owner/cred> hash table. 
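 *
 * Editor's note (illustrative): each of the NFS4_NUM_OO_BUCKETS
 * buckets created below is an independently locked (b_lock) list of
 * nfs4_open_owner_t entries chained through oo_hash_node, so open
 * owner lookups -- presumably keyed by credential, as the table's
 * name suggests -- contend only on a single bucket rather than on one
 * per-mount lock. The hash function itself lives elsewhere in the
 * client and is not shown here.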
2391 */ 2392 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 2393 bucketp = &(mi->mi_oo_list[i]); 2394 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 2395 list_create(&bucketp->b_oo_hash_list, 2396 sizeof (nfs4_open_owner_t), 2397 offsetof(nfs4_open_owner_t, oo_hash_node)); 2398 } 2399 2400 /* 2401 * Initialize the freed open owner list. 2402 */ 2403 mi->mi_foo_num = 0; 2404 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 2405 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 2406 offsetof(nfs4_open_owner_t, oo_foo_node)); 2407 2408 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 2409 offsetof(nfs4_lost_rqst_t, lr_node)); 2410 2411 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 2412 offsetof(nfs4_bseqid_entry_t, bs_node)); 2413 2414 /* 2415 * Initialize the msg buffer. 2416 */ 2417 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 2418 offsetof(nfs4_debug_msg_t, msg_node)); 2419 mi->mi_msg_count = 0; 2420 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 2421 2422 /* 2423 * Initialize kstats 2424 */ 2425 nfs4_mnt_kstat_init(vfsp); 2426 2427 /* 2428 * Initialize the shared filehandle pool. 2429 */ 2430 sfh4_createtab(&mi->mi_filehandles); 2431 2432 /* 2433 * Save server path we're attempting to mount. 2434 */ 2435 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2436 origsvp = copy_svp(svp); 2437 nfs_rw_exit(&svp->sv_lock); 2438 2439 /* 2440 * Make the GETFH call to get root fh for each replica. 2441 */ 2442 if (svp_head->sv_next) 2443 droptext = ", dropping replica"; 2444 2445 /* 2446 * If the uid is set then set the creds for secure mounts 2447 * by proxy processes such as automountd. 2448 */ 2449 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2450 if (svp->sv_secdata->uid != 0 && 2451 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 2452 lcr = crdup(cr); 2453 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 2454 tcr = lcr; 2455 } 2456 nfs_rw_exit(&svp->sv_lock); 2457 for (svp = svp_head; svp; svp = svp->sv_next) { 2458 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 2459 nfs_cmn_err(error, CE_WARN, 2460 VERS_MSG "Host %s is a duplicate%s", 2461 svp->sv_hostname, droptext); 2462 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2463 svp->sv_flags |= SV4_NOTINUSE; 2464 nfs_rw_exit(&svp->sv_lock); 2465 continue; 2466 } 2467 mi->mi_curr_serv = svp; 2468 2469 /* 2470 * Just in case server path being mounted contains 2471 * symlinks and fails w/STALE, save the initial sv_path 2472 * so we can redrive the initial mount compound with the 2473 * initial sv_path -- not a symlink-expanded version. 2474 * 2475 * This could only happen if a symlink was expanded 2476 * and the expanded mount compound failed stale. Because 2477 * it could be the case that the symlink was removed at 2478 * the server (and replaced with another symlink/dir, 2479 * we need to use the initial sv_path when attempting 2480 * to re-lookup everything and recover. 2481 * 2482 * Other mount errors should evenutally be handled here also 2483 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 2484 * failures will result in mount being redriven a few times. 2485 */ 2486 num_retry = nfs4_max_mount_retry; 2487 do { 2488 nfs4getfh_otw(mi, svp, &tmp_vtype, 2489 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 2490 NFS4_GETFH_NEEDSOP, tcr, &e); 2491 2492 if (e.error == 0 && e.stat == NFS4_OK) 2493 break; 2494 2495 /* 2496 * For some reason, the mount compound failed. Before 2497 * retrying, we need to restore original conditions. 
2498 */ 2499 svp = restore_svp(mi, svp, origsvp); 2500 svp_head = svp; 2501 2502 } while (num_retry-- > 0); 2503 error = e.error ? e.error : geterrno4(e.stat); 2504 if (error) { 2505 nfs_cmn_err(error, CE_WARN, 2506 VERS_MSG "initial call to %s failed%s: %m", 2507 svp->sv_hostname, droptext); 2508 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2509 svp->sv_flags |= SV4_NOTINUSE; 2510 nfs_rw_exit(&svp->sv_lock); 2511 mi->mi_flags &= ~MI4_RECOV_FAIL; 2512 mi->mi_error = 0; 2513 continue; 2514 } 2515 2516 if (tmp_vtype == VBAD) { 2517 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2518 VERS_MSG "%s returned a bad file type for " 2519 "root%s", svp->sv_hostname, droptext); 2520 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2521 svp->sv_flags |= SV4_NOTINUSE; 2522 nfs_rw_exit(&svp->sv_lock); 2523 continue; 2524 } 2525 2526 if (vtype == VNON) { 2527 vtype = tmp_vtype; 2528 } else if (vtype != tmp_vtype) { 2529 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2530 VERS_MSG "%s returned a different file type " 2531 "for root%s", svp->sv_hostname, droptext); 2532 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2533 svp->sv_flags |= SV4_NOTINUSE; 2534 nfs_rw_exit(&svp->sv_lock); 2535 continue; 2536 } 2537 if (firstsvp == NULL) 2538 firstsvp = svp; 2539 } 2540 2541 if (firstsvp == NULL) { 2542 if (error == 0) 2543 error = ENOENT; 2544 goto bad; 2545 } 2546 2547 mi->mi_curr_serv = svp = firstsvp; 2548 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2549 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2550 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2551 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2552 mi->mi_rootfh = sfh4_get(&fh, mi); 2553 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2554 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2555 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2556 nfs_rw_exit(&svp->sv_lock); 2557 2558 /* 2559 * Get the fname for filesystem root. 2560 */ 2561 mi->mi_fname = fn_get(NULL, ".", mi->mi_rootfh); 2562 mfname = mi->mi_fname; 2563 fn_hold(mfname); 2564 2565 /* 2566 * Make the root vnode without attributes. 2567 */ 2568 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2569 &mfname, NULL, mi, cr, gethrtime()); 2570 rtvp->v_type = vtype; 2571 2572 mi->mi_curread = mi->mi_tsize; 2573 mi->mi_curwrite = mi->mi_stsize; 2574 2575 /* 2576 * Start the manager thread responsible for handling async worker 2577 * threads. 2578 */ 2579 MI4_HOLD(mi); 2580 VFS_HOLD(vfsp); /* add reference for thread */ 2581 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2582 vfsp, 0, minclsyspri); 2583 ASSERT(mi->mi_manager_thread != NULL); 2584 2585 /* 2586 * Create the thread that handles over-the-wire calls for 2587 * VOP_INACTIVE. 2588 * This needs to happen after the manager thread is created. 
2589 */ 2590 MI4_HOLD(mi); 2591 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2592 mi, 0, minclsyspri); 2593 ASSERT(mi->mi_inactive_thread != NULL); 2594 2595 /* If we didn't get a type, get one now */ 2596 if (rtvp->v_type == VNON) { 2597 va.va_mask = AT_TYPE; 2598 error = nfs4getattr(rtvp, &va, tcr); 2599 if (error) 2600 goto bad; 2601 rtvp->v_type = va.va_type; 2602 } 2603 2604 mi->mi_type = rtvp->v_type; 2605 2606 mutex_enter(&mi->mi_lock); 2607 mi->mi_flags &= ~MI4_MOUNTING; 2608 mutex_exit(&mi->mi_lock); 2609 2610 /* Update VFS with new server and path info */ 2611 if ((strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) || 2612 (strcmp(svp->sv_path, origsvp->sv_path) != 0)) { 2613 len = svp->sv_hostnamelen + svp->sv_pathlen; 2614 resource = kmem_zalloc(len, KM_SLEEP); 2615 (void) strcat(resource, svp->sv_hostname); 2616 (void) strcat(resource, ":"); 2617 (void) strcat(resource, svp->sv_path); 2618 vfs_setresource(vfsp, resource, 0); 2619 kmem_free(resource, len); 2620 } 2621 2622 sv4_free(origsvp); 2623 *rtvpp = rtvp; 2624 if (lcr != NULL) 2625 crfree(lcr); 2626 2627 return (0); 2628 bad: 2629 /* 2630 * An error occurred somewhere, need to clean up... 2631 */ 2632 if (lcr != NULL) 2633 crfree(lcr); 2634 2635 if (rtvp != NULL) { 2636 /* 2637 * We need to release our reference to the root vnode and 2638 * destroy the mntinfo4 struct that we just created. 2639 */ 2640 rp = VTOR4(rtvp); 2641 if (rp->r_flags & R4HASHED) 2642 rp4_rmhash(rp); 2643 VN_RELE(rtvp); 2644 } 2645 nfs4_async_stop(vfsp); 2646 nfs4_async_manager_stop(vfsp); 2647 removed = nfs4_mi_zonelist_remove(mi); 2648 if (removed) 2649 zone_rele(mi->mi_zone); 2650 2651 /* 2652 * This releases the initial "hold" of the mi since it will never 2653 * be referenced by the vfsp. Also, when mount returns to vfs.c 2654 * with an error, the vfsp will be destroyed, not rele'd. 2655 */ 2656 MI4_RELE(mi); 2657 2658 if (origsvp != NULL) 2659 sv4_free(origsvp); 2660 2661 *rtvpp = NULL; 2662 return (error); 2663 } 2664 2665 /* 2666 * vfs operations 2667 */ 2668 static int 2669 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2670 { 2671 mntinfo4_t *mi; 2672 ushort_t omax; 2673 int removed; 2674 2675 bool_t must_unlock; 2676 2677 nfs4_ephemeral_tree_t *eph_tree; 2678 2679 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2680 return (EPERM); 2681 2682 mi = VFTOMI4(vfsp); 2683 2684 if (flag & MS_FORCE) { 2685 vfsp->vfs_flag |= VFS_UNMOUNTED; 2686 if (nfs_zone() != mi->mi_zone) { 2687 /* 2688 * If the request is coming from the wrong zone, 2689 * we don't want to create any new threads, and 2690 * performance is not a concern. Do everything 2691 * inline. 2692 */ 2693 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2694 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2695 (void *)vfsp)); 2696 nfs4_free_mount(vfsp, flag, cr); 2697 } else { 2698 /* 2699 * Free data structures asynchronously, to avoid 2700 * blocking the current thread (for performance 2701 * reasons only). 2702 */ 2703 async_free_mount(vfsp, flag, cr); 2704 } 2705 2706 return (0); 2707 } 2708 2709 /* 2710 * Wait until all asynchronous putpage operations on 2711 * this file system are complete before flushing rnodes 2712 * from the cache. 2713 */ 2714 omax = mi->mi_max_threads; 2715 if (nfs4_async_stop_sig(vfsp)) 2716 return (EINTR); 2717 2718 r4flush(vfsp, cr); 2719 2720 /* 2721 * About the only reason that this would fail would be 2722 * that the harvester is already busy tearing down this 2723 * node. 
So we fail back to the caller and let them try 2724 * again when needed. 2725 */ 2726 if (nfs4_ephemeral_umount(mi, flag, cr, 2727 &must_unlock, &eph_tree)) { 2728 ASSERT(must_unlock == FALSE); 2729 mutex_enter(&mi->mi_async_lock); 2730 mi->mi_max_threads = omax; 2731 mutex_exit(&mi->mi_async_lock); 2732 2733 return (EBUSY); 2734 } 2735 2736 /* 2737 * If there are any active vnodes on this file system, 2738 * then the file system is busy and can't be unmounted. 2739 */ 2740 if (check_rtable4(vfsp)) { 2741 nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree); 2742 2743 mutex_enter(&mi->mi_async_lock); 2744 mi->mi_max_threads = omax; 2745 mutex_exit(&mi->mi_async_lock); 2746 2747 return (EBUSY); 2748 } 2749 2750 /* 2751 * The unmount can't fail from now on, so record any 2752 * ephemeral changes. 2753 */ 2754 nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree); 2755 2756 /* 2757 * There are no active files that could require over-the-wire 2758 * calls to the server, so stop the async manager and the 2759 * inactive thread. 2760 */ 2761 nfs4_async_manager_stop(vfsp); 2762 2763 /* 2764 * Destroy all rnodes belonging to this file system from the 2765 * rnode hash queues and purge any resources allocated to 2766 * them. 2767 */ 2768 destroy_rtable4(vfsp, cr); 2769 vfsp->vfs_flag |= VFS_UNMOUNTED; 2770 2771 nfs4_remove_mi_from_server(mi, NULL); 2772 removed = nfs4_mi_zonelist_remove(mi); 2773 if (removed) 2774 zone_rele(mi->mi_zone); 2775 2776 return (0); 2777 } 2778 2779 /* 2780 * find root of nfs 2781 */ 2782 static int 2783 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2784 { 2785 mntinfo4_t *mi; 2786 vnode_t *vp; 2787 nfs4_fname_t *mfname; 2788 servinfo4_t *svp; 2789 2790 mi = VFTOMI4(vfsp); 2791 2792 if (nfs_zone() != mi->mi_zone) 2793 return (EPERM); 2794 2795 svp = mi->mi_curr_serv; 2796 if (svp) { 2797 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2798 if (svp->sv_flags & SV4_ROOT_STALE) { 2799 nfs_rw_exit(&svp->sv_lock); 2800 2801 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2802 if (svp->sv_flags & SV4_ROOT_STALE) { 2803 svp->sv_flags &= ~SV4_ROOT_STALE; 2804 nfs_rw_exit(&svp->sv_lock); 2805 return (ENOENT); 2806 } 2807 nfs_rw_exit(&svp->sv_lock); 2808 } else 2809 nfs_rw_exit(&svp->sv_lock); 2810 } 2811 2812 mfname = mi->mi_fname; 2813 fn_hold(mfname); 2814 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2815 VFTOMI4(vfsp), CRED(), gethrtime()); 2816 2817 if (VTOR4(vp)->r_flags & R4STALE) { 2818 VN_RELE(vp); 2819 return (ENOENT); 2820 } 2821 2822 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2823 2824 vp->v_type = mi->mi_type; 2825 2826 *vpp = vp; 2827 2828 return (0); 2829 } 2830 2831 static int 2832 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2833 { 2834 int error; 2835 nfs4_ga_res_t gar; 2836 nfs4_ga_ext_res_t ger; 2837 2838 gar.n4g_ext_res = &ger; 2839 2840 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2841 NFS4_STATFS_ATTR_MASK, cr)) 2842 return (error); 2843 2844 *sbp = gar.n4g_ext_res->n4g_sb; 2845 2846 return (0); 2847 } 2848 2849 /* 2850 * Get file system statistics. 
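 *
 * Editor's note (illustrative summary of the path below): nfs4_root()
 * supplies the root vnode, nfs4_statfs_otw() fetches the statvfs
 * related attributes in a single over-the-wire attribute request
 * (NFS4_STATFS_ATTR_MASK) and copies the decoded n4g_sb into *sbp,
 * and on failure nfs4_purge_stale_fh() is called in case the error
 * indicates a stale root filehandle.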
2851 */ 2852 static int 2853 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2854 { 2855 int error; 2856 vnode_t *vp; 2857 cred_t *cr; 2858 2859 error = nfs4_root(vfsp, &vp); 2860 if (error) 2861 return (error); 2862 2863 cr = CRED(); 2864 2865 error = nfs4_statfs_otw(vp, sbp, cr); 2866 if (!error) { 2867 (void) strncpy(sbp->f_basetype, 2868 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2869 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2870 } else { 2871 nfs4_purge_stale_fh(error, vp, cr); 2872 } 2873 2874 VN_RELE(vp); 2875 2876 return (error); 2877 } 2878 2879 static kmutex_t nfs4_syncbusy; 2880 2881 /* 2882 * Flush dirty nfs files for file system vfsp. 2883 * If vfsp == NULL, all nfs files are flushed. 2884 * 2885 * SYNC_CLOSE in flag is passed to us to 2886 * indicate that we are shutting down and or 2887 * rebooting. 2888 */ 2889 static int 2890 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2891 { 2892 /* 2893 * Cross-zone calls are OK here, since this translates to a 2894 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2895 */ 2896 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2897 r4flush(vfsp, cr); 2898 mutex_exit(&nfs4_syncbusy); 2899 } 2900 2901 /* 2902 * if SYNC_CLOSE is set then we know that 2903 * the system is rebooting, mark the mntinfo 2904 * for later examination. 2905 */ 2906 if (vfsp && (flag & SYNC_CLOSE)) { 2907 mntinfo4_t *mi; 2908 2909 mi = VFTOMI4(vfsp); 2910 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2911 mutex_enter(&mi->mi_lock); 2912 mi->mi_flags |= MI4_SHUTDOWN; 2913 mutex_exit(&mi->mi_lock); 2914 } 2915 } 2916 return (0); 2917 } 2918 2919 /* 2920 * vget is difficult, if not impossible, to support in v4 because we don't 2921 * know the parent directory or name, which makes it impossible to create a 2922 * useful shadow vnode. And we need the shadow vnode for things like 2923 * OPEN. 2924 */ 2925 2926 /* ARGSUSED */ 2927 /* 2928 * XXX Check nfs4_vget_pseudo() for dependency. 2929 */ 2930 static int 2931 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2932 { 2933 return (EREMOTE); 2934 } 2935 2936 /* 2937 * nfs4_mountroot get called in the case where we are diskless booting. All 2938 * we need from here is the ability to get the server info and from there we 2939 * can simply call nfs4_rootvp. 2940 */ 2941 /* ARGSUSED */ 2942 static int 2943 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2944 { 2945 vnode_t *rtvp; 2946 char root_hostname[SYS_NMLN+1]; 2947 struct servinfo4 *svp; 2948 int error; 2949 int vfsflags; 2950 size_t size; 2951 char *root_path; 2952 struct pathname pn; 2953 char *name; 2954 cred_t *cr; 2955 mntinfo4_t *mi; 2956 struct nfs_args args; /* nfs mount arguments */ 2957 static char token[10]; 2958 nfs4_error_t n4e; 2959 2960 bzero(&args, sizeof (args)); 2961 2962 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2963 clkset(-1L); /* hack for now - until we get time svc? */ 2964 2965 if (why == ROOT_REMOUNT) { 2966 /* 2967 * Shouldn't happen. 2968 */ 2969 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2970 } 2971 2972 if (why == ROOT_UNMOUNT) { 2973 /* 2974 * Nothing to do for NFS. 
2975 */ 2976 return (0); 2977 } 2978 2979 /* 2980 * why == ROOT_INIT 2981 */ 2982 2983 name = token; 2984 *name = 0; 2985 (void) getfsname("root", name, sizeof (token)); 2986 2987 pn_alloc(&pn); 2988 root_path = pn.pn_path; 2989 2990 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2991 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2992 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2993 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2994 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2995 2996 /* 2997 * Get server address 2998 * Get the root path 2999 * Get server's transport 3000 * Get server's hostname 3001 * Get options 3002 */ 3003 args.addr = &svp->sv_addr; 3004 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 3005 args.fh = (char *)&svp->sv_fhandle; 3006 args.knconf = svp->sv_knconf; 3007 args.hostname = root_hostname; 3008 vfsflags = 0; 3009 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 3010 &args, &vfsflags)) { 3011 if (error == EPROTONOSUPPORT) 3012 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 3013 "mount_root failed: server doesn't support NFS V4"); 3014 else 3015 nfs_cmn_err(error, CE_WARN, 3016 "nfs4_mountroot: mount_root failed: %m"); 3017 nfs_rw_exit(&svp->sv_lock); 3018 sv4_free(svp); 3019 pn_free(&pn); 3020 return (error); 3021 } 3022 nfs_rw_exit(&svp->sv_lock); 3023 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 3024 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 3025 (void) strcpy(svp->sv_hostname, root_hostname); 3026 3027 svp->sv_pathlen = (int)(strlen(root_path) + 1); 3028 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 3029 (void) strcpy(svp->sv_path, root_path); 3030 3031 /* 3032 * Force root partition to always be mounted with AUTH_UNIX for now 3033 */ 3034 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 3035 svp->sv_secdata->secmod = AUTH_UNIX; 3036 svp->sv_secdata->rpcflavor = AUTH_UNIX; 3037 svp->sv_secdata->data = NULL; 3038 3039 cr = crgetcred(); 3040 rtvp = NULL; 3041 3042 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 3043 3044 if (error) { 3045 crfree(cr); 3046 pn_free(&pn); 3047 sv4_free(svp); 3048 return (error); 3049 } 3050 3051 mi = VTOMI4(rtvp); 3052 3053 /* 3054 * Send client id to the server, if necessary 3055 */ 3056 nfs4_error_zinit(&n4e); 3057 nfs4setclientid(mi, cr, FALSE, &n4e); 3058 error = n4e.error; 3059 3060 crfree(cr); 3061 3062 if (error) { 3063 pn_free(&pn); 3064 goto errout; 3065 } 3066 3067 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 3068 if (error) { 3069 nfs_cmn_err(error, CE_WARN, 3070 "nfs4_mountroot: invalid root mount options"); 3071 pn_free(&pn); 3072 goto errout; 3073 } 3074 3075 (void) vfs_lock_wait(vfsp); 3076 vfs_add(NULL, vfsp, vfsflags); 3077 vfs_unlock(vfsp); 3078 3079 size = strlen(svp->sv_hostname); 3080 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 3081 rootfs.bo_name[size] = ':'; 3082 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 3083 3084 pn_free(&pn); 3085 3086 errout: 3087 if (error) { 3088 sv4_free(svp); 3089 nfs4_async_stop(vfsp); 3090 nfs4_async_manager_stop(vfsp); 3091 } 3092 3093 if (rtvp != NULL) 3094 VN_RELE(rtvp); 3095 3096 return (error); 3097 } 3098 3099 /* 3100 * Initialization routine for VFS routines. 
Should only be called once 3101 */ 3102 int 3103 nfs4_vfsinit(void) 3104 { 3105 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 3106 nfs4setclientid_init(); 3107 nfs4_ephemeral_init(); 3108 return (0); 3109 } 3110 3111 void 3112 nfs4_vfsfini(void) 3113 { 3114 nfs4_ephemeral_fini(); 3115 nfs4setclientid_fini(); 3116 mutex_destroy(&nfs4_syncbusy); 3117 } 3118 3119 void 3120 nfs4_freevfs(vfs_t *vfsp) 3121 { 3122 mntinfo4_t *mi; 3123 3124 /* need to release the initial hold */ 3125 mi = VFTOMI4(vfsp); 3126 3127 /* 3128 * At this point, we can no longer reference the vfs 3129 * and need to inform other holders of the reference 3130 * to the mntinfo4_t. 3131 */ 3132 mi->mi_vfsp = NULL; 3133 3134 MI4_RELE(mi); 3135 } 3136 3137 /* 3138 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 3139 */ 3140 struct nfs4_server nfs4_server_lst = 3141 { &nfs4_server_lst, &nfs4_server_lst }; 3142 3143 kmutex_t nfs4_server_lst_lock; 3144 3145 static void 3146 nfs4setclientid_init(void) 3147 { 3148 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 3149 } 3150 3151 static void 3152 nfs4setclientid_fini(void) 3153 { 3154 mutex_destroy(&nfs4_server_lst_lock); 3155 } 3156 3157 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 3158 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 3159 3160 /* 3161 * Set the clientid for the server for "mi". No-op if the clientid is 3162 * already set. 3163 * 3164 * The recovery boolean should be set to TRUE if this function was called 3165 * by the recovery code, and FALSE otherwise. This is used to determine 3166 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 3167 * for adding a mntinfo4_t to a nfs4_server_t. 3168 * 3169 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 3170 * 'n4ep->error' is set to geterrno4(n4ep->stat). 3171 */ 3172 void 3173 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 3174 { 3175 struct nfs4_server *np; 3176 struct servinfo4 *svp = mi->mi_curr_serv; 3177 nfs4_recov_state_t recov_state; 3178 int num_retries = 0; 3179 bool_t retry; 3180 cred_t *lcr = NULL; 3181 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 3182 time_t lease_time = 0; 3183 3184 recov_state.rs_flags = 0; 3185 recov_state.rs_num_retry_despite_err = 0; 3186 ASSERT(n4ep != NULL); 3187 3188 recov_retry: 3189 retry = FALSE; 3190 nfs4_error_zinit(n4ep); 3191 if (!recovery) 3192 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3193 3194 mutex_enter(&nfs4_server_lst_lock); 3195 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 3196 mutex_exit(&nfs4_server_lst_lock); 3197 if (!np) { 3198 struct nfs4_server *tnp; 3199 np = new_nfs4_server(svp, cr); 3200 mutex_enter(&np->s_lock); 3201 3202 mutex_enter(&nfs4_server_lst_lock); 3203 tnp = servinfo4_to_nfs4_server(svp); 3204 if (tnp) { 3205 /* 3206 * another thread snuck in and put server on list. 3207 * since we aren't adding it to the nfs4_server_list 3208 * we need to set the ref count to 0 and destroy it. 
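 *
 * Editor's note (illustrative): this is the usual optimistic
 * create-then-recheck pattern. nfs4_server_lst_lock is dropped while
 * new_nfs4_server() allocates and initializes the node, so the list
 * must be searched again under the lock before inserting; if another
 * thread won the race, the freshly built node is discarded. Since
 * new_nfs4_server() returns the node with s_refcnt == 1, the count is
 * forced to 0 first before handing it to destroy_nfs4_server().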
3209 */ 3210 np->s_refcnt = 0; 3211 destroy_nfs4_server(np); 3212 np = tnp; 3213 } else { 3214 /* 3215 * do not give list a reference until everything 3216 * succeeds 3217 */ 3218 insque(np, &nfs4_server_lst); 3219 } 3220 mutex_exit(&nfs4_server_lst_lock); 3221 } 3222 ASSERT(MUTEX_HELD(&np->s_lock)); 3223 /* 3224 * If we find the server already has N4S_CLIENTID_SET, then 3225 * just return, we've already done SETCLIENTID to that server 3226 */ 3227 if (np->s_flags & N4S_CLIENTID_SET) { 3228 /* add mi to np's mntinfo4_list */ 3229 nfs4_add_mi_to_server(np, mi); 3230 if (!recovery) 3231 nfs_rw_exit(&mi->mi_recovlock); 3232 mutex_exit(&np->s_lock); 3233 nfs4_server_rele(np); 3234 return; 3235 } 3236 mutex_exit(&np->s_lock); 3237 3238 3239 /* 3240 * Drop the mi_recovlock since nfs4_start_op will 3241 * acquire it again for us. 3242 */ 3243 if (!recovery) { 3244 nfs_rw_exit(&mi->mi_recovlock); 3245 3246 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 3247 if (n4ep->error) { 3248 nfs4_server_rele(np); 3249 return; 3250 } 3251 } 3252 3253 mutex_enter(&np->s_lock); 3254 while (np->s_flags & N4S_CLIENTID_PEND) { 3255 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 3256 mutex_exit(&np->s_lock); 3257 nfs4_server_rele(np); 3258 if (!recovery) 3259 nfs4_end_op(mi, NULL, NULL, &recov_state, 3260 recovery); 3261 n4ep->error = EINTR; 3262 return; 3263 } 3264 } 3265 3266 if (np->s_flags & N4S_CLIENTID_SET) { 3267 /* XXX copied/pasted from above */ 3268 /* add mi to np's mntinfo4_list */ 3269 nfs4_add_mi_to_server(np, mi); 3270 mutex_exit(&np->s_lock); 3271 nfs4_server_rele(np); 3272 if (!recovery) 3273 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 3274 return; 3275 } 3276 3277 /* 3278 * Reset the N4S_CB_PINGED flag. This is used to 3279 * indicate if we have received a CB_NULL from the 3280 * server. Also we reset the waiter flag. 3281 */ 3282 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 3283 /* any failure must now clear this flag */ 3284 np->s_flags |= N4S_CLIENTID_PEND; 3285 mutex_exit(&np->s_lock); 3286 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 3287 3288 if (n4ep->error == EACCES) { 3289 /* 3290 * If the uid is set then set the creds for secure mounts 3291 * by proxy processes such as automountd. 3292 */ 3293 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 3294 if (svp->sv_secdata->uid != 0) { 3295 lcr = crdup(cr); 3296 (void) crsetugid(lcr, svp->sv_secdata->uid, 3297 crgetgid(cr)); 3298 } 3299 nfs_rw_exit(&svp->sv_lock); 3300 3301 if (lcr != NULL) { 3302 mutex_enter(&np->s_lock); 3303 crfree(np->s_cred); 3304 np->s_cred = lcr; 3305 mutex_exit(&np->s_lock); 3306 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 3307 &retry_inuse); 3308 } 3309 } 3310 mutex_enter(&np->s_lock); 3311 lease_time = np->s_lease_time; 3312 np->s_flags &= ~N4S_CLIENTID_PEND; 3313 mutex_exit(&np->s_lock); 3314 3315 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 3316 /* 3317 * Start recovery if failover is a possibility. If 3318 * invoked by the recovery thread itself, then just 3319 * return and let it handle the failover first. NB: 3320 * recovery is not allowed if the mount is in progress 3321 * since the infrastructure is not sufficiently setup 3322 * to allow it. Just return the error (after suitable 3323 * retries). 3324 */ 3325 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 3326 (void) nfs4_start_recovery(n4ep, mi, NULL, 3327 NULL, NULL, NULL, OP_SETCLIENTID, NULL, NULL, NULL); 3328 /* 3329 * Don't retry here, just return and let 3330 * recovery take over. 
3331 */ 3332 if (recovery) 3333 retry = FALSE; 3334 } else if (nfs4_rpc_retry_error(n4ep->error) || 3335 n4ep->stat == NFS4ERR_RESOURCE || 3336 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 3337 3338 retry = TRUE; 3339 /* 3340 * Always retry if in recovery or once had 3341 * contact with the server (but now it's 3342 * overloaded). 3343 */ 3344 if (recovery == TRUE || 3345 n4ep->error == ETIMEDOUT || 3346 n4ep->error == ECONNRESET) 3347 num_retries = 0; 3348 } else if (retry_inuse && n4ep->error == 0 && 3349 n4ep->stat == NFS4ERR_CLID_INUSE) { 3350 retry = TRUE; 3351 num_retries = 0; 3352 } 3353 } else { 3354 /* 3355 * Since everything succeeded give the list a reference count if 3356 * it hasn't been given one by add_new_nfs4_server() or if this 3357 * is not a recovery situation in which case it is already on 3358 * the list. 3359 */ 3360 mutex_enter(&np->s_lock); 3361 if ((np->s_flags & N4S_INSERTED) == 0) { 3362 np->s_refcnt++; 3363 np->s_flags |= N4S_INSERTED; 3364 } 3365 mutex_exit(&np->s_lock); 3366 } 3367 3368 if (!recovery) 3369 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 3370 3371 3372 if (retry && num_retries++ < nfs4_num_sclid_retries) { 3373 if (retry_inuse) { 3374 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 3375 retry_inuse = 0; 3376 } else 3377 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 3378 3379 nfs4_server_rele(np); 3380 goto recov_retry; 3381 } 3382 3383 3384 if (n4ep->error == 0) 3385 n4ep->error = geterrno4(n4ep->stat); 3386 3387 /* broadcast before release in case no other threads are waiting */ 3388 cv_broadcast(&np->s_clientid_pend); 3389 nfs4_server_rele(np); 3390 } 3391 3392 int nfs4setclientid_otw_debug = 0; 3393 3394 /* 3395 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 3396 * but nothing else; the calling function must be designed to handle those 3397 * other errors. 3398 */ 3399 static void 3400 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 3401 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 3402 { 3403 COMPOUND4args_clnt args; 3404 COMPOUND4res_clnt res; 3405 nfs_argop4 argop[3]; 3406 SETCLIENTID4args *s_args; 3407 SETCLIENTID4resok *s_resok; 3408 int doqueue = 1; 3409 nfs4_ga_res_t *garp = NULL; 3410 timespec_t prop_time, after_time; 3411 verifier4 verf; 3412 clientid4 tmp_clientid; 3413 3414 ASSERT(!MUTEX_HELD(&np->s_lock)); 3415 3416 args.ctag = TAG_SETCLIENTID; 3417 3418 args.array = argop; 3419 args.array_len = 3; 3420 3421 /* PUTROOTFH */ 3422 argop[0].argop = OP_PUTROOTFH; 3423 3424 /* GETATTR */ 3425 argop[1].argop = OP_GETATTR; 3426 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 3427 argop[1].nfs_argop4_u.opgetattr.mi = mi; 3428 3429 /* SETCLIENTID */ 3430 argop[2].argop = OP_SETCLIENTID; 3431 3432 s_args = &argop[2].nfs_argop4_u.opsetclientid; 3433 3434 mutex_enter(&np->s_lock); 3435 3436 s_args->client.verifier = np->clidtosend.verifier; 3437 s_args->client.id_len = np->clidtosend.id_len; 3438 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 3439 s_args->client.id_val = np->clidtosend.id_val; 3440 3441 /* 3442 * Callback needs to happen on non-RDMA transport 3443 * Check if we have saved the original knetconfig 3444 * if so, use that instead. 
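 *
 * Editor's note (illustrative, hedged): sv_origknconf appears to hold
 * the pre-RDMA knetconfig saved when the mount was switched to an
 * RDMA transport; preferring it here keeps the callback information
 * handed to nfs4_cb_args() on a transport the server can actually
 * call back over. When it is NULL, the mount's regular sv_knconf is
 * used, as the code below shows.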
3445 */ 3446 if (svp->sv_origknconf != NULL) 3447 nfs4_cb_args(np, svp->sv_origknconf, s_args); 3448 else 3449 nfs4_cb_args(np, svp->sv_knconf, s_args); 3450 3451 mutex_exit(&np->s_lock); 3452 3453 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3454 3455 if (ep->error) 3456 return; 3457 3458 /* getattr lease_time res */ 3459 if ((res.array_len >= 2) && 3460 (res.array[1].nfs_resop4_u.opgetattr.status == NFS4_OK)) { 3461 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 3462 3463 #ifndef _LP64 3464 /* 3465 * The 32 bit client cannot handle a lease time greater than 3466 * (INT32_MAX/1000000). This is due to the use of the 3467 * lease_time in calls to drv_usectohz() in 3468 * nfs4_renew_lease_thread(). The problem is that 3469 * drv_usectohz() takes a time_t (which is just a long = 4 3470 * bytes) as its parameter. The lease_time is multiplied by 3471 * 1000000 to convert seconds to usecs for the parameter. If 3472 * a number bigger than (INT32_MAX/1000000) is used then we 3473 * overflow on the 32bit client. 3474 */ 3475 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 3476 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 3477 } 3478 #endif 3479 3480 mutex_enter(&np->s_lock); 3481 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 3482 3483 /* 3484 * Keep track of the lease period for the mi's 3485 * mi_msg_list. We need an appropriate time 3486 * bound to associate past facts with a current 3487 * event. The lease period is perfect for this. 3488 */ 3489 mutex_enter(&mi->mi_msg_list_lock); 3490 mi->mi_lease_period = np->s_lease_time; 3491 mutex_exit(&mi->mi_msg_list_lock); 3492 mutex_exit(&np->s_lock); 3493 } 3494 3495 3496 if (res.status == NFS4ERR_CLID_INUSE) { 3497 clientaddr4 *clid_inuse; 3498 3499 if (!(*retry_inusep)) { 3500 clid_inuse = &res.array->nfs_resop4_u. 3501 opsetclientid.SETCLIENTID4res_u.client_using; 3502 3503 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3504 "NFS4 mount (SETCLIENTID failed)." 3505 " nfs4_client_id.id is in " 3506 "use already by: r_netid<%s> r_addr<%s>", 3507 clid_inuse->r_netid, clid_inuse->r_addr); 3508 } 3509 3510 /* 3511 * XXX - The client should be more robust in its 3512 * handling of clientid in use errors (regen another 3513 * clientid and try again?) 3514 */ 3515 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3516 return; 3517 } 3518 3519 if (res.status) { 3520 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3521 return; 3522 } 3523 3524 s_resok = &res.array[2].nfs_resop4_u.
3525 opsetclientid.SETCLIENTID4res_u.resok4; 3526 3527 tmp_clientid = s_resok->clientid; 3528 3529 verf = s_resok->setclientid_confirm; 3530 3531 #ifdef DEBUG 3532 if (nfs4setclientid_otw_debug) { 3533 union { 3534 clientid4 clientid; 3535 int foo[2]; 3536 } cid; 3537 3538 cid.clientid = s_resok->clientid; 3539 3540 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3541 "nfs4setclientid_otw: OK, clientid = %x,%x, " 3542 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 3543 } 3544 #endif 3545 3546 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3547 3548 /* Confirm the client id and get the lease_time attribute */ 3549 3550 args.ctag = TAG_SETCLIENTID_CF; 3551 3552 args.array = argop; 3553 args.array_len = 1; 3554 3555 argop[0].argop = OP_SETCLIENTID_CONFIRM; 3556 3557 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 3558 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 3559 3560 /* used to figure out RTT for np */ 3561 gethrestime(&prop_time); 3562 3563 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 3564 "start time: %ld sec %ld nsec", prop_time.tv_sec, 3565 prop_time.tv_nsec)); 3566 3567 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3568 3569 gethrestime(&after_time); 3570 mutex_enter(&np->s_lock); 3571 np->propagation_delay.tv_sec = 3572 MAX(1, after_time.tv_sec - prop_time.tv_sec); 3573 mutex_exit(&np->s_lock); 3574 3575 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 3576 "finish time: %ld sec ", after_time.tv_sec)); 3577 3578 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 3579 "propagation delay set to %ld sec", 3580 np->propagation_delay.tv_sec)); 3581 3582 if (ep->error) 3583 return; 3584 3585 if (res.status == NFS4ERR_CLID_INUSE) { 3586 clientaddr4 *clid_inuse; 3587 3588 if (!(*retry_inusep)) { 3589 clid_inuse = &res.array->nfs_resop4_u. 3590 opsetclientid.SETCLIENTID4res_u.client_using; 3591 3592 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3593 "SETCLIENTID_CONFIRM failed. " 3594 "nfs4_client_id.id is in use already by: " 3595 "r_netid<%s> r_addr<%s>", 3596 clid_inuse->r_netid, clid_inuse->r_addr); 3597 } 3598 3599 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3600 return; 3601 } 3602 3603 if (res.status) { 3604 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3605 return; 3606 } 3607 3608 mutex_enter(&np->s_lock); 3609 np->clientid = tmp_clientid; 3610 np->s_flags |= N4S_CLIENTID_SET; 3611 3612 /* Add mi to np's mntinfo4 list */ 3613 nfs4_add_mi_to_server(np, mi); 3614 3615 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 3616 /* 3617 * Start lease management thread. 3618 * Keep trying until we succeed. 3619 */ 3620 3621 np->s_refcnt++; /* pass reference to thread */ 3622 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 3623 minclsyspri); 3624 } 3625 mutex_exit(&np->s_lock); 3626 3627 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3628 } 3629 3630 /* 3631 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3632 * mi's clientid the same as sp's. 3633 * Assumes sp is locked down. 
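 *
 * Editor's note (illustrative): "locked down" means the caller holds
 * sp->s_lock; the ASSERTs below additionally require mi->mi_recovlock
 * (as reader or writer), presumably so the mntinfo4 cannot be torn
 * down while it is being linked onto sp's mntinfo4_list.
 * nfs4setclientid() above is a typical caller.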
3634 */ 3635 void 3636 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3637 { 3638 mntinfo4_t *tmi; 3639 int in_list = 0; 3640 3641 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3642 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3643 ASSERT(sp != &nfs4_server_lst); 3644 ASSERT(MUTEX_HELD(&sp->s_lock)); 3645 3646 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3647 "nfs4_add_mi_to_server: add mi %p to sp %p", 3648 (void*)mi, (void*)sp)); 3649 3650 for (tmi = sp->mntinfo4_list; 3651 tmi != NULL; 3652 tmi = tmi->mi_clientid_next) { 3653 if (tmi == mi) { 3654 NFS4_DEBUG(nfs4_client_lease_debug, 3655 (CE_NOTE, 3656 "nfs4_add_mi_to_server: mi in list")); 3657 in_list = 1; 3658 } 3659 } 3660 3661 /* 3662 * First put a hold on the mntinfo4's vfsp so that references via 3663 * mntinfo4_list will be valid. 3664 */ 3665 if (!in_list) 3666 VFS_HOLD(mi->mi_vfsp); 3667 3668 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3669 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3670 3671 if (!in_list) { 3672 if (sp->mntinfo4_list) 3673 sp->mntinfo4_list->mi_clientid_prev = mi; 3674 mi->mi_clientid_next = sp->mntinfo4_list; 3675 mi->mi_srv = sp; 3676 sp->mntinfo4_list = mi; 3677 mi->mi_srvsettime = gethrestime_sec(); 3678 mi->mi_srvset_cnt++; 3679 } 3680 3681 /* set mi's clientid to that of sp's for later matching */ 3682 mi->mi_clientid = sp->clientid; 3683 3684 /* 3685 * Update the clientid for any other mi's belonging to sp. This 3686 * must be done here while we hold sp->s_lock, so that 3687 * find_nfs4_server() continues to work. 3688 */ 3689 3690 for (tmi = sp->mntinfo4_list; 3691 tmi != NULL; 3692 tmi = tmi->mi_clientid_next) { 3693 if (tmi != mi) { 3694 tmi->mi_clientid = sp->clientid; 3695 } 3696 } 3697 } 3698 3699 /* 3700 * Remove the mi from sp's mntinfo4_list and release its reference. 3701 * Exception: if mi still has open files, flag it for later removal (when 3702 * all the files are closed). 3703 * 3704 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3705 * thread to exit. 3706 */ 3707 static void 3708 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3709 { 3710 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3711 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3712 (void*)mi, (void*)sp)); 3713 3714 ASSERT(sp != NULL); 3715 ASSERT(MUTEX_HELD(&sp->s_lock)); 3716 ASSERT(mi->mi_open_files >= 0); 3717 3718 /* 3719 * First make sure this mntinfo4 can be taken off of the list, 3720 * ie: it doesn't have any open files remaining. 3721 */ 3722 if (mi->mi_open_files > 0) { 3723 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3724 "nfs4_remove_mi_from_server_nolock: don't " 3725 "remove mi since it still has files open")); 3726 3727 mutex_enter(&mi->mi_lock); 3728 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3729 mutex_exit(&mi->mi_lock); 3730 return; 3731 } 3732 3733 VFS_HOLD(mi->mi_vfsp); 3734 remove_mi(sp, mi); 3735 VFS_RELE(mi->mi_vfsp); 3736 3737 if (sp->mntinfo4_list == NULL) { 3738 /* last fs unmounted, kill the thread */ 3739 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3740 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3741 nfs4_mark_srv_dead(sp); 3742 } 3743 } 3744 3745 /* 3746 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3747 */ 3748 static void 3749 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3750 { 3751 ASSERT(MUTEX_HELD(&sp->s_lock)); 3752 3753 /* 3754 * We release a reference, and the caller must still have a 3755 * reference. 
3756 */ 3757 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3758 3759 if (mi->mi_clientid_prev) { 3760 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3761 } else { 3762 /* This is the first mi in sp's mntinfo4_list */ 3763 /* 3764 * Make sure the first mntinfo4 in the list is the actual 3765 * mntinfo4 passed in. 3766 */ 3767 ASSERT(sp->mntinfo4_list == mi); 3768 3769 sp->mntinfo4_list = mi->mi_clientid_next; 3770 } 3771 if (mi->mi_clientid_next) 3772 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3773 3774 /* Now mark the mntinfo4's links as being removed */ 3775 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3776 mi->mi_srv = NULL; 3777 mi->mi_srvset_cnt++; 3778 3779 VFS_RELE(mi->mi_vfsp); 3780 } 3781 3782 /* 3783 * Free all the entries in sp's mntinfo4_list. 3784 */ 3785 static void 3786 remove_all_mi(nfs4_server_t *sp) 3787 { 3788 mntinfo4_t *mi; 3789 3790 ASSERT(MUTEX_HELD(&sp->s_lock)); 3791 3792 while (sp->mntinfo4_list != NULL) { 3793 mi = sp->mntinfo4_list; 3794 /* 3795 * Grab a reference in case there is only one left (which 3796 * remove_mi() frees). 3797 */ 3798 VFS_HOLD(mi->mi_vfsp); 3799 remove_mi(sp, mi); 3800 VFS_RELE(mi->mi_vfsp); 3801 } 3802 } 3803 3804 /* 3805 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3806 * 3807 * This version can be called with a null nfs4_server_t arg, 3808 * and will either find the right one and handle locking, or 3809 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3810 */ 3811 void 3812 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3813 { 3814 nfs4_server_t *sp; 3815 3816 if (esp) { 3817 nfs4_remove_mi_from_server_nolock(mi, esp); 3818 return; 3819 } 3820 3821 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3822 if (sp = find_nfs4_server_all(mi, 1)) { 3823 nfs4_remove_mi_from_server_nolock(mi, sp); 3824 mutex_exit(&sp->s_lock); 3825 nfs4_server_rele(sp); 3826 } 3827 nfs_rw_exit(&mi->mi_recovlock); 3828 } 3829 3830 /* 3831 * Return TRUE if the given server has any non-unmounted filesystems. 3832 */ 3833 3834 bool_t 3835 nfs4_fs_active(nfs4_server_t *sp) 3836 { 3837 mntinfo4_t *mi; 3838 3839 ASSERT(MUTEX_HELD(&sp->s_lock)); 3840 3841 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3842 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3843 return (TRUE); 3844 } 3845 3846 return (FALSE); 3847 } 3848 3849 /* 3850 * Mark sp as finished and notify any waiters. 3851 */ 3852 3853 void 3854 nfs4_mark_srv_dead(nfs4_server_t *sp) 3855 { 3856 ASSERT(MUTEX_HELD(&sp->s_lock)); 3857 3858 sp->s_thread_exit = NFS4_THREAD_EXIT; 3859 cv_broadcast(&sp->cv_thread_exit); 3860 } 3861 3862 /* 3863 * Create a new nfs4_server_t structure. 3864 * Returns new node unlocked and not in list, but with a reference count of 3865 * 1. 3866 */ 3867 struct nfs4_server * 3868 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3869 { 3870 struct nfs4_server *np; 3871 timespec_t tt; 3872 union { 3873 struct { 3874 uint32_t sec; 3875 uint32_t subsec; 3876 } un_curtime; 3877 verifier4 un_verifier; 3878 } nfs4clientid_verifier; 3879 /* 3880 * We change this ID string carefully and with the Solaris 3881 * NFS server behaviour in mind. "+referrals" indicates 3882 * a client that can handle an NFSv4 referral. 
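 *
 * Illustrative example (editor's addition; "clienthost" is a made-up
 * node name): for a client whose uts_nodename() is "clienthost", the
 * opaque nfs_client_id4 built below is the string
 *
 *	"Solaris: clienthost, NFSv4 kernel client +referrals\0"
 *
 * immediately followed by the raw bytes of the server's address
 * (np->saddr), which ties the id to the particular server address for
 * the multi-homed server case described below. The length arithmetic
 * matches: strlen(id_val) - 2 drops the "%s", strlen(uts_nodename())
 * adds the node name, + 1 keeps the terminating NUL, and saddr.maxlen
 * covers the appended address bytes.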
3883 */ 3884 char id_val[] = "Solaris: %s, NFSv4 kernel client +referrals"; 3885 int len; 3886 3887 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3888 np->saddr.len = svp->sv_addr.len; 3889 np->saddr.maxlen = svp->sv_addr.maxlen; 3890 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3891 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3892 np->s_refcnt = 1; 3893 3894 /* 3895 * Build the nfs_client_id4 for this server mount. Ensure 3896 * the verifier is useful and that the identification is 3897 * somehow based on the server's address for the case of 3898 * multi-homed servers. 3899 */ 3900 nfs4clientid_verifier.un_verifier = 0; 3901 gethrestime(&tt); 3902 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3903 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3904 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3905 3906 /* 3907 * calculate the length of the opaque identifier. Subtract 2 3908 * for the "%s" and add the traditional +1 for null 3909 * termination. 3910 */ 3911 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3912 np->clidtosend.id_len = len + np->saddr.maxlen; 3913 3914 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3915 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3916 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3917 3918 np->s_flags = 0; 3919 np->mntinfo4_list = NULL; 3920 /* save cred for issuing rfs4calls inside the renew thread */ 3921 crhold(cr); 3922 np->s_cred = cr; 3923 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3924 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3925 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3926 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3927 offsetof(rnode4_t, r_deleg_link)); 3928 np->s_thread_exit = 0; 3929 np->state_ref_count = 0; 3930 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3931 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3932 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3933 np->s_otw_call_count = 0; 3934 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3935 np->zoneid = getzoneid(); 3936 np->zone_globals = nfs4_get_callback_globals(); 3937 ASSERT(np->zone_globals != NULL); 3938 return (np); 3939 } 3940 3941 /* 3942 * Create a new nfs4_server_t structure and add it to the list. 3943 * Returns new node locked; reference must eventually be freed. 
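 *
 * Editor's note (illustrative): the node returned by new_nfs4_server()
 * already carries s_refcnt == 1 (the caller's reference); the insque()
 * below then bumps s_refcnt again for the list itself and sets
 * N4S_INSERTED. "Must eventually be freed" therefore refers to the
 * caller's reference, typically dropped with nfs4_server_rele().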

/*
 * Create a new nfs4_server_t structure and add it to the list.
 * Returns new node locked; reference must eventually be freed.
 */
static struct nfs4_server *
add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	nfs4_server_t *sp;

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	sp = new_nfs4_server(svp, cr);
	mutex_enter(&sp->s_lock);
	insque(sp, &nfs4_server_lst);
	sp->s_refcnt++;		/* list gets a reference */
	sp->s_flags |= N4S_INSERTED;
	sp->clientid = 0;
	return (sp);
}

int nfs4_server_t_debug = 0;

#ifdef lint
extern void
dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
#endif

#ifndef lint
#ifdef DEBUG
void
dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
{
	int hash16(void *p, int len);
	nfs4_server_t *np;

	NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
	    "dumping nfs4_server_t list in %s", txt));
	NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
	    "mi 0x%p, want clientid %llx, addr %d/%04X",
	    mi, (longlong_t)clientid, srv_p->sv_addr.len,
	    hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst;
	    np = np->forw) {
		NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
		    "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
		    np, (longlong_t)np->clientid, np->saddr.len,
		    hash16((void *)np->saddr.buf, np->saddr.len),
		    np->state_ref_count));
		if (np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - address matches"));
		if (np->clientid == clientid || np->clientid == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - clientid matches"));
		if (np->s_thread_exit != NFS4_THREAD_EXIT)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - thread not exiting"));
	}
	delay(hz);
}
#endif
#endif

/*
 * Move a mntinfo4_t from one server list to another.
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(nfs_zone() == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void*)mi, (void*)old, (void*)new,
	    (void*)op, (void*)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	mutex_exit(&op->s_lock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(op);

	return (np);
}

/*
 * The caller must be holding nfs4_server_lst_lock.
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
 */
nfs4_server_t *
servinfo4_to_nfs4_server(servinfo4_t *srv_p)
{
	nfs4_server_t *np;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (np->zoneid == zoneid &&
		    np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0 &&
		    np->s_thread_exit != NFS4_THREAD_EXIT) {
			mutex_enter(&np->s_lock);
			np->s_refcnt++;
			return (np);
		}
	}
	return (NULL);
}

/*
 * Locks the nfs4_server down if it is found and returns a reference that
 * must eventually be freed.
 */
static nfs4_server_t *
lookup_nfs4_server(nfs4_server_t *sp, int any_state)
{
	nfs4_server_t *np;

	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		mutex_enter(&np->s_lock);
		if (np == sp && np->s_refcnt > 0 &&
		    (np->s_thread_exit != NFS4_THREAD_EXIT || any_state)) {
			mutex_exit(&nfs4_server_lst_lock);
			np->s_refcnt++;
			return (np);
		}
		mutex_exit(&np->s_lock);
	}
	mutex_exit(&nfs4_server_lst_lock);

	return (NULL);
}

/*
 * The caller should be holding mi->mi_recovlock, and it should continue to
 * hold the lock until done with the returned nfs4_server_t.  Once
 * mi->mi_recovlock is released, there is no guarantee that the returned
 * nfs4_server_t will continue to correspond to mi.
 */
nfs4_server_t *
find_nfs4_server(mntinfo4_t *mi)
{
	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	return (lookup_nfs4_server(mi->mi_srv, 0));
}

/*
 * Same as above, but takes an "any_state" parameter which can be
 * set to 1 if the caller wishes to find nfs4_server_t's which
 * have been marked for termination by the exit of the renew
 * thread.  This should only be used by operations which are
 * cleaning up and will not cause an OTW op.
 */
nfs4_server_t *
find_nfs4_server_all(mntinfo4_t *mi, int any_state)
{
	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	return (lookup_nfs4_server(mi->mi_srv, any_state));
}

/*
 * Lock sp, but only if it's still active (in the list and hasn't been
 * flagged as exiting) or 'any_state' is non-zero.
 * Returns TRUE if sp got locked and adds a reference to sp.
 */
bool_t
nfs4_server_vlock(nfs4_server_t *sp, int any_state)
{
	return (lookup_nfs4_server(sp, any_state) != NULL);
}

/*
 * Release the reference to sp and destroy it if that's the last one.
 */
void
nfs4_server_rele(nfs4_server_t *sp)
{
	mutex_enter(&sp->s_lock);
	ASSERT(sp->s_refcnt > 0);
	sp->s_refcnt--;
	if (sp->s_refcnt > 0) {
		mutex_exit(&sp->s_lock);
		return;
	}
	mutex_exit(&sp->s_lock);

	mutex_enter(&nfs4_server_lst_lock);
	mutex_enter(&sp->s_lock);
	if (sp->s_refcnt > 0) {
		mutex_exit(&sp->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return;
	}
	remque(sp);
	sp->forw = sp->back = NULL;
	mutex_exit(&nfs4_server_lst_lock);
	destroy_nfs4_server(sp);
}

static void
destroy_nfs4_server(nfs4_server_t *sp)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));
	ASSERT(sp->s_refcnt == 0);
	ASSERT(sp->s_otw_call_count == 0);

	remove_all_mi(sp);

	crfree(sp->s_cred);
	kmem_free(sp->saddr.buf, sp->saddr.maxlen);
	kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len);
	mutex_exit(&sp->s_lock);

	/* destroy the nfs4_server */
	nfs4callback_destroy(sp);
	list_destroy(&sp->s_deleg_list);
	mutex_destroy(&sp->s_lock);
	cv_destroy(&sp->cv_thread_exit);
	cv_destroy(&sp->s_cv_otw_count);
	cv_destroy(&sp->s_clientid_pend);
	cv_destroy(&sp->wait_cb_null);
	nfs_rw_destroy(&sp->s_recovlock);
	kmem_free(sp, sizeof (*sp));
}
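
/*
 * Illustrative sketch (added for exposition; the helper name is made up
 * and nothing calls it): the canonical hold/release pairing for the
 * lookup routines above.  find_nfs4_server() returns the node locked with
 * an extra reference; the caller drops s_lock as soon as it is done
 * touching the node and then calls nfs4_server_rele(), which unlinks and
 * destroys the nfs4_server_t once the last reference goes away.  Real
 * callers that follow this pattern are nfs4_remove_mi_from_server() above
 * and nfs4_free_mount() below.
 */
static void
nfs4_server_hold_rele_sketch(mntinfo4_t *mi)
{
	nfs4_server_t *sp;

	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	if ((sp = find_nfs4_server(mi)) != NULL) {
		/* use sp here; sp->s_lock is held and s_refcnt is bumped */
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
	}
	nfs_rw_exit(&mi->mi_recovlock);
}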

/*
 * Fork off a thread to free the data structures for a mount.
 */
static void
async_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
{
	freemountargs_t *args;

	args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
	args->fm_vfsp = vfsp;
	VFS_HOLD(vfsp);
	MI4_HOLD(VFTOMI4(vfsp));
	args->fm_flag = flag;
	args->fm_cr = cr;
	crhold(cr);
	(void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
	    minclsyspri);
}

static void
nfs4_free_mount_thread(freemountargs_t *args)
{
	mntinfo4_t *mi;

	nfs4_free_mount(args->fm_vfsp, args->fm_flag, args->fm_cr);
	mi = VFTOMI4(args->fm_vfsp);
	crfree(args->fm_cr);
	VFS_RELE(args->fm_vfsp);
	MI4_RELE(mi);
	kmem_free(args, sizeof (freemountargs_t));
	zthread_exit();
	/* NOTREACHED */
}

/*
 * Thread to free the data structures for a given filesystem.
 */
static void
nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);
	nfs4_server_t *sp;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	boolean_t async_thread;
	int removed;

	bool_t must_unlock;
	nfs4_ephemeral_tree_t *eph_tree;

	/*
	 * We need to participate in the CPR framework if this is a kernel
	 * thread.
	 */
	async_thread = (curproc == nfs_zone()->zone_zsched);
	if (async_thread) {
		mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
		CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
		    "nfsv4AsyncUnmount");
	}

	/*
	 * We need to wait for all outstanding OTW calls
	 * and recovery to finish before we remove the mi
	 * from the nfs4_server_t, as current pending
	 * calls might still need this linkage (in order
	 * to find an nfs4_server_t from a mntinfo4_t).
	 */
	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
	sp = find_nfs4_server(mi);
	nfs_rw_exit(&mi->mi_recovlock);

	if (sp) {
		while (sp->s_otw_call_count != 0) {
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_BEGIN(&cpr_info);
				mutex_exit(&cpr_lock);
			}
			cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
				mutex_exit(&cpr_lock);
			}
		}
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		sp = NULL;
	}

	mutex_enter(&mi->mi_lock);
	while (mi->mi_in_recovery != 0) {
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			mutex_exit(&cpr_lock);
		}
		cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
			mutex_exit(&cpr_lock);
		}
	}
	mutex_exit(&mi->mi_lock);

	/*
	 * If we got an error, then do not nuke the
	 * tree.  Either the harvester is busy reclaiming
	 * this node or we ran into some busy condition.
	 *
	 * The harvester will eventually come along and clean up.
	 * The only problem would be the root mount point.
	 *
	 * Since the busy node can occur for a variety
	 * of reasons and can result in an entry staying
	 * in df output but no longer accessible from the
	 * directory tree, we are okay.
	 */
	if (!nfs4_ephemeral_umount(mi, flag, cr,
	    &must_unlock, &eph_tree))
		nfs4_ephemeral_umount_activate(mi, &must_unlock,
		    &eph_tree);

	/*
	 * The original purge of the dnlc via 'dounmount'
	 * doesn't guarantee that another dnlc entry was not
	 * added while we waited for all outstanding OTW
	 * and recovery calls to finish.  So re-purge the
	 * dnlc now.
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/*
	 * We need to explicitly stop the manager thread; the async worker
	 * threads can time out and exit on their own.
	 */
	mutex_enter(&mi->mi_async_lock);
	mi->mi_max_threads = 0;
	NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
	mutex_exit(&mi->mi_async_lock);
	if (mi->mi_manager_thread)
		nfs4_async_manager_stop(vfsp);

	destroy_rtable4(vfsp, cr);

	nfs4_remove_mi_from_server(mi, NULL);

	if (async_thread) {
		mutex_enter(&cpr_lock);
		CALLB_CPR_EXIT(&cpr_info);	/* drops cpr_lock */
		mutex_destroy(&cpr_lock);
	}

	removed = nfs4_mi_zonelist_remove(mi);
	if (removed)
		zone_rele(mi->mi_zone);
}

/* Referral-related sub-routines */

/* Free up the contents of a knetconfig */
static void
free_knconf_contents(struct knetconfig *k)
{
	if (k == NULL)
		return;
	if (k->knc_protofmly)
		kmem_free(k->knc_protofmly, KNC_STRSIZE);
	if (k->knc_proto)
		kmem_free(k->knc_proto, KNC_STRSIZE);
}

/*
 * Return the name component, selected by nth, from the path that gave us
 * an NFS4ERR_MOVED error.  The component is returned in a MAXPATHLEN
 * buffer which the caller must free.
 * If the path is /rp/aaa/bbb and nth is 1, "aaa" is returned.
 */
static char *
extract_referral_point(const char *svp, int nth)
{
	int num_slashes = 0;
	const char *p;
	char *newpath = NULL;
	int i = 0;

	newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	for (p = svp; *p; p++) {
		if (*p == '/')
			num_slashes++;
		if (num_slashes == nth + 1) {
			p++;
			while (*p != '/') {
				if (*p == '\0')
					break;
				newpath[i] = *p;
				i++;
				p++;
			}
			newpath[i++] = '\0';
			break;
		}
	}
	return (newpath);
}

/*
 * Set up a new path in sv_path to do a lookup of the referral point.
 * If the path is /rp/aaa/bbb and the referral point is aaa,
 * this trims sv_path to /rp/aaa.  That path is then used to get the
 * referral location.
 */
static void
setup_newsvpath(servinfo4_t *svp, int nth)
{
	int num_slashes = 0, pathlen, i = 0;
	char *newpath, *p;

	newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	for (p = svp->sv_path; *p; p++) {
		newpath[i] = *p;
		if (*p == '/')
			num_slashes++;
		if (num_slashes == nth + 1) {
			newpath[i] = '\0';
			pathlen = strlen(newpath) + 1;
			kmem_free(svp->sv_path, svp->sv_pathlen);
			svp->sv_path = kmem_alloc(pathlen, KM_SLEEP);
			svp->sv_pathlen = pathlen;
			bcopy(newpath, svp->sv_path, pathlen);
			break;
		}
		i++;
	}
	kmem_free(newpath, MAXPATHLEN);
}
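
/*
 * Illustrative sketch (added for exposition; the helper is invented and
 * nothing calls it): a worked example of the two routines above for a
 * servinfo4 whose sv_path is "/rp/aaa/bbb" with the referral point at
 * "aaa".  Note that the two helpers count components differently:
 * extract_referral_point() takes the 0-based index of the wanted
 * component (counting "rp" as 0), while setup_newsvpath() takes the
 * number of leading components to keep, so the same referral point is
 * reached with 1 and 2 below.
 */
static void
referral_path_sketch(servinfo4_t *svp)
{
	char *component;

	/* sv_path is assumed to be "/rp/aaa/bbb" on entry */
	component = extract_referral_point(svp->sv_path, 1);
	/* component now holds "aaa" in a MAXPATHLEN buffer we must free */

	setup_newsvpath(svp, 2);
	/* sv_path and sv_pathlen now describe "/rp/aaa" */

	kmem_free(component, MAXPATHLEN);
}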