1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/time.h> 54 #include <sys/disp.h> 55 #include <sys/policy.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/dnlc.h> 59 #include <sys/list.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/rpcsec_gss.h> 64 #include <rpc/clnt.h> 65 66 #include <nfs/nfs.h> 67 #include <nfs/nfs_clnt.h> 68 #include <nfs/mount.h> 69 #include <nfs/nfs_acl.h> 70 71 #include <fs/fs_subr.h> 72 73 #include <nfs/nfs4.h> 74 #include <nfs/rnode4.h> 75 #include <nfs/nfs4_clnt.h> 76 77 /* 78 * Arguments passed to thread to free data structures from forced unmount. 79 */ 80 81 typedef struct { 82 vfs_t *fm_vfsp; 83 cred_t *fm_cr; 84 } freemountargs_t; 85 86 static void async_free_mount(vfs_t *, cred_t *); 87 static void nfs4_free_mount(vfs_t *, cred_t *); 88 static void nfs4_free_mount_thread(freemountargs_t *); 89 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 90 91 /* 92 * From rpcsec module (common/rpcsec). 93 */ 94 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 95 extern void sec_clnt_freeinfo(struct sec_data *); 96 97 /* 98 * The order and contents of this structure must be kept in sync with that of 99 * rfsreqcnt_v4_tmpl in nfs_stats.c 100 */ 101 static char *rfsnames_v4[] = { 102 "null", "compound", "reserved", "access", "close", "commit", "create", 103 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 104 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 105 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 106 "read", "readdir", "readlink", "remove", "rename", "renew", 107 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 108 "setclientid_confirm", "verify", "write" 109 }; 110 111 /* 112 * nfs4_max_mount_retry is the number of times the client will redrive 113 * a mount compound before giving up and returning failure. The intent 114 * is to redrive mount compounds which fail NFS4ERR_STALE so that 115 * if a component of the server path being mounted goes stale, it can 116 * "recover" by redriving the mount compund (LOOKUP ops). This recovery 117 * code is needed outside of the recovery framework because mount is a 118 * special case. The client doesn't create vnodes/rnodes for components 119 * of the server path being mounted. The recovery code recovers real 120 * client objects, not STALE FHs which map to components of the server 121 * path being mounted. 122 * 123 * We could just fail the mount on the first time, but that would 124 * instantly trigger failover (from nfs4_mount), and the client should 125 * try to re-lookup the STALE FH before doing failover. The easiest 126 * way to "re-lookup" is to simply redrive the mount compound. 127 */ 128 static int nfs4_max_mount_retry = 2; 129 130 /* 131 * nfs4 vfs operations. 132 */ 133 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 134 static int nfs4_unmount(vfs_t *, int, cred_t *); 135 static int nfs4_root(vfs_t *, vnode_t **); 136 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 137 static int nfs4_sync(vfs_t *, short, cred_t *); 138 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 139 static int nfs4_mountroot(vfs_t *, whymountroot_t); 140 static void nfs4_freevfs(vfs_t *); 141 142 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 143 int, cred_t *, zone_t *); 144 145 vfsops_t *nfs4_vfsops; 146 147 int nfs4_vfsinit(void); 148 void nfs4_vfsfini(void); 149 static void nfs4setclientid_init(void); 150 static void nfs4setclientid_fini(void); 151 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 152 struct nfs4_server *, nfs4_error_t *, int *); 153 static void destroy_nfs4_server(nfs4_server_t *); 154 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 155 156 /* 157 * Initialize the vfs structure 158 */ 159 160 static int nfs4fstyp; 161 162 163 /* 164 * Debug variable to check for rdma based 165 * transport startup and cleanup. Controlled 166 * through /etc/system. Off by default. 167 */ 168 extern int rdma_debug; 169 170 int 171 nfs4init(int fstyp, char *name) 172 { 173 static const fs_operation_def_t nfs4_vfsops_template[] = { 174 VFSNAME_MOUNT, nfs4_mount, 175 VFSNAME_UNMOUNT, nfs4_unmount, 176 VFSNAME_ROOT, nfs4_root, 177 VFSNAME_STATVFS, nfs4_statvfs, 178 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 179 VFSNAME_VGET, nfs4_vget, 180 VFSNAME_MOUNTROOT, nfs4_mountroot, 181 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 182 NULL, NULL 183 }; 184 int error; 185 186 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 187 if (error != 0) { 188 zcmn_err(GLOBAL_ZONEID, CE_WARN, 189 "nfs4init: bad vfs ops template"); 190 return (error); 191 } 192 193 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 194 if (error != 0) { 195 (void) vfs_freevfsops_by_type(fstyp); 196 zcmn_err(GLOBAL_ZONEID, CE_WARN, 197 "nfs4init: bad vnode ops template"); 198 return (error); 199 } 200 201 nfs4fstyp = fstyp; 202 203 (void) nfs4_vfsinit(); 204 205 (void) nfs4_init_dot_entries(); 206 207 return (0); 208 } 209 210 void 211 nfs4fini(void) 212 { 213 (void) nfs4_destroy_dot_entries(); 214 nfs4_vfsfini(); 215 } 216 217 /* 218 * Create a new sec_data structure to store AUTH_DH related data: 219 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 220 * flag set for NFS V4 since we are avoiding to contact the rpcbind 221 * daemon and is using the IP time service (IPPORT_TIMESERVER). 222 * 223 * sec_data can be freed by sec_clnt_freeinfo(). 224 */ 225 struct sec_data * 226 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 227 struct knetconfig *knconf) { 228 struct sec_data *secdata; 229 dh_k4_clntdata_t *data; 230 char *pf, *p; 231 232 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 233 return (NULL); 234 235 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 236 secdata->flags = 0; 237 238 data = kmem_alloc(sizeof (*data), KM_SLEEP); 239 240 data->syncaddr.maxlen = syncaddr->maxlen; 241 data->syncaddr.len = syncaddr->len; 242 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 243 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 244 245 /* 246 * duplicate the knconf information for the 247 * new opaque data. 248 */ 249 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 250 *data->knconf = *knconf; 251 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 252 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 253 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 254 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 255 data->knconf->knc_protofmly = pf; 256 data->knconf->knc_proto = p; 257 258 /* move server netname to the sec_data structure */ 259 data->netname = kmem_alloc(nlen, KM_SLEEP); 260 bcopy(netname, data->netname, nlen); 261 data->netnamelen = (int)nlen; 262 263 secdata->secmod = AUTH_DH; 264 secdata->rpcflavor = AUTH_DH; 265 secdata->data = (caddr_t)data; 266 267 return (secdata); 268 } 269 270 static int 271 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 272 { 273 servinfo4_t *si; 274 275 /* 276 * Iterate over the servinfo4 list to make sure 277 * we do not have a duplicate. Skip any servinfo4 278 * that has been marked "NOT IN USE" 279 */ 280 for (si = svp_head; si; si = si->sv_next) { 281 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 282 if (si->sv_flags & SV4_NOTINUSE) { 283 nfs_rw_exit(&si->sv_lock); 284 continue; 285 } 286 nfs_rw_exit(&si->sv_lock); 287 if (si == svp) 288 continue; 289 if (si->sv_addr.len == svp->sv_addr.len && 290 strcmp(si->sv_knconf->knc_protofmly, 291 svp->sv_knconf->knc_protofmly) == 0 && 292 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 293 si->sv_addr.len) == 0) { 294 /* it's a duplicate */ 295 return (1); 296 } 297 } 298 /* it's not a duplicate */ 299 return (0); 300 } 301 302 /* 303 * nfs mount vfsop 304 * Set up mount info record and attach it to vfs struct. 305 */ 306 static int 307 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 308 { 309 char *data = uap->dataptr; 310 int error; 311 vnode_t *rtvp; /* the server's root */ 312 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 313 size_t hlen; /* length of hostname */ 314 size_t nlen; /* length of netname */ 315 char netname[MAXNETNAMELEN+1]; /* server's netname */ 316 struct netbuf addr; /* server's address */ 317 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 318 struct knetconfig *knconf; /* transport knetconfig structure */ 319 struct knetconfig *rdma_knconf; /* rdma transport structure */ 320 rnode4_t *rp; 321 struct servinfo4 *svp; /* nfs server info */ 322 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 323 struct servinfo4 *svp_head; /* first nfs server info */ 324 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 325 struct sec_data *secdata; /* security data */ 326 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 327 STRUCT_DECL(knetconfig, knconf_tmp); 328 STRUCT_DECL(netbuf, addr_tmp); 329 int flags, addr_type; 330 char *p, *pf; 331 struct pathname pn; 332 char *userbufptr; 333 zone_t *zone = nfs_zone(); 334 nfs4_error_t n4e; 335 336 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 337 return (EPERM); 338 if (mvp->v_type != VDIR) 339 return (ENOTDIR); 340 /* 341 * get arguments 342 * 343 * nfs_args is now versioned and is extensible, so 344 * uap->datalen might be different from sizeof (args) 345 * in a compatible situation. 346 */ 347 more: 348 STRUCT_INIT(args, get_udatamodel()); 349 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 350 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 351 STRUCT_SIZE(args)))) 352 return (EFAULT); 353 354 flags = STRUCT_FGET(args, flags); 355 356 /* 357 * If the request changes the locking type, disallow the remount, 358 * because it's questionable whether we can transfer the 359 * locking state correctly. 360 */ 361 if (uap->flags & MS_REMOUNT) { 362 if ((mi = VFTOMI4(vfsp)) != NULL) { 363 uint_t new_mi_llock; 364 uint_t old_mi_llock; 365 366 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 367 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 368 if (old_mi_llock != new_mi_llock) 369 return (EBUSY); 370 } 371 return (0); 372 } 373 374 mutex_enter(&mvp->v_lock); 375 if (!(uap->flags & MS_OVERLAY) && 376 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 377 mutex_exit(&mvp->v_lock); 378 return (EBUSY); 379 } 380 mutex_exit(&mvp->v_lock); 381 382 /* make sure things are zeroed for errout: */ 383 rtvp = NULL; 384 mi = NULL; 385 addr.buf = NULL; 386 syncaddr.buf = NULL; 387 secdata = NULL; 388 389 /* 390 * A valid knetconfig structure is required. 391 */ 392 if (!(flags & NFSMNT_KNCONF)) 393 return (EINVAL); 394 395 /* 396 * Allocate a servinfo4 struct. 397 */ 398 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 399 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 400 if (svp_tail) { 401 svp_2ndlast = svp_tail; 402 svp_tail->sv_next = svp; 403 } else { 404 svp_head = svp; 405 svp_2ndlast = svp; 406 } 407 408 svp_tail = svp; 409 410 /* 411 * Allocate space for a knetconfig structure and 412 * its strings and copy in from user-land. 413 */ 414 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 415 svp->sv_knconf = knconf; 416 STRUCT_INIT(knconf_tmp, get_udatamodel()); 417 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 418 STRUCT_SIZE(knconf_tmp))) { 419 sv4_free(svp_head); 420 return (EFAULT); 421 } 422 423 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 424 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 425 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 426 if (get_udatamodel() != DATAMODEL_LP64) { 427 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 428 } else { 429 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 430 } 431 432 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 433 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 434 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 435 if (error) { 436 kmem_free(pf, KNC_STRSIZE); 437 kmem_free(p, KNC_STRSIZE); 438 sv4_free(svp_head); 439 return (error); 440 } 441 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 442 if (error) { 443 kmem_free(pf, KNC_STRSIZE); 444 kmem_free(p, KNC_STRSIZE); 445 sv4_free(svp_head); 446 return (error); 447 } 448 if (strcmp(p, NC_UDP) == 0) { 449 kmem_free(pf, KNC_STRSIZE); 450 kmem_free(p, KNC_STRSIZE); 451 sv4_free(svp_head); 452 return (ENOTSUP); 453 } 454 knconf->knc_protofmly = pf; 455 knconf->knc_proto = p; 456 457 /* 458 * Get server address 459 */ 460 STRUCT_INIT(addr_tmp, get_udatamodel()); 461 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 462 STRUCT_SIZE(addr_tmp))) { 463 error = EFAULT; 464 goto errout; 465 } 466 467 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 468 addr.len = STRUCT_FGET(addr_tmp, len); 469 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 470 addr.maxlen = addr.len; 471 if (copyin(userbufptr, addr.buf, addr.len)) { 472 kmem_free(addr.buf, addr.len); 473 error = EFAULT; 474 goto errout; 475 } 476 477 svp->sv_addr = addr; 478 479 /* 480 * Get the root fhandle 481 */ 482 error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 483 484 if (error) 485 goto errout; 486 487 /* Volatile fh: keep server paths, so use actual-size strings */ 488 svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 489 bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen); 490 svp->sv_path[pn.pn_pathlen] = '\0'; 491 svp->sv_pathlen = pn.pn_pathlen + 1; 492 pn_free(&pn); 493 494 /* 495 * Get server's hostname 496 */ 497 if (flags & NFSMNT_HOSTNAME) { 498 error = copyinstr(STRUCT_FGETP(args, hostname), 499 netname, sizeof (netname), &hlen); 500 if (error) 501 goto errout; 502 } else { 503 char *p = "unknown-host"; 504 hlen = strlen(p) + 1; 505 (void) strcpy(netname, p); 506 } 507 svp->sv_hostnamelen = hlen; 508 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 509 (void) strcpy(svp->sv_hostname, netname); 510 511 /* 512 * RDMA MOUNT SUPPORT FOR NFS v4. 513 * Establish, is it possible to use RDMA, if so overload the 514 * knconf with rdma specific knconf and free the orignal knconf. 515 */ 516 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 517 /* 518 * Determine the addr type for RDMA, IPv4 or v6. 519 */ 520 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 521 addr_type = AF_INET; 522 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 523 addr_type = AF_INET6; 524 525 if (rdma_reachable(addr_type, &svp->sv_addr, 526 &rdma_knconf) == 0) { 527 /* 528 * If successful, hijack the orignal knconf and 529 * replace with the new one, depending on the flags. 530 */ 531 svp->sv_origknconf = svp->sv_knconf; 532 svp->sv_knconf = rdma_knconf; 533 knconf = rdma_knconf; 534 } else { 535 if (flags & NFSMNT_TRYRDMA) { 536 #ifdef DEBUG 537 if (rdma_debug) 538 zcmn_err(getzoneid(), CE_WARN, 539 "no RDMA onboard, revert\n"); 540 #endif 541 } 542 543 if (flags & NFSMNT_DORDMA) { 544 /* 545 * If proto=rdma is specified and no RDMA 546 * path to this server is avialable then 547 * ditch this server. 548 * This is not included in the mountable 549 * server list or the replica list. 550 * Check if more servers are specified; 551 * Failover case, otherwise bail out of mount. 552 */ 553 if (STRUCT_FGET(args, nfs_args_ext) == 554 NFS_ARGS_EXTB && STRUCT_FGETP(args, 555 nfs_ext_u.nfs_extB.next) != NULL) { 556 if (uap->flags & MS_RDONLY && 557 !(flags & NFSMNT_SOFT)) { 558 data = (char *) 559 STRUCT_FGETP(args, 560 nfs_ext_u.nfs_extB.next); 561 if (svp_head->sv_next == NULL) { 562 svp_tail = NULL; 563 svp_2ndlast = NULL; 564 sv4_free(svp_head); 565 goto more; 566 } else { 567 svp_tail = svp_2ndlast; 568 svp_2ndlast->sv_next = 569 NULL; 570 sv4_free(svp); 571 goto more; 572 } 573 } 574 } else { 575 /* 576 * This is the last server specified 577 * in the nfs_args list passed down 578 * and its not rdma capable. 579 */ 580 if (svp_head->sv_next == NULL) { 581 /* 582 * Is this the only one 583 */ 584 error = EINVAL; 585 #ifdef DEBUG 586 if (rdma_debug) 587 zcmn_err(getzoneid(), 588 CE_WARN, 589 "No RDMA srv"); 590 #endif 591 goto errout; 592 } else { 593 /* 594 * There is list, since some 595 * servers specified before 596 * this passed all requirements 597 */ 598 svp_tail = svp_2ndlast; 599 svp_2ndlast->sv_next = NULL; 600 sv4_free(svp); 601 goto proceed; 602 } 603 } 604 } 605 } 606 } 607 608 /* 609 * If there are syncaddr and netname data, load them in. This is 610 * to support data needed for NFSV4 when AUTH_DH is the negotiated 611 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 612 */ 613 netname[0] = '\0'; 614 if (flags & NFSMNT_SECURE) { 615 616 /* get syncaddr */ 617 STRUCT_INIT(addr_tmp, get_udatamodel()); 618 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 619 STRUCT_SIZE(addr_tmp))) { 620 error = EINVAL; 621 goto errout; 622 } 623 userbufptr = STRUCT_FGETP(addr_tmp, buf); 624 syncaddr.len = STRUCT_FGET(addr_tmp, len); 625 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 626 syncaddr.maxlen = syncaddr.len; 627 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 628 kmem_free(syncaddr.buf, syncaddr.len); 629 error = EFAULT; 630 goto errout; 631 } 632 633 /* get server's netname */ 634 if (copyinstr(STRUCT_FGETP(args, netname), netname, 635 sizeof (netname), &nlen)) { 636 kmem_free(syncaddr.buf, syncaddr.len); 637 error = EFAULT; 638 goto errout; 639 } 640 netname[nlen] = '\0'; 641 642 svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr, 643 knconf); 644 } 645 646 /* 647 * Get the extention data which has the security data structure. 648 * This includes data for AUTH_SYS as well. 649 */ 650 if (flags & NFSMNT_NEWARGS) { 651 switch (STRUCT_FGET(args, nfs_args_ext)) { 652 case NFS_ARGS_EXTA: 653 case NFS_ARGS_EXTB: 654 /* 655 * Indicating the application is using the new 656 * sec_data structure to pass in the security 657 * data. 658 */ 659 if (STRUCT_FGETP(args, 660 nfs_ext_u.nfs_extA.secdata) == NULL) { 661 error = EINVAL; 662 } else { 663 error = sec_clnt_loadinfo( 664 (struct sec_data *)STRUCT_FGETP(args, 665 nfs_ext_u.nfs_extA.secdata), 666 &secdata, get_udatamodel()); 667 } 668 break; 669 670 default: 671 error = EINVAL; 672 break; 673 } 674 675 } else if (flags & NFSMNT_SECURE) { 676 /* 677 * NFSMNT_SECURE is deprecated but we keep it 678 * to support the rouge user generated application 679 * that may use this undocumented interface to do 680 * AUTH_DH security. 681 */ 682 secdata = create_authdh_data(netname, nlen, &syncaddr, knconf); 683 684 } else { 685 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 686 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 687 secdata->data = NULL; 688 } 689 690 svp->sv_secdata = secdata; 691 692 /* syncaddr is no longer needed. */ 693 if (syncaddr.buf != NULL) 694 kmem_free(syncaddr.buf, syncaddr.len); 695 696 /* 697 * User does not explictly specify a flavor, and a user 698 * defined default flavor is passed down. 699 */ 700 if (flags & NFSMNT_SECDEFAULT) { 701 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 702 svp->sv_flags |= SV4_TRYSECDEFAULT; 703 nfs_rw_exit(&svp->sv_lock); 704 } 705 706 /* 707 * Failover support: 708 * 709 * We may have a linked list of nfs_args structures, 710 * which means the user is looking for failover. If 711 * the mount is either not "read-only" or "soft", 712 * we want to bail out with EINVAL. 713 */ 714 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 715 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 716 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 717 data = (char *)STRUCT_FGETP(args, 718 nfs_ext_u.nfs_extB.next); 719 goto more; 720 } 721 error = EINVAL; 722 goto errout; 723 } 724 725 /* 726 * Determine the zone we're being mounted into. 727 */ 728 if (getzoneid() == GLOBAL_ZONEID) { 729 zone_t *mntzone; 730 731 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 732 ASSERT(mntzone != NULL); 733 zone_rele(mntzone); 734 if (mntzone != zone) { 735 error = EBUSY; 736 goto errout; 737 } 738 } 739 740 /* 741 * Stop the mount from going any further if the zone is going away. 742 */ 743 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 744 error = EBUSY; 745 goto errout; 746 } 747 748 /* 749 * Get root vnode. 750 */ 751 proceed: 752 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, zone); 753 754 if (error) 755 goto errout; 756 757 mi = VTOMI4(rtvp); 758 759 /* 760 * Send client id to the server, if necessary 761 */ 762 nfs4_error_zinit(&n4e); 763 nfs4setclientid(mi, cr, FALSE, &n4e); 764 error = n4e.error; 765 766 if (error) 767 goto errout; 768 769 /* 770 * Set option fields in the mount info record 771 */ 772 773 if (svp_head->sv_next) { 774 mutex_enter(&mi->mi_lock); 775 mi->mi_flags |= MI4_LLOCK; 776 mutex_exit(&mi->mi_lock); 777 } 778 779 error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 780 781 errout: 782 if (error) { 783 if (rtvp != NULL) { 784 rp = VTOR4(rtvp); 785 if (rp->r_flags & R4HASHED) 786 rp4_rmhash(rp); 787 } 788 if (mi != NULL) { 789 nfs4_async_stop(vfsp); 790 nfs4_async_manager_stop(vfsp); 791 nfs4_remove_mi_from_server(mi, NULL); 792 /* 793 * In this error path we need to sfh4_rele() before 794 * we free the mntinfo4_t as sfh4_rele() has a 795 * dependancy on mi_fh_lock. 796 */ 797 if (rtvp != NULL) 798 VN_RELE(rtvp); 799 if (mi->mi_io_kstats) { 800 kstat_delete(mi->mi_io_kstats); 801 mi->mi_io_kstats = NULL; 802 } 803 if (mi->mi_ro_kstats) { 804 kstat_delete(mi->mi_ro_kstats); 805 mi->mi_ro_kstats = NULL; 806 } 807 if (mi->mi_recov_ksp) { 808 kstat_delete(mi->mi_recov_ksp); 809 mi->mi_recov_ksp = NULL; 810 } 811 nfs_free_mi4(mi); 812 return (error); 813 } 814 sv4_free(svp_head); 815 } 816 817 if (rtvp != NULL) 818 VN_RELE(rtvp); 819 820 return (error); 821 } 822 823 #ifdef DEBUG 824 #define VERS_MSG "NFS4 server " 825 #else 826 #define VERS_MSG "NFS server " 827 #endif 828 829 #define READ_MSG \ 830 VERS_MSG "%s returned 0 for read transfer size" 831 #define WRITE_MSG \ 832 VERS_MSG "%s returned 0 for write transfer size" 833 #define SIZE_MSG \ 834 VERS_MSG "%s returned 0 for maximum file size" 835 836 /* 837 * Get the symbolic link text from the server for a given filehandle 838 * of that symlink. 839 * 840 * (get symlink text) PUTFH READLINK 841 */ 842 static int 843 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 844 int flags) 845 { 846 COMPOUND4args_clnt args; 847 COMPOUND4res_clnt res; 848 int doqueue; 849 nfs_argop4 argop[2]; 850 nfs_resop4 *resop; 851 READLINK4res *lr_res; 852 uint_t len; 853 bool_t needrecov = FALSE; 854 nfs4_recov_state_t recov_state; 855 nfs4_sharedfh_t *sfh; 856 nfs4_error_t e; 857 int num_retry = nfs4_max_mount_retry; 858 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 859 860 sfh = sfh4_get(fh, mi); 861 recov_state.rs_flags = 0; 862 recov_state.rs_num_retry_despite_err = 0; 863 864 recov_retry: 865 nfs4_error_zinit(&e); 866 867 args.array_len = 2; 868 args.array = argop; 869 args.ctag = TAG_GET_SYMLINK; 870 871 if (! recovery) { 872 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 873 if (e.error) { 874 sfh4_rele(&sfh); 875 return (e.error); 876 } 877 } 878 879 /* 0. putfh symlink fh */ 880 argop[0].argop = OP_CPUTFH; 881 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 882 883 /* 1. readlink */ 884 argop[1].argop = OP_READLINK; 885 886 doqueue = 1; 887 888 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 889 890 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 891 892 if (needrecov && !recovery && num_retry-- > 0) { 893 894 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 895 "getlinktext_otw: initiating recovery\n")); 896 897 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 898 OP_READLINK, NULL) == FALSE) { 899 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 900 if (!e.error) 901 (void) xdr_free(xdr_COMPOUND4res_clnt, 902 (caddr_t)&res); 903 goto recov_retry; 904 } 905 } 906 907 /* 908 * If non-NFS4 pcol error and/or we weren't able to recover. 909 */ 910 if (e.error != 0) { 911 if (! recovery) 912 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 913 sfh4_rele(&sfh); 914 return (e.error); 915 } 916 917 if (res.status) { 918 e.error = geterrno4(res.status); 919 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 920 if (! recovery) 921 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 922 sfh4_rele(&sfh); 923 return (e.error); 924 } 925 926 /* res.status == NFS4_OK */ 927 ASSERT(res.status == NFS4_OK); 928 929 resop = &res.array[1]; /* readlink res */ 930 lr_res = &resop->nfs_resop4_u.opreadlink; 931 932 /* treat symlink name as data */ 933 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 934 935 if (! recovery) 936 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 937 sfh4_rele(&sfh); 938 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 939 940 return (0); 941 } 942 943 /* 944 * Skip over consecutive slashes and "/./" in a pathname. 945 */ 946 void 947 pathname_skipslashdot(struct pathname *pnp) 948 { 949 char *c1, *c2; 950 951 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 952 953 c1 = pnp->pn_path + 1; 954 c2 = pnp->pn_path + 2; 955 956 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 957 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 958 pnp->pn_pathlen = pnp->pn_pathlen - 2; 959 } else { 960 pnp->pn_path++; 961 pnp->pn_pathlen--; 962 } 963 } 964 } 965 966 /* 967 * Resolve a symbolic link path. The symlink is in the nth component of 968 * svp->sv_path and has an nfs4 file handle "fh". 969 * Upon return, the sv_path will point to the new path that has the nth 970 * component resolved to its symlink text. 971 */ 972 int 973 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 974 cred_t *cr, int flags) 975 { 976 char *oldpath; 977 char *symlink, *newpath; 978 struct pathname oldpn, newpn; 979 char component[MAXNAMELEN]; 980 int i, addlen, error = 0; 981 int oldpathlen; 982 983 /* Get the symbolic link text over the wire. */ 984 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 985 986 if (error || symlink == NULL || strlen(symlink) == 0) 987 return (error); 988 989 /* 990 * Compose the new pathname. 991 * Note: 992 * - only the nth component is resolved for the pathname. 993 * - pathname.pn_pathlen does not count the ending null byte. 994 */ 995 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 996 oldpath = svp->sv_path; 997 oldpathlen = svp->sv_pathlen; 998 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 999 nfs_rw_exit(&svp->sv_lock); 1000 kmem_free(symlink, strlen(symlink) + 1); 1001 return (error); 1002 } 1003 nfs_rw_exit(&svp->sv_lock); 1004 pn_alloc(&newpn); 1005 1006 /* 1007 * Skip over previous components from the oldpath so that the 1008 * oldpn.pn_path will point to the symlink component. Skip 1009 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1010 * pn_getcompnent can get the component. 1011 */ 1012 for (i = 1; i < nth; i++) { 1013 pathname_skipslashdot(&oldpn); 1014 error = pn_getcomponent(&oldpn, component); 1015 if (error) 1016 goto out; 1017 } 1018 1019 /* 1020 * Copy the old path upto the component right before the symlink 1021 * if the symlink is not an absolute path. 1022 */ 1023 if (symlink[0] != '/') { 1024 addlen = oldpn.pn_path - oldpn.pn_buf; 1025 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1026 newpn.pn_pathlen += addlen; 1027 newpn.pn_path += addlen; 1028 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1029 newpn.pn_pathlen++; 1030 newpn.pn_path++; 1031 } 1032 1033 /* copy the resolved symbolic link text */ 1034 addlen = strlen(symlink); 1035 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1036 error = ENAMETOOLONG; 1037 goto out; 1038 } 1039 bcopy(symlink, newpn.pn_path, addlen); 1040 newpn.pn_pathlen += addlen; 1041 newpn.pn_path += addlen; 1042 1043 /* 1044 * Check if there is any remaining path after the symlink component. 1045 * First, skip the symlink component. 1046 */ 1047 pathname_skipslashdot(&oldpn); 1048 if (error = pn_getcomponent(&oldpn, component)) 1049 goto out; 1050 1051 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1052 1053 /* 1054 * Copy the remaining path to the new pathname if there is any. 1055 */ 1056 if (addlen > 0) { 1057 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1058 error = ENAMETOOLONG; 1059 goto out; 1060 } 1061 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1062 newpn.pn_pathlen += addlen; 1063 } 1064 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1065 1066 /* get the newpath and store it in the servinfo4_t */ 1067 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1068 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1069 newpath[newpn.pn_pathlen] = '\0'; 1070 1071 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1072 svp->sv_path = newpath; 1073 svp->sv_pathlen = strlen(newpath) + 1; 1074 nfs_rw_exit(&svp->sv_lock); 1075 1076 kmem_free(oldpath, oldpathlen); 1077 out: 1078 kmem_free(symlink, strlen(symlink) + 1); 1079 pn_free(&newpn); 1080 pn_free(&oldpn); 1081 1082 return (error); 1083 } 1084 1085 /* 1086 * Get the root filehandle for the given filesystem and server, and update 1087 * svp. 1088 * 1089 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1090 * to coordinate with recovery. Otherwise, the caller is assumed to be 1091 * the recovery thread or have already done a start_fop. 1092 * 1093 * Errors are returned by the nfs4_error_t parameter. 1094 */ 1095 1096 static void 1097 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1098 int flags, cred_t *cr, nfs4_error_t *ep) 1099 { 1100 COMPOUND4args_clnt args; 1101 COMPOUND4res_clnt res; 1102 int doqueue = 1; 1103 nfs_argop4 *argop; 1104 nfs_resop4 *resop; 1105 nfs4_ga_res_t *garp; 1106 int num_argops; 1107 lookup4_param_t lookuparg; 1108 nfs_fh4 *tmpfhp; 1109 nfs_fh4 *resfhp; 1110 bool_t needrecov = FALSE; 1111 nfs4_recov_state_t recov_state; 1112 int llndx; 1113 int nthcomp; 1114 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1115 1116 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1117 ASSERT(svp->sv_path != NULL); 1118 if (svp->sv_path[0] == '\0') { 1119 nfs_rw_exit(&svp->sv_lock); 1120 nfs4_error_init(ep, EINVAL); 1121 return; 1122 } 1123 nfs_rw_exit(&svp->sv_lock); 1124 1125 recov_state.rs_flags = 0; 1126 recov_state.rs_num_retry_despite_err = 0; 1127 recov_retry: 1128 nfs4_error_zinit(ep); 1129 1130 if (!recovery) { 1131 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1132 &recov_state, NULL); 1133 1134 /* 1135 * If recovery has been started and this request as 1136 * initiated by a mount, then we must wait for recovery 1137 * to finish before proceeding, otherwise, the error 1138 * cleanup would remove data structures needed by the 1139 * recovery thread. 1140 */ 1141 if (ep->error) { 1142 mutex_enter(&mi->mi_lock); 1143 if (mi->mi_flags & MI4_MOUNTING) { 1144 mi->mi_flags |= MI4_RECOV_FAIL; 1145 mi->mi_error = EIO; 1146 1147 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1148 "nfs4getfh_otw: waiting 4 recovery\n")); 1149 1150 while (mi->mi_flags & MI4_RECOV_ACTIV) 1151 cv_wait(&mi->mi_failover_cv, 1152 &mi->mi_lock); 1153 } 1154 mutex_exit(&mi->mi_lock); 1155 return; 1156 } 1157 1158 /* 1159 * If the client does not specify a specific flavor to use 1160 * and has not gotten a secinfo list from the server yet, 1161 * retrieve the secinfo list from the server and use a 1162 * flavor from the list to mount. 1163 * 1164 * If fail to get the secinfo list from the server, then 1165 * try the default flavor. 1166 */ 1167 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1168 svp->sv_secinfo == NULL) { 1169 (void) nfs4_secinfo_path(mi, cr, FALSE); 1170 } 1171 } 1172 1173 if (recovery) 1174 args.ctag = TAG_REMAP_MOUNT; 1175 else 1176 args.ctag = TAG_MOUNT; 1177 1178 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1179 lookuparg.argsp = &args; 1180 lookuparg.resp = &res; 1181 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1182 lookuparg.trailer_len = 0; 1183 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1184 lookuparg.mi = mi; 1185 1186 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1187 ASSERT(svp->sv_path != NULL); 1188 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1189 nfs_rw_exit(&svp->sv_lock); 1190 1191 argop = args.array; 1192 num_argops = args.array_len; 1193 1194 /* choose public or root filehandle */ 1195 if (flags & NFS4_GETFH_PUBLIC) 1196 argop[0].argop = OP_PUTPUBFH; 1197 else 1198 argop[0].argop = OP_PUTROOTFH; 1199 1200 /* get fh */ 1201 argop[1].argop = OP_GETFH; 1202 1203 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1204 "nfs4getfh_otw: %s call, mi 0x%p", 1205 needrecov ? "recov" : "first", (void *)mi)); 1206 1207 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1208 1209 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1210 1211 if (needrecov) { 1212 bool_t abort; 1213 1214 if (recovery) { 1215 nfs4args_lookup_free(argop, num_argops); 1216 kmem_free(argop, 1217 lookuparg.arglen * sizeof (nfs_argop4)); 1218 if (!ep->error) 1219 (void) xdr_free(xdr_COMPOUND4res_clnt, 1220 (caddr_t)&res); 1221 return; 1222 } 1223 1224 NFS4_DEBUG(nfs4_client_recov_debug, 1225 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1226 1227 abort = nfs4_start_recovery(ep, mi, NULL, 1228 NULL, NULL, NULL, OP_GETFH, NULL); 1229 if (!ep->error) { 1230 ep->error = geterrno4(res.status); 1231 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1232 } 1233 nfs4args_lookup_free(argop, num_argops); 1234 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1235 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1236 /* have another go? */ 1237 if (abort == FALSE) 1238 goto recov_retry; 1239 return; 1240 } 1241 1242 /* 1243 * No recovery, but check if error is set. 1244 */ 1245 if (ep->error) { 1246 nfs4args_lookup_free(argop, num_argops); 1247 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1248 if (!recovery) 1249 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1250 needrecov); 1251 return; 1252 } 1253 1254 is_link_err: 1255 1256 /* for non-recovery errors */ 1257 if (res.status && res.status != NFS4ERR_SYMLINK) { 1258 if (!recovery) { 1259 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1260 needrecov); 1261 } 1262 nfs4args_lookup_free(argop, num_argops); 1263 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1264 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1265 return; 1266 } 1267 1268 /* 1269 * If any intermediate component in the path is a symbolic link, 1270 * resolve the symlink, then try mount again using the new path. 1271 */ 1272 if (res.status == NFS4ERR_SYMLINK) { 1273 int where; 1274 1275 /* 1276 * This must be from OP_LOOKUP failure. The (cfh) for this 1277 * OP_LOOKUP is a symlink node. Found out where the 1278 * OP_GETFH is for the (cfh) that is a symlink node. 1279 * 1280 * Example: 1281 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1282 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1283 * 1284 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1285 * In this case, where = 7, nthcomp = 2. 1286 */ 1287 where = res.array_len - 2; 1288 ASSERT(where > 0); 1289 1290 resop = &res.array[where - 1]; 1291 ASSERT(resop->resop == OP_GETFH); 1292 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1293 nthcomp = res.array_len/3 - 1; 1294 1295 /* 1296 * Need to call nfs4_end_op before resolve_sympath to avoid 1297 * potential nfs4_start_op deadlock. 1298 */ 1299 if (!recovery) 1300 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1301 needrecov); 1302 1303 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1304 flags); 1305 1306 nfs4args_lookup_free(argop, num_argops); 1307 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1308 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1309 1310 if (ep->error) 1311 return; 1312 1313 goto recov_retry; 1314 } 1315 1316 /* getfh */ 1317 resop = &res.array[res.array_len - 2]; 1318 ASSERT(resop->resop == OP_GETFH); 1319 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1320 1321 /* getattr fsinfo res */ 1322 resop++; 1323 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1324 1325 *vtp = garp->n4g_va.va_type; 1326 1327 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1328 1329 mutex_enter(&mi->mi_lock); 1330 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1331 mi->mi_flags |= MI4_LINK; 1332 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1333 mi->mi_flags |= MI4_SYMLINK; 1334 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1335 mi->mi_flags |= MI4_ACL; 1336 mutex_exit(&mi->mi_lock); 1337 1338 if (garp->n4g_ext_res->n4g_maxread == 0) 1339 mi->mi_tsize = 1340 MIN(MAXBSIZE, mi->mi_tsize); 1341 else 1342 mi->mi_tsize = 1343 MIN(garp->n4g_ext_res->n4g_maxread, 1344 mi->mi_tsize); 1345 1346 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1347 mi->mi_stsize = 1348 MIN(MAXBSIZE, mi->mi_stsize); 1349 else 1350 mi->mi_stsize = 1351 MIN(garp->n4g_ext_res->n4g_maxwrite, 1352 mi->mi_stsize); 1353 1354 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1355 mi->mi_maxfilesize = 1356 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1357 mi->mi_maxfilesize); 1358 1359 /* 1360 * If the final component is a a symbolic link, resolve the symlink, 1361 * then try mount again using the new path. 1362 * 1363 * Assume no symbolic link for root filesysm "/". 1364 */ 1365 if (*vtp == VLNK) { 1366 /* 1367 * nthcomp is the total result length minus 1368 * the 1st 2 OPs (PUTROOTFH, GETFH), 1369 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1370 * 1371 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1372 * LOOKUP 2nd-comp GETFH GETATTR 1373 * 1374 * (8 - 2)/3 = 2 1375 */ 1376 nthcomp = (res.array_len - 2)/3; 1377 1378 /* 1379 * Need to call nfs4_end_op before resolve_sympath to avoid 1380 * potential nfs4_start_op deadlock. See RFE 4777612. 1381 */ 1382 if (!recovery) 1383 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1384 needrecov); 1385 1386 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1387 flags); 1388 1389 nfs4args_lookup_free(argop, num_argops); 1390 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1391 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1392 1393 if (ep->error) 1394 return; 1395 1396 goto recov_retry; 1397 } 1398 1399 /* 1400 * We need to figure out where in the compound the getfh 1401 * for the parent directory is. If the object to be mounted is 1402 * the root, then there is no lookup at all: 1403 * PUTROOTFH, GETFH. 1404 * If the object to be mounted is in the root, then the compound is: 1405 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1406 * In either of these cases, the index of the GETFH is 1. 1407 * If it is not at the root, then it's something like: 1408 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1409 * LOOKUP, GETFH, GETATTR 1410 * In this case, the index is llndx (last lookup index) - 2. 1411 */ 1412 if (llndx == -1 || llndx == 2) 1413 resop = &res.array[1]; 1414 else { 1415 ASSERT(llndx > 2); 1416 resop = &res.array[llndx-2]; 1417 } 1418 1419 ASSERT(resop->resop == OP_GETFH); 1420 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1421 1422 /* save the filehandles for the replica */ 1423 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1424 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1425 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1426 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1427 tmpfhp->nfs_fh4_len); 1428 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1429 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1430 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1431 1432 /* initialize fsid and supp_attrs for server fs */ 1433 svp->sv_fsid = garp->n4g_fsid; 1434 svp->sv_supp_attrs = 1435 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1436 1437 nfs_rw_exit(&svp->sv_lock); 1438 1439 nfs4args_lookup_free(argop, num_argops); 1440 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1441 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1442 if (!recovery) 1443 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1444 } 1445 1446 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1447 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1448 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1449 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1450 1451 /* 1452 * Remap the root filehandle for the given filesystem. 1453 * 1454 * results returned via the nfs4_error_t parameter. 1455 */ 1456 void 1457 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1458 { 1459 struct servinfo4 *svp; 1460 vtype_t vtype; 1461 nfs_fh4 rootfh; 1462 int getfh_flags; 1463 char *orig_sv_path; 1464 int orig_sv_pathlen, num_retry; 1465 1466 mutex_enter(&mi->mi_lock); 1467 1468 remap_retry: 1469 svp = mi->mi_curr_serv; 1470 getfh_flags = 1471 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1472 getfh_flags |= 1473 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1474 mutex_exit(&mi->mi_lock); 1475 1476 /* 1477 * Just in case server path being mounted contains 1478 * symlinks and fails w/STALE, save the initial sv_path 1479 * so we can redrive the initial mount compound with the 1480 * initial sv_path -- not a symlink-expanded version. 1481 * 1482 * This could only happen if a symlink was expanded 1483 * and the expanded mount compound failed stale. Because 1484 * it could be the case that the symlink was removed at 1485 * the server (and replaced with another symlink/dir, 1486 * we need to use the initial sv_path when attempting 1487 * to re-lookup everything and recover. 1488 */ 1489 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1490 orig_sv_pathlen = svp->sv_pathlen; 1491 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1492 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1493 nfs_rw_exit(&svp->sv_lock); 1494 1495 num_retry = nfs4_max_mount_retry; 1496 1497 do { 1498 /* 1499 * Get the root fh from the server. Retry nfs4_max_mount_retry 1500 * (2) times if it fails with STALE since the recovery 1501 * infrastructure doesn't do STALE recovery for components 1502 * of the server path to the object being mounted. 1503 */ 1504 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1505 1506 if (ep->error == 0 && ep->stat == NFS4_OK) 1507 break; 1508 1509 /* 1510 * For some reason, the mount compound failed. Before 1511 * retrying, we need to restore the original sv_path 1512 * because it might have contained symlinks that were 1513 * expanded by nfsgetfh_otw before the failure occurred. 1514 * replace current sv_path with orig sv_path -- just in case 1515 * it changed due to embedded symlinks. 1516 */ 1517 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1518 if (orig_sv_pathlen != svp->sv_pathlen) { 1519 kmem_free(svp->sv_path, svp->sv_pathlen); 1520 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1521 svp->sv_pathlen = orig_sv_pathlen; 1522 } 1523 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1524 nfs_rw_exit(&svp->sv_lock); 1525 1526 } while (num_retry-- > 0); 1527 1528 kmem_free(orig_sv_path, orig_sv_pathlen); 1529 1530 if (ep->error != 0 || ep->stat != 0) { 1531 return; 1532 } 1533 1534 if (vtype != VNON && vtype != mi->mi_type) { 1535 /* shouldn't happen */ 1536 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1537 "nfs4_remap_root: server root vnode type (%d) doesn't " 1538 "match mount info (%d)", vtype, mi->mi_type); 1539 } 1540 1541 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1542 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1543 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1544 nfs_rw_exit(&svp->sv_lock); 1545 sfh4_update(mi->mi_rootfh, &rootfh); 1546 1547 /* 1548 * It's possible that recovery took place on the filesystem 1549 * and the server has been updated between the time we did 1550 * the nfs4getfh_otw and now. Re-drive the otw operation 1551 * to make sure we have a good fh. 1552 */ 1553 mutex_enter(&mi->mi_lock); 1554 if (mi->mi_curr_serv != svp) 1555 goto remap_retry; 1556 1557 mutex_exit(&mi->mi_lock); 1558 } 1559 1560 static int 1561 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1562 int flags, cred_t *cr, zone_t *zone) 1563 { 1564 vnode_t *rtvp = NULL; 1565 mntinfo4_t *mi; 1566 dev_t nfs_dev; 1567 int error = 0; 1568 rnode4_t *rp; 1569 int i; 1570 struct vattr va; 1571 vtype_t vtype = VNON; 1572 vtype_t tmp_vtype = VNON; 1573 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1574 nfs4_oo_hash_bucket_t *bucketp; 1575 nfs_fh4 fh; 1576 char *droptext = ""; 1577 struct nfs_stats *nfsstatsp; 1578 nfs4_fname_t *mfname; 1579 nfs4_error_t e; 1580 char *orig_sv_path; 1581 int orig_sv_pathlen, num_retry; 1582 cred_t *lcr = NULL, *tcr = cr; 1583 1584 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1585 ASSERT(nfsstatsp != NULL); 1586 1587 ASSERT(nfs_zone() == zone); 1588 ASSERT(crgetref(cr)); 1589 1590 /* 1591 * Create a mount record and link it to the vfs struct. 1592 */ 1593 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1594 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1595 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1596 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1597 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1598 1599 if (!(flags & NFSMNT_SOFT)) 1600 mi->mi_flags |= MI4_HARD; 1601 if ((flags & NFSMNT_NOPRINT)) 1602 mi->mi_flags |= MI4_NOPRINT; 1603 if (flags & NFSMNT_INT) 1604 mi->mi_flags |= MI4_INT; 1605 if (flags & NFSMNT_PUBLIC) 1606 mi->mi_flags |= MI4_PUBLIC; 1607 mi->mi_retrans = NFS_RETRIES; 1608 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1609 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1610 mi->mi_timeo = nfs4_cots_timeo; 1611 else 1612 mi->mi_timeo = NFS_TIMEO; 1613 mi->mi_prog = NFS_PROGRAM; 1614 mi->mi_vers = NFS_V4; 1615 mi->mi_rfsnames = rfsnames_v4; 1616 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1617 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1618 mi->mi_servers = svp; 1619 mi->mi_curr_serv = svp; 1620 mi->mi_acregmin = SEC2HR(ACREGMIN); 1621 mi->mi_acregmax = SEC2HR(ACREGMAX); 1622 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1623 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1624 mi->mi_fh_expire_type = FH4_PERSISTENT; 1625 mi->mi_clientid_next = NULL; 1626 mi->mi_clientid_prev = NULL; 1627 mi->mi_grace_wait = 0; 1628 mi->mi_error = 0; 1629 mi->mi_srvsettime = 0; 1630 1631 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1632 mi->mi_stsize = mi->mi_tsize; 1633 1634 if (flags & NFSMNT_DIRECTIO) 1635 mi->mi_flags |= MI4_DIRECTIO; 1636 1637 mi->mi_flags |= MI4_MOUNTING; 1638 1639 /* 1640 * Make a vfs struct for nfs. We do this here instead of below 1641 * because rtvp needs a vfs before we can do a getattr on it. 1642 * 1643 * Assign a unique device id to the mount 1644 */ 1645 mutex_enter(&nfs_minor_lock); 1646 do { 1647 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1648 nfs_dev = makedevice(nfs_major, nfs_minor); 1649 } while (vfs_devismounted(nfs_dev)); 1650 mutex_exit(&nfs_minor_lock); 1651 1652 vfsp->vfs_dev = nfs_dev; 1653 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1654 vfsp->vfs_data = (caddr_t)mi; 1655 vfsp->vfs_fstype = nfsfstyp; 1656 vfsp->vfs_bsize = nfs4_bsize; 1657 1658 /* 1659 * Initialize fields used to support async putpage operations. 1660 */ 1661 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1662 mi->mi_async_clusters[i] = nfs4_async_clusters; 1663 mi->mi_async_init_clusters = nfs4_async_clusters; 1664 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1665 mi->mi_max_threads = nfs4_max_threads; 1666 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1667 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1668 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1669 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1670 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1671 1672 mi->mi_vfsp = vfsp; 1673 zone_hold(mi->mi_zone = zone); 1674 nfs4_mi_zonelist_add(mi); 1675 1676 /* 1677 * Initialize the <open owner/cred> hash table. 1678 */ 1679 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1680 bucketp = &(mi->mi_oo_list[i]); 1681 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1682 list_create(&bucketp->b_oo_hash_list, 1683 sizeof (nfs4_open_owner_t), 1684 offsetof(nfs4_open_owner_t, oo_hash_node)); 1685 } 1686 1687 /* 1688 * Initialize the freed open owner list. 1689 */ 1690 mi->mi_foo_num = 0; 1691 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1692 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1693 offsetof(nfs4_open_owner_t, oo_foo_node)); 1694 1695 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1696 offsetof(nfs4_lost_rqst_t, lr_node)); 1697 1698 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1699 offsetof(nfs4_bseqid_entry_t, bs_node)); 1700 1701 /* 1702 * Initialize the msg buffer. 1703 */ 1704 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1705 offsetof(nfs4_debug_msg_t, msg_node)); 1706 mi->mi_msg_count = 0; 1707 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1708 1709 /* 1710 * Initialize kstats 1711 */ 1712 nfs4_mnt_kstat_init(vfsp); 1713 1714 /* 1715 * Initialize the shared filehandle pool, and get the fname for 1716 * the filesystem root. 1717 */ 1718 sfh4_createtab(&mi->mi_filehandles); 1719 mi->mi_fname = fn_get(NULL, "."); 1720 1721 /* 1722 * Save server path we're attempting to mount. 1723 */ 1724 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1725 orig_sv_pathlen = svp_head->sv_pathlen; 1726 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1727 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1728 nfs_rw_exit(&svp->sv_lock); 1729 1730 /* 1731 * Make the GETFH call to get root fh for each replica. 1732 */ 1733 if (svp_head->sv_next) 1734 droptext = ", dropping replica"; 1735 1736 /* 1737 * If the uid is set then set the creds for secure mounts 1738 * by proxy processes such as automountd. 1739 */ 1740 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1741 if (svp->sv_secdata->uid != 0) { 1742 lcr = crdup(cr); 1743 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1744 tcr = lcr; 1745 } 1746 nfs_rw_exit(&svp->sv_lock); 1747 for (svp = svp_head; svp; svp = svp->sv_next) { 1748 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1749 nfs_cmn_err(error, CE_WARN, 1750 VERS_MSG "Host %s is a duplicate%s", 1751 svp->sv_hostname, droptext); 1752 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1753 svp->sv_flags |= SV4_NOTINUSE; 1754 nfs_rw_exit(&svp->sv_lock); 1755 continue; 1756 } 1757 mi->mi_curr_serv = svp; 1758 1759 /* 1760 * Just in case server path being mounted contains 1761 * symlinks and fails w/STALE, save the initial sv_path 1762 * so we can redrive the initial mount compound with the 1763 * initial sv_path -- not a symlink-expanded version. 1764 * 1765 * This could only happen if a symlink was expanded 1766 * and the expanded mount compound failed stale. Because 1767 * it could be the case that the symlink was removed at 1768 * the server (and replaced with another symlink/dir, 1769 * we need to use the initial sv_path when attempting 1770 * to re-lookup everything and recover. 1771 * 1772 * Other mount errors should evenutally be handled here also 1773 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1774 * failures will result in mount being redriven a few times. 1775 */ 1776 num_retry = nfs4_max_mount_retry; 1777 do { 1778 nfs4getfh_otw(mi, svp, &tmp_vtype, 1779 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1780 NFS4_GETFH_NEEDSOP, tcr, &e); 1781 1782 if (e.error == 0 && e.stat == NFS4_OK) 1783 break; 1784 1785 /* 1786 * replace current sv_path with orig sv_path -- just in 1787 * case it changed due to embedded symlinks. 1788 */ 1789 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1790 if (orig_sv_pathlen != svp->sv_pathlen) { 1791 kmem_free(svp->sv_path, svp->sv_pathlen); 1792 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1793 KM_SLEEP); 1794 svp->sv_pathlen = orig_sv_pathlen; 1795 } 1796 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1797 nfs_rw_exit(&svp->sv_lock); 1798 1799 } while (num_retry-- > 0); 1800 1801 error = e.error ? e.error : geterrno4(e.stat); 1802 if (error) { 1803 nfs_cmn_err(error, CE_WARN, 1804 VERS_MSG "initial call to %s failed%s: %m", 1805 svp->sv_hostname, droptext); 1806 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1807 svp->sv_flags |= SV4_NOTINUSE; 1808 nfs_rw_exit(&svp->sv_lock); 1809 mi->mi_flags &= ~MI4_RECOV_FAIL; 1810 mi->mi_error = 0; 1811 continue; 1812 } 1813 1814 if (tmp_vtype == VBAD) { 1815 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1816 VERS_MSG "%s returned a bad file type for " 1817 "root%s", svp->sv_hostname, droptext); 1818 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1819 svp->sv_flags |= SV4_NOTINUSE; 1820 nfs_rw_exit(&svp->sv_lock); 1821 continue; 1822 } 1823 1824 if (vtype == VNON) { 1825 vtype = tmp_vtype; 1826 } else if (vtype != tmp_vtype) { 1827 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1828 VERS_MSG "%s returned a different file type " 1829 "for root%s", svp->sv_hostname, droptext); 1830 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1831 svp->sv_flags |= SV4_NOTINUSE; 1832 nfs_rw_exit(&svp->sv_lock); 1833 continue; 1834 } 1835 if (firstsvp == NULL) 1836 firstsvp = svp; 1837 } 1838 1839 kmem_free(orig_sv_path, orig_sv_pathlen); 1840 1841 if (firstsvp == NULL) { 1842 if (error == 0) 1843 error = ENOENT; 1844 goto bad; 1845 } 1846 1847 mi->mi_curr_serv = svp = firstsvp; 1848 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1849 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1850 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1851 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1852 mi->mi_rootfh = sfh4_get(&fh, mi); 1853 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1854 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1855 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1856 nfs_rw_exit(&svp->sv_lock); 1857 1858 /* 1859 * Make the root vnode without attributes. 1860 */ 1861 mfname = mi->mi_fname; 1862 fn_hold(mfname); 1863 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1864 &mfname, NULL, mi, cr, gethrtime()); 1865 rtvp->v_type = vtype; 1866 1867 mi->mi_curread = mi->mi_tsize; 1868 mi->mi_curwrite = mi->mi_stsize; 1869 1870 /* 1871 * Start the manager thread responsible for handling async worker 1872 * threads. 1873 */ 1874 VFS_HOLD(vfsp); /* add reference for thread */ 1875 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1876 vfsp, 0, minclsyspri); 1877 ASSERT(mi->mi_manager_thread != NULL); 1878 /* 1879 * Create the thread that handles over-the-wire calls for 1880 * VOP_INACTIVE. 1881 * This needs to happen after the manager thread is created. 1882 */ 1883 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1884 mi, 0, minclsyspri); 1885 ASSERT(mi->mi_inactive_thread != NULL); 1886 1887 /* If we didn't get a type, get one now */ 1888 if (rtvp->v_type == VNON) { 1889 va.va_mask = AT_TYPE; 1890 error = nfs4getattr(rtvp, &va, tcr); 1891 if (error) 1892 goto bad; 1893 rtvp->v_type = va.va_type; 1894 } 1895 1896 mi->mi_type = rtvp->v_type; 1897 1898 mutex_enter(&mi->mi_lock); 1899 mi->mi_flags &= ~MI4_MOUNTING; 1900 mutex_exit(&mi->mi_lock); 1901 1902 *rtvpp = rtvp; 1903 if (lcr != NULL) 1904 crfree(lcr); 1905 1906 return (0); 1907 bad: 1908 /* 1909 * An error occurred somewhere, need to clean up... 1910 * 1911 * XXX Should not svp be cleaned too? 1912 */ 1913 if (lcr != NULL) 1914 crfree(lcr); 1915 if (rtvp != NULL) { 1916 /* 1917 * We need to release our reference to the root vnode and 1918 * destroy the mntinfo4 struct that we just created. 1919 */ 1920 rp = VTOR4(rtvp); 1921 if (rp->r_flags & R4HASHED) 1922 rp4_rmhash(rp); 1923 VN_RELE(rtvp); 1924 } 1925 nfs4_async_stop(vfsp); 1926 nfs4_async_manager_stop(vfsp); 1927 if (mi->mi_io_kstats) { 1928 kstat_delete(mi->mi_io_kstats); 1929 mi->mi_io_kstats = NULL; 1930 } 1931 if (mi->mi_ro_kstats) { 1932 kstat_delete(mi->mi_ro_kstats); 1933 mi->mi_ro_kstats = NULL; 1934 } 1935 if (mi->mi_recov_ksp) { 1936 kstat_delete(mi->mi_recov_ksp); 1937 mi->mi_recov_ksp = NULL; 1938 } 1939 nfs_free_mi4(mi); 1940 *rtvpp = NULL; 1941 return (error); 1942 } 1943 1944 /* 1945 * vfs operations 1946 */ 1947 static int 1948 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1949 { 1950 mntinfo4_t *mi; 1951 ushort_t omax; 1952 1953 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1954 return (EPERM); 1955 1956 mi = VFTOMI4(vfsp); 1957 1958 if (flag & MS_FORCE) { 1959 vfsp->vfs_flag |= VFS_UNMOUNTED; 1960 if (nfs_zone() != mi->mi_zone) { 1961 /* 1962 * If the request is coming from the wrong zone, 1963 * we don't want to create any new threads, and 1964 * performance is not a concern. Do everything 1965 * inline. 1966 */ 1967 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1968 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1969 (void *)vfsp)); 1970 nfs4_free_mount(vfsp, cr); 1971 } else { 1972 /* 1973 * Free data structures asynchronously, to avoid 1974 * blocking the current thread (for performance 1975 * reasons only). 1976 */ 1977 async_free_mount(vfsp, cr); 1978 } 1979 return (0); 1980 } 1981 /* 1982 * Wait until all asynchronous putpage operations on 1983 * this file system are complete before flushing rnodes 1984 * from the cache. 1985 */ 1986 omax = mi->mi_max_threads; 1987 if (nfs4_async_stop_sig(vfsp)) { 1988 return (EINTR); 1989 } 1990 r4flush(vfsp, cr); 1991 /* 1992 * If there are any active vnodes on this file system, 1993 * then the file system is busy and can't be umounted. 1994 */ 1995 if (check_rtable4(vfsp)) { 1996 mutex_enter(&mi->mi_async_lock); 1997 mi->mi_max_threads = omax; 1998 mutex_exit(&mi->mi_async_lock); 1999 return (EBUSY); 2000 } 2001 /* 2002 * The unmount can't fail from now on, and there are no active 2003 * files that could require over-the-wire calls to the server, 2004 * so stop the async manager and the inactive thread. 2005 */ 2006 nfs4_async_manager_stop(vfsp); 2007 /* 2008 * Destroy all rnodes belonging to this file system from the 2009 * rnode hash queues and purge any resources allocated to 2010 * them. 2011 */ 2012 destroy_rtable4(vfsp, cr); 2013 vfsp->vfs_flag |= VFS_UNMOUNTED; 2014 nfs4_remove_mi_from_server(mi, NULL); 2015 if (mi->mi_io_kstats) { 2016 kstat_delete(mi->mi_io_kstats); 2017 mi->mi_io_kstats = NULL; 2018 } 2019 if (mi->mi_ro_kstats) { 2020 kstat_delete(mi->mi_ro_kstats); 2021 mi->mi_ro_kstats = NULL; 2022 } 2023 if (mi->mi_recov_ksp) { 2024 kstat_delete(mi->mi_recov_ksp); 2025 mi->mi_recov_ksp = NULL; 2026 } 2027 return (0); 2028 } 2029 2030 /* 2031 * find root of nfs 2032 */ 2033 static int 2034 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2035 { 2036 mntinfo4_t *mi; 2037 vnode_t *vp; 2038 nfs4_fname_t *mfname; 2039 servinfo4_t *svp; 2040 2041 mi = VFTOMI4(vfsp); 2042 2043 if (nfs_zone() != mi->mi_zone) 2044 return (EPERM); 2045 2046 svp = mi->mi_curr_serv; 2047 if (svp) { 2048 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2049 if (svp->sv_flags & SV4_ROOT_STALE) { 2050 nfs_rw_exit(&svp->sv_lock); 2051 2052 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2053 if (svp->sv_flags & SV4_ROOT_STALE) { 2054 svp->sv_flags &= ~SV4_ROOT_STALE; 2055 nfs_rw_exit(&svp->sv_lock); 2056 return (ENOENT); 2057 } 2058 nfs_rw_exit(&svp->sv_lock); 2059 } else 2060 nfs_rw_exit(&svp->sv_lock); 2061 } 2062 2063 mfname = mi->mi_fname; 2064 fn_hold(mfname); 2065 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2066 VFTOMI4(vfsp), CRED(), gethrtime()); 2067 2068 if (VTOR4(vp)->r_flags & R4STALE) { 2069 VN_RELE(vp); 2070 return (ENOENT); 2071 } 2072 2073 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2074 2075 vp->v_type = mi->mi_type; 2076 2077 *vpp = vp; 2078 2079 return (0); 2080 } 2081 2082 static int 2083 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2084 { 2085 int error; 2086 nfs4_ga_res_t gar; 2087 nfs4_ga_ext_res_t ger; 2088 2089 gar.n4g_ext_res = &ger; 2090 2091 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2092 NFS4_STATFS_ATTR_MASK, cr)) 2093 return (error); 2094 2095 *sbp = gar.n4g_ext_res->n4g_sb; 2096 2097 return (0); 2098 } 2099 2100 /* 2101 * Get file system statistics. 2102 */ 2103 static int 2104 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2105 { 2106 int error; 2107 vnode_t *vp; 2108 cred_t *cr; 2109 2110 error = nfs4_root(vfsp, &vp); 2111 if (error) 2112 return (error); 2113 2114 cr = CRED(); 2115 2116 error = nfs4_statfs_otw(vp, sbp, cr); 2117 if (!error) { 2118 (void) strncpy(sbp->f_basetype, 2119 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2120 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2121 } else { 2122 nfs4_purge_stale_fh(error, vp, cr); 2123 } 2124 2125 VN_RELE(vp); 2126 2127 return (error); 2128 } 2129 2130 static kmutex_t nfs4_syncbusy; 2131 2132 /* 2133 * Flush dirty nfs files for file system vfsp. 2134 * If vfsp == NULL, all nfs files are flushed. 2135 * 2136 * SYNC_CLOSE in flag is passed to us to 2137 * indicate that we are shutting down and or 2138 * rebooting. 2139 */ 2140 static int 2141 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2142 { 2143 /* 2144 * Cross-zone calls are OK here, since this translates to a 2145 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2146 */ 2147 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2148 r4flush(vfsp, cr); 2149 mutex_exit(&nfs4_syncbusy); 2150 } 2151 2152 /* 2153 * if SYNC_CLOSE is set then we know that 2154 * the system is rebooting, mark the mntinfo 2155 * for later examination. 2156 */ 2157 if (vfsp && (flag & SYNC_CLOSE)) { 2158 mntinfo4_t *mi; 2159 2160 mi = VFTOMI4(vfsp); 2161 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2162 mutex_enter(&mi->mi_lock); 2163 mi->mi_flags |= MI4_SHUTDOWN; 2164 mutex_exit(&mi->mi_lock); 2165 } 2166 } 2167 return (0); 2168 } 2169 2170 /* 2171 * vget is difficult, if not impossible, to support in v4 because we don't 2172 * know the parent directory or name, which makes it impossible to create a 2173 * useful shadow vnode. And we need the shadow vnode for things like 2174 * OPEN. 2175 */ 2176 2177 /* ARGSUSED */ 2178 /* 2179 * XXX Check nfs4_vget_pseudo() for dependency. 2180 */ 2181 static int 2182 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2183 { 2184 return (EREMOTE); 2185 } 2186 2187 /* 2188 * nfs4_mountroot get called in the case where we are diskless booting. All 2189 * we need from here is the ability to get the server info and from there we 2190 * can simply call nfs4_rootvp. 2191 */ 2192 /* ARGSUSED */ 2193 static int 2194 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2195 { 2196 vnode_t *rtvp; 2197 char root_hostname[SYS_NMLN+1]; 2198 struct servinfo4 *svp; 2199 int error; 2200 int vfsflags; 2201 size_t size; 2202 char *root_path; 2203 struct pathname pn; 2204 char *name; 2205 cred_t *cr; 2206 mntinfo4_t *mi; 2207 struct nfs_args args; /* nfs mount arguments */ 2208 static char token[10]; 2209 nfs4_error_t n4e; 2210 2211 bzero(&args, sizeof (args)); 2212 2213 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2214 clkset(-1L); /* hack for now - until we get time svc? */ 2215 2216 if (why == ROOT_REMOUNT) { 2217 /* 2218 * Shouldn't happen. 2219 */ 2220 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2221 } 2222 2223 if (why == ROOT_UNMOUNT) { 2224 /* 2225 * Nothing to do for NFS. 2226 */ 2227 return (0); 2228 } 2229 2230 /* 2231 * why == ROOT_INIT 2232 */ 2233 2234 name = token; 2235 *name = 0; 2236 (void) getfsname("root", name, sizeof (token)); 2237 2238 pn_alloc(&pn); 2239 root_path = pn.pn_path; 2240 2241 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2242 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2243 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2244 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2245 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2246 2247 /* 2248 * Get server address 2249 * Get the root path 2250 * Get server's transport 2251 * Get server's hostname 2252 * Get options 2253 */ 2254 args.addr = &svp->sv_addr; 2255 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2256 args.fh = (char *)&svp->sv_fhandle; 2257 args.knconf = svp->sv_knconf; 2258 args.hostname = root_hostname; 2259 vfsflags = 0; 2260 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2261 &args, &vfsflags)) { 2262 if (error == EPROTONOSUPPORT) 2263 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2264 "mount_root failed: server doesn't support NFS V4"); 2265 else 2266 nfs_cmn_err(error, CE_WARN, 2267 "nfs4_mountroot: mount_root failed: %m"); 2268 nfs_rw_exit(&svp->sv_lock); 2269 sv4_free(svp); 2270 pn_free(&pn); 2271 return (error); 2272 } 2273 nfs_rw_exit(&svp->sv_lock); 2274 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2275 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2276 (void) strcpy(svp->sv_hostname, root_hostname); 2277 2278 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2279 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2280 (void) strcpy(svp->sv_path, root_path); 2281 2282 /* 2283 * Force root partition to always be mounted with AUTH_UNIX for now 2284 */ 2285 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2286 svp->sv_secdata->secmod = AUTH_UNIX; 2287 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2288 svp->sv_secdata->data = NULL; 2289 2290 cr = crgetcred(); 2291 rtvp = NULL; 2292 2293 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2294 2295 if (error) { 2296 crfree(cr); 2297 pn_free(&pn); 2298 goto errout; 2299 } 2300 2301 mi = VTOMI4(rtvp); 2302 2303 /* 2304 * Send client id to the server, if necessary 2305 */ 2306 nfs4_error_zinit(&n4e); 2307 nfs4setclientid(mi, cr, FALSE, &n4e); 2308 error = n4e.error; 2309 2310 crfree(cr); 2311 2312 if (error) { 2313 pn_free(&pn); 2314 goto errout; 2315 } 2316 2317 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2318 if (error) { 2319 nfs_cmn_err(error, CE_WARN, 2320 "nfs4_mountroot: invalid root mount options"); 2321 pn_free(&pn); 2322 goto errout; 2323 } 2324 2325 (void) vfs_lock_wait(vfsp); 2326 vfs_add(NULL, vfsp, vfsflags); 2327 vfs_unlock(vfsp); 2328 2329 size = strlen(svp->sv_hostname); 2330 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2331 rootfs.bo_name[size] = ':'; 2332 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2333 2334 pn_free(&pn); 2335 2336 errout: 2337 if (error) { 2338 sv4_free(svp); 2339 nfs4_async_stop(vfsp); 2340 nfs4_async_manager_stop(vfsp); 2341 } 2342 2343 if (rtvp != NULL) 2344 VN_RELE(rtvp); 2345 2346 return (error); 2347 } 2348 2349 /* 2350 * Initialization routine for VFS routines. Should only be called once 2351 */ 2352 int 2353 nfs4_vfsinit(void) 2354 { 2355 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2356 nfs4setclientid_init(); 2357 return (0); 2358 } 2359 2360 void 2361 nfs4_vfsfini(void) 2362 { 2363 nfs4setclientid_fini(); 2364 mutex_destroy(&nfs4_syncbusy); 2365 } 2366 2367 void 2368 nfs4_freevfs(vfs_t *vfsp) 2369 { 2370 mntinfo4_t *mi; 2371 servinfo4_t *svp; 2372 2373 /* free up the resources */ 2374 mi = VFTOMI4(vfsp); 2375 svp = mi->mi_servers; 2376 mi->mi_servers = mi->mi_curr_serv = NULL; 2377 sv4_free(svp); 2378 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_freevfs: " 2379 "free mi %p", (void *)mi)); 2380 2381 /* 2382 * By this time we should have already deleted the 2383 * mi kstats in the unmount code. If they are still around 2384 * somethings wrong 2385 */ 2386 ASSERT(mi->mi_io_kstats == NULL); 2387 2388 nfs_free_mi4(mi); 2389 } 2390 2391 /* 2392 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2393 */ 2394 struct nfs4_server nfs4_server_lst = 2395 { &nfs4_server_lst, &nfs4_server_lst }; 2396 2397 kmutex_t nfs4_server_lst_lock; 2398 2399 static void 2400 nfs4setclientid_init(void) 2401 { 2402 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2403 } 2404 2405 static void 2406 nfs4setclientid_fini(void) 2407 { 2408 mutex_destroy(&nfs4_server_lst_lock); 2409 } 2410 2411 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2412 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2413 2414 /* 2415 * Set the clientid for the server for "mi". No-op if the clientid is 2416 * already set. 2417 * 2418 * The recovery boolean should be set to TRUE if this function was called 2419 * by the recovery code, and FALSE otherwise. This is used to determine 2420 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2421 * for adding a mntinfo4_t to a nfs4_server_t. 2422 * 2423 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2424 * 'n4ep->error' is set to geterrno4(n4ep->stat). 2425 */ 2426 void 2427 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2428 { 2429 struct nfs4_server *np; 2430 struct servinfo4 *svp = mi->mi_curr_serv; 2431 nfs4_recov_state_t recov_state; 2432 int num_retries = 0; 2433 bool_t retry = FALSE; 2434 cred_t *lcr = NULL; 2435 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2436 time_t lease_time = 0; 2437 2438 recov_state.rs_flags = 0; 2439 recov_state.rs_num_retry_despite_err = 0; 2440 ASSERT(n4ep != NULL); 2441 2442 recov_retry: 2443 nfs4_error_zinit(n4ep); 2444 if (!recovery) 2445 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2446 2447 mutex_enter(&nfs4_server_lst_lock); 2448 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2449 mutex_exit(&nfs4_server_lst_lock); 2450 if (!np) { 2451 struct nfs4_server *tnp; 2452 np = new_nfs4_server(svp, cr); 2453 2454 mutex_enter(&nfs4_server_lst_lock); 2455 tnp = servinfo4_to_nfs4_server(svp); 2456 if (tnp) { 2457 /* 2458 * another thread snuck in and put server on list. 2459 * since we aren't adding it to the nfs4_server_list 2460 * we need to set the ref count to 0 and destroy it. 2461 */ 2462 np->s_refcnt = 0; 2463 destroy_nfs4_server(np); 2464 np = tnp; 2465 } else { 2466 /* 2467 * do not give list a reference until everything 2468 * succeeds 2469 */ 2470 mutex_enter(&np->s_lock); 2471 insque(np, &nfs4_server_lst); 2472 } 2473 mutex_exit(&nfs4_server_lst_lock); 2474 } 2475 ASSERT(MUTEX_HELD(&np->s_lock)); 2476 /* 2477 * If we find the server already has N4S_CLIENTID_SET, then 2478 * just return, we've already done SETCLIENTID to that server 2479 */ 2480 if (np->s_flags & N4S_CLIENTID_SET) { 2481 /* add mi to np's mntinfo4_list */ 2482 nfs4_add_mi_to_server(np, mi); 2483 if (!recovery) 2484 nfs_rw_exit(&mi->mi_recovlock); 2485 mutex_exit(&np->s_lock); 2486 nfs4_server_rele(np); 2487 return; 2488 } 2489 mutex_exit(&np->s_lock); 2490 2491 2492 /* 2493 * Drop the mi_recovlock since nfs4_start_op will 2494 * acquire it again for us. 2495 */ 2496 if (!recovery) { 2497 nfs_rw_exit(&mi->mi_recovlock); 2498 2499 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2500 if (n4ep->error) { 2501 nfs4_server_rele(np); 2502 return; 2503 } 2504 } 2505 2506 mutex_enter(&np->s_lock); 2507 while (np->s_flags & N4S_CLIENTID_PEND) { 2508 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2509 mutex_exit(&np->s_lock); 2510 nfs4_server_rele(np); 2511 if (!recovery) 2512 nfs4_end_op(mi, NULL, NULL, &recov_state, 2513 recovery); 2514 n4ep->error = EINTR; 2515 return; 2516 } 2517 } 2518 2519 if (np->s_flags & N4S_CLIENTID_SET) { 2520 /* XXX copied/pasted from above */ 2521 /* add mi to np's mntinfo4_list */ 2522 nfs4_add_mi_to_server(np, mi); 2523 mutex_exit(&np->s_lock); 2524 nfs4_server_rele(np); 2525 if (!recovery) 2526 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2527 return; 2528 } 2529 2530 /* 2531 * Reset the N4S_CB_PINGED flag. This is used to 2532 * indicate if we have received a CB_NULL from the 2533 * server. Also we reset the waiter flag. 2534 */ 2535 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2536 /* any failure must now clear this flag */ 2537 np->s_flags |= N4S_CLIENTID_PEND; 2538 mutex_exit(&np->s_lock); 2539 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2540 2541 if (n4ep->error == EACCES) { 2542 /* 2543 * If the uid is set then set the creds for secure mounts 2544 * by proxy processes such as automountd. 2545 */ 2546 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2547 if (svp->sv_secdata->uid != 0) { 2548 lcr = crdup(cr); 2549 (void) crsetugid(lcr, svp->sv_secdata->uid, 2550 crgetgid(cr)); 2551 } 2552 nfs_rw_exit(&svp->sv_lock); 2553 2554 if (lcr != NULL) { 2555 mutex_enter(&np->s_lock); 2556 crfree(np->s_cred); 2557 np->s_cred = lcr; 2558 mutex_exit(&np->s_lock); 2559 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2560 &retry_inuse); 2561 } 2562 } 2563 mutex_enter(&np->s_lock); 2564 lease_time = np->s_lease_time; 2565 np->s_flags &= ~N4S_CLIENTID_PEND; 2566 mutex_exit(&np->s_lock); 2567 2568 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2569 /* 2570 * Start recovery if failover is a possibility. If 2571 * invoked by the recovery thread itself, then just 2572 * return and let it handle the failover first. NB: 2573 * recovery is not allowed if the mount is in progress 2574 * since the infrastructure is not sufficiently setup 2575 * to allow it. Just return the error (after suitable 2576 * retries). 2577 */ 2578 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2579 (void) nfs4_start_recovery(n4ep, mi, NULL, 2580 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2581 /* 2582 * Don't retry here, just return and let 2583 * recovery take over. 2584 */ 2585 if (recovery) 2586 retry = FALSE; 2587 } else if (nfs4_rpc_retry_error(n4ep->error) || 2588 n4ep->stat == NFS4ERR_RESOURCE || 2589 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2590 2591 retry = TRUE; 2592 /* 2593 * Always retry if in recovery or once had 2594 * contact with the server (but now it's 2595 * overloaded). 2596 */ 2597 if (recovery == TRUE || 2598 n4ep->error == ETIMEDOUT || 2599 n4ep->error == ECONNRESET) 2600 num_retries = 0; 2601 } else if (retry_inuse && n4ep->error == 0 && 2602 n4ep->stat == NFS4ERR_CLID_INUSE) { 2603 retry = TRUE; 2604 num_retries = 0; 2605 } 2606 } else { 2607 /* Since everything succeeded give the list a reference count */ 2608 mutex_enter(&np->s_lock); 2609 np->s_refcnt++; 2610 mutex_exit(&np->s_lock); 2611 } 2612 2613 if (!recovery) 2614 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2615 2616 2617 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2618 if (retry_inuse) { 2619 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2620 retry_inuse = 0; 2621 } else 2622 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2623 2624 nfs4_server_rele(np); 2625 goto recov_retry; 2626 } 2627 2628 2629 if (n4ep->error == 0) 2630 n4ep->error = geterrno4(n4ep->stat); 2631 2632 /* broadcast before release in case no other threads are waiting */ 2633 cv_broadcast(&np->s_clientid_pend); 2634 nfs4_server_rele(np); 2635 } 2636 2637 int nfs4setclientid_otw_debug = 0; 2638 2639 /* 2640 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2641 * but nothing else; the calling function must be designed to handle those 2642 * other errors. 2643 */ 2644 static void 2645 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2646 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2647 { 2648 COMPOUND4args_clnt args; 2649 COMPOUND4res_clnt res; 2650 nfs_argop4 argop[3]; 2651 SETCLIENTID4args *s_args; 2652 SETCLIENTID4resok *s_resok; 2653 int doqueue = 1; 2654 nfs4_ga_res_t *garp = NULL; 2655 timespec_t prop_time, after_time; 2656 verifier4 verf; 2657 clientid4 tmp_clientid; 2658 2659 ASSERT(!MUTEX_HELD(&np->s_lock)); 2660 2661 args.ctag = TAG_SETCLIENTID; 2662 2663 args.array = argop; 2664 args.array_len = 3; 2665 2666 /* PUTROOTFH */ 2667 argop[0].argop = OP_PUTROOTFH; 2668 2669 /* GETATTR */ 2670 argop[1].argop = OP_GETATTR; 2671 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2672 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2673 2674 /* SETCLIENTID */ 2675 argop[2].argop = OP_SETCLIENTID; 2676 2677 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2678 2679 mutex_enter(&np->s_lock); 2680 2681 s_args->client.verifier = np->clidtosend.verifier; 2682 s_args->client.id_len = np->clidtosend.id_len; 2683 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2684 s_args->client.id_val = np->clidtosend.id_val; 2685 2686 /* 2687 * Callback needs to happen on non-RDMA transport 2688 * Check if we have saved the original knetconfig 2689 * if so, use that instead. 2690 */ 2691 if (svp->sv_origknconf != NULL) 2692 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2693 else 2694 nfs4_cb_args(np, svp->sv_knconf, s_args); 2695 2696 mutex_exit(&np->s_lock); 2697 2698 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2699 2700 if (ep->error) 2701 return; 2702 2703 /* getattr lease_time res */ 2704 if (res.array_len >= 2) { 2705 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2706 2707 #ifndef _LP64 2708 /* 2709 * The 32 bit client cannot handle a lease time greater than 2710 * (INT32_MAX/1000000). This is due to the use of the 2711 * lease_time in calls to drv_usectohz() in 2712 * nfs4_renew_lease_thread(). The problem is that 2713 * drv_usectohz() takes a time_t (which is just a long = 4 2714 * bytes) as its parameter. The lease_time is multiplied by 2715 * 1000000 to convert seconds to usecs for the parameter. If 2716 * a number bigger than (INT32_MAX/1000000) is used then we 2717 * overflow on the 32bit client. 2718 */ 2719 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2720 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2721 } 2722 #endif 2723 2724 mutex_enter(&np->s_lock); 2725 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2726 2727 /* 2728 * Keep track of the lease period for the mi's 2729 * mi_msg_list. We need an appropiate time 2730 * bound to associate past facts with a current 2731 * event. The lease period is perfect for this. 2732 */ 2733 mutex_enter(&mi->mi_msg_list_lock); 2734 mi->mi_lease_period = np->s_lease_time; 2735 mutex_exit(&mi->mi_msg_list_lock); 2736 mutex_exit(&np->s_lock); 2737 } 2738 2739 2740 if (res.status == NFS4ERR_CLID_INUSE) { 2741 clientaddr4 *clid_inuse; 2742 2743 if (!(*retry_inusep)) { 2744 clid_inuse = &res.array->nfs_resop4_u. 2745 opsetclientid.SETCLIENTID4res_u.client_using; 2746 2747 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2748 "NFS4 mount (SETCLIENTID failed)." 2749 " nfs4_client_id.id is in" 2750 "use already by: r_netid<%s> r_addr<%s>", 2751 clid_inuse->r_netid, clid_inuse->r_addr); 2752 } 2753 2754 /* 2755 * XXX - The client should be more robust in its 2756 * handling of clientid in use errors (regen another 2757 * clientid and try again?) 2758 */ 2759 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2760 return; 2761 } 2762 2763 if (res.status) { 2764 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2765 return; 2766 } 2767 2768 s_resok = &res.array[2].nfs_resop4_u. 2769 opsetclientid.SETCLIENTID4res_u.resok4; 2770 2771 tmp_clientid = s_resok->clientid; 2772 2773 verf = s_resok->setclientid_confirm; 2774 2775 #ifdef DEBUG 2776 if (nfs4setclientid_otw_debug) { 2777 union { 2778 clientid4 clientid; 2779 int foo[2]; 2780 } cid; 2781 2782 cid.clientid = s_resok->clientid; 2783 2784 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2785 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2786 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2787 } 2788 #endif 2789 2790 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2791 2792 /* Confirm the client id and get the lease_time attribute */ 2793 2794 args.ctag = TAG_SETCLIENTID_CF; 2795 2796 args.array = argop; 2797 args.array_len = 1; 2798 2799 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2800 2801 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2802 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2803 2804 /* used to figure out RTT for np */ 2805 gethrestime(&prop_time); 2806 2807 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2808 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2809 prop_time.tv_nsec)); 2810 2811 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2812 2813 gethrestime(&after_time); 2814 mutex_enter(&np->s_lock); 2815 np->propagation_delay.tv_sec = 2816 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2817 mutex_exit(&np->s_lock); 2818 2819 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2820 "finish time: %ld sec ", after_time.tv_sec)); 2821 2822 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2823 "propagation delay set to %ld sec", 2824 np->propagation_delay.tv_sec)); 2825 2826 if (ep->error) 2827 return; 2828 2829 if (res.status == NFS4ERR_CLID_INUSE) { 2830 clientaddr4 *clid_inuse; 2831 2832 if (!(*retry_inusep)) { 2833 clid_inuse = &res.array->nfs_resop4_u. 2834 opsetclientid.SETCLIENTID4res_u.client_using; 2835 2836 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2837 "SETCLIENTID_CONFIRM failed. " 2838 "nfs4_client_id.id is in use already by: " 2839 "r_netid<%s> r_addr<%s>", 2840 clid_inuse->r_netid, clid_inuse->r_addr); 2841 } 2842 2843 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2844 return; 2845 } 2846 2847 if (res.status) { 2848 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2849 return; 2850 } 2851 2852 mutex_enter(&np->s_lock); 2853 np->clientid = tmp_clientid; 2854 np->s_flags |= N4S_CLIENTID_SET; 2855 2856 /* Add mi to np's mntinfo4 list */ 2857 nfs4_add_mi_to_server(np, mi); 2858 2859 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2860 /* 2861 * Start lease management thread. 2862 * Keep trying until we succeed. 2863 */ 2864 2865 np->s_refcnt++; /* pass reference to thread */ 2866 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2867 minclsyspri); 2868 } 2869 mutex_exit(&np->s_lock); 2870 2871 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2872 } 2873 2874 /* 2875 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2876 * mi's clientid the same as sp's. 2877 * Assumes sp is locked down. 2878 */ 2879 void 2880 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 2881 { 2882 mntinfo4_t *tmi; 2883 int in_list = 0; 2884 2885 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 2886 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2887 ASSERT(sp != &nfs4_server_lst); 2888 ASSERT(MUTEX_HELD(&sp->s_lock)); 2889 2890 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2891 "nfs4_add_mi_to_server: add mi %p to sp %p", 2892 (void*)mi, (void*)sp)); 2893 2894 for (tmi = sp->mntinfo4_list; 2895 tmi != NULL; 2896 tmi = tmi->mi_clientid_next) { 2897 if (tmi == mi) { 2898 NFS4_DEBUG(nfs4_client_lease_debug, 2899 (CE_NOTE, 2900 "nfs4_add_mi_to_server: mi in list")); 2901 in_list = 1; 2902 } 2903 } 2904 2905 /* 2906 * First put a hold on the mntinfo4's vfsp so that references via 2907 * mntinfo4_list will be valid. 2908 */ 2909 if (!in_list) 2910 VFS_HOLD(mi->mi_vfsp); 2911 2912 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 2913 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 2914 2915 if (!in_list) { 2916 if (sp->mntinfo4_list) 2917 sp->mntinfo4_list->mi_clientid_prev = mi; 2918 mi->mi_clientid_next = sp->mntinfo4_list; 2919 sp->mntinfo4_list = mi; 2920 mi->mi_srvsettime = gethrestime_sec(); 2921 } 2922 2923 /* set mi's clientid to that of sp's for later matching */ 2924 mi->mi_clientid = sp->clientid; 2925 2926 /* 2927 * Update the clientid for any other mi's belonging to sp. This 2928 * must be done here while we hold sp->s_lock, so that 2929 * find_nfs4_server() continues to work. 2930 */ 2931 2932 for (tmi = sp->mntinfo4_list; 2933 tmi != NULL; 2934 tmi = tmi->mi_clientid_next) { 2935 if (tmi != mi) { 2936 tmi->mi_clientid = sp->clientid; 2937 } 2938 } 2939 } 2940 2941 /* 2942 * Remove the mi from sp's mntinfo4_list and release its reference. 2943 * Exception: if mi still has open files, flag it for later removal (when 2944 * all the files are closed). 2945 * 2946 * If this is the last mntinfo4 in sp's list then tell the lease renewal 2947 * thread to exit. 2948 */ 2949 static void 2950 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 2951 { 2952 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2953 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 2954 (void*)mi, (void*)sp)); 2955 2956 ASSERT(sp != NULL); 2957 ASSERT(MUTEX_HELD(&sp->s_lock)); 2958 ASSERT(mi->mi_open_files >= 0); 2959 2960 /* 2961 * First make sure this mntinfo4 can be taken off of the list, 2962 * ie: it doesn't have any open files remaining. 2963 */ 2964 if (mi->mi_open_files > 0) { 2965 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2966 "nfs4_remove_mi_from_server_nolock: don't " 2967 "remove mi since it still has files open")); 2968 2969 mutex_enter(&mi->mi_lock); 2970 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 2971 mutex_exit(&mi->mi_lock); 2972 return; 2973 } 2974 2975 remove_mi(sp, mi); 2976 2977 if (sp->mntinfo4_list == NULL) { 2978 /* last fs unmounted, kill the thread */ 2979 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2980 "remove_mi_from_nfs4_server_nolock: kill the thread")); 2981 nfs4_mark_srv_dead(sp); 2982 } 2983 } 2984 2985 /* 2986 * Remove mi from sp's mntinfo4_list and release the vfs reference. 2987 */ 2988 static void 2989 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 2990 { 2991 ASSERT(MUTEX_HELD(&sp->s_lock)); 2992 2993 /* 2994 * We release a reference, and the caller must still have a 2995 * reference. 2996 */ 2997 ASSERT(mi->mi_vfsp->vfs_count >= 2); 2998 2999 if (mi->mi_clientid_prev) { 3000 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3001 } else { 3002 /* This is the first mi in sp's mntinfo4_list */ 3003 /* 3004 * Make sure the first mntinfo4 in the list is the actual 3005 * mntinfo4 passed in. 3006 */ 3007 ASSERT(sp->mntinfo4_list == mi); 3008 3009 sp->mntinfo4_list = mi->mi_clientid_next; 3010 } 3011 if (mi->mi_clientid_next) 3012 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3013 3014 /* Now mark the mntinfo4's links as being removed */ 3015 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3016 3017 VFS_RELE(mi->mi_vfsp); 3018 } 3019 3020 /* 3021 * Free all the entries in sp's mntinfo4_list. 3022 */ 3023 static void 3024 remove_all_mi(nfs4_server_t *sp) 3025 { 3026 mntinfo4_t *mi; 3027 3028 ASSERT(MUTEX_HELD(&sp->s_lock)); 3029 3030 while (sp->mntinfo4_list != NULL) { 3031 mi = sp->mntinfo4_list; 3032 /* 3033 * Grab a reference in case there is only one left (which 3034 * remove_mi() frees). 3035 */ 3036 VFS_HOLD(mi->mi_vfsp); 3037 remove_mi(sp, mi); 3038 VFS_RELE(mi->mi_vfsp); 3039 } 3040 } 3041 3042 /* 3043 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3044 * 3045 * This version can be called with a null nfs4_server_t arg, 3046 * and will either find the right one and handle locking, or 3047 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3048 */ 3049 void 3050 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3051 { 3052 nfs4_server_t *sp; 3053 3054 if (esp == NULL) { 3055 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3056 sp = find_nfs4_server_all(mi, 1); 3057 } else 3058 sp = esp; 3059 3060 if (sp != NULL) 3061 nfs4_remove_mi_from_server_nolock(mi, sp); 3062 3063 /* 3064 * If we had a valid esp as input, the calling function will be 3065 * responsible for unlocking the esp nfs4_server. 3066 */ 3067 if (esp == NULL) { 3068 if (sp != NULL) 3069 mutex_exit(&sp->s_lock); 3070 nfs_rw_exit(&mi->mi_recovlock); 3071 if (sp != NULL) 3072 nfs4_server_rele(sp); 3073 } 3074 } 3075 3076 /* 3077 * Return TRUE if the given server has any non-unmounted filesystems. 3078 */ 3079 3080 bool_t 3081 nfs4_fs_active(nfs4_server_t *sp) 3082 { 3083 mntinfo4_t *mi; 3084 3085 ASSERT(MUTEX_HELD(&sp->s_lock)); 3086 3087 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3088 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3089 return (TRUE); 3090 } 3091 3092 return (FALSE); 3093 } 3094 3095 /* 3096 * Mark sp as finished and notify any waiters. 3097 */ 3098 3099 void 3100 nfs4_mark_srv_dead(nfs4_server_t *sp) 3101 { 3102 ASSERT(MUTEX_HELD(&sp->s_lock)); 3103 3104 sp->s_thread_exit = NFS4_THREAD_EXIT; 3105 cv_broadcast(&sp->cv_thread_exit); 3106 } 3107 3108 /* 3109 * Create a new nfs4_server_t structure. 3110 * Returns new node unlocked and not in list, but with a reference count of 3111 * 1. 3112 */ 3113 struct nfs4_server * 3114 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3115 { 3116 struct nfs4_server *np; 3117 timespec_t tt; 3118 union { 3119 struct { 3120 uint32_t sec; 3121 uint32_t subsec; 3122 } un_curtime; 3123 verifier4 un_verifier; 3124 } nfs4clientid_verifier; 3125 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3126 int len; 3127 3128 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3129 np->saddr.len = svp->sv_addr.len; 3130 np->saddr.maxlen = svp->sv_addr.maxlen; 3131 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3132 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3133 np->s_refcnt = 1; 3134 3135 /* 3136 * Build the nfs_client_id4 for this server mount. Ensure 3137 * the verifier is useful and that the identification is 3138 * somehow based on the server's address for the case of 3139 * multi-homed servers. 3140 */ 3141 nfs4clientid_verifier.un_verifier = 0; 3142 gethrestime(&tt); 3143 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3144 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3145 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3146 3147 /* 3148 * calculate the length of the opaque identifier. Subtract 2 3149 * for the "%s" and add the traditional +1 for null 3150 * termination. 3151 */ 3152 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3153 np->clidtosend.id_len = len + np->saddr.maxlen; 3154 3155 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3156 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3157 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3158 3159 np->s_flags = 0; 3160 np->mntinfo4_list = NULL; 3161 /* save cred for issuing rfs4calls inside the renew thread */ 3162 crhold(cr); 3163 np->s_cred = cr; 3164 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3165 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3166 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3167 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3168 offsetof(rnode4_t, r_deleg_link)); 3169 np->s_thread_exit = 0; 3170 np->state_ref_count = 0; 3171 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3172 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3173 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3174 np->s_otw_call_count = 0; 3175 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3176 np->zoneid = getzoneid(); 3177 np->zone_globals = nfs4_get_callback_globals(); 3178 ASSERT(np->zone_globals != NULL); 3179 return (np); 3180 } 3181 3182 /* 3183 * Create a new nfs4_server_t structure and add it to the list. 3184 * Returns new node locked; reference must eventually be freed. 3185 */ 3186 static struct nfs4_server * 3187 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3188 { 3189 nfs4_server_t *sp; 3190 3191 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3192 sp = new_nfs4_server(svp, cr); 3193 mutex_enter(&sp->s_lock); 3194 insque(sp, &nfs4_server_lst); 3195 sp->s_refcnt++; /* list gets a reference */ 3196 sp->clientid = 0; 3197 return (sp); 3198 } 3199 3200 int nfs4_server_t_debug = 0; 3201 3202 #ifdef lint 3203 extern void 3204 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3205 #endif 3206 3207 #ifndef lint 3208 #ifdef DEBUG 3209 void 3210 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3211 { 3212 int hash16(void *p, int len); 3213 nfs4_server_t *np; 3214 3215 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3216 "dumping nfs4_server_t list in %s", txt)); 3217 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3218 "mi 0x%p, want clientid %llx, addr %d/%04X", 3219 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3220 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3221 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3222 np = np->forw) { 3223 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3224 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3225 np, (longlong_t)np->clientid, np->saddr.len, 3226 hash16((void *)np->saddr.buf, np->saddr.len), 3227 np->state_ref_count)); 3228 if (np->saddr.len == srv_p->sv_addr.len && 3229 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3230 np->saddr.len) == 0) 3231 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3232 " - address matches")); 3233 if (np->clientid == clientid || np->clientid == 0) 3234 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3235 " - clientid matches")); 3236 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3237 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3238 " - thread not exiting")); 3239 } 3240 delay(hz); 3241 } 3242 #endif 3243 #endif 3244 3245 3246 /* 3247 * Move a mntinfo4_t from one server list to another. 3248 * Locking of the two nfs4_server_t nodes will be done in list order. 3249 * 3250 * Returns NULL if the current nfs4_server_t for the filesystem could not 3251 * be found (e.g., due to forced unmount). Otherwise returns a reference 3252 * to the new nfs4_server_t, which must eventually be freed. 3253 */ 3254 nfs4_server_t * 3255 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3256 { 3257 nfs4_server_t *p, *op = NULL, *np = NULL; 3258 int num_open; 3259 zoneid_t zoneid = nfs_zoneid(); 3260 3261 ASSERT(nfs_zone() == mi->mi_zone); 3262 3263 mutex_enter(&nfs4_server_lst_lock); 3264 #ifdef DEBUG 3265 if (nfs4_server_t_debug) 3266 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3267 #endif 3268 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3269 if (p->zoneid != zoneid) 3270 continue; 3271 if (p->saddr.len == old->sv_addr.len && 3272 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3273 p->s_thread_exit != NFS4_THREAD_EXIT) { 3274 op = p; 3275 mutex_enter(&op->s_lock); 3276 op->s_refcnt++; 3277 } 3278 if (p->saddr.len == new->sv_addr.len && 3279 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3280 p->s_thread_exit != NFS4_THREAD_EXIT) { 3281 np = p; 3282 mutex_enter(&np->s_lock); 3283 } 3284 if (op != NULL && np != NULL) 3285 break; 3286 } 3287 if (op == NULL) { 3288 /* 3289 * Filesystem has been forcibly unmounted. Bail out. 3290 */ 3291 if (np != NULL) 3292 mutex_exit(&np->s_lock); 3293 mutex_exit(&nfs4_server_lst_lock); 3294 return (NULL); 3295 } 3296 if (np != NULL) { 3297 np->s_refcnt++; 3298 } else { 3299 #ifdef DEBUG 3300 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3301 "nfs4_move_mi: no target nfs4_server, will create.")); 3302 #endif 3303 np = add_new_nfs4_server(new, kcred); 3304 } 3305 mutex_exit(&nfs4_server_lst_lock); 3306 3307 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3308 "nfs4_move_mi: for mi 0x%p, " 3309 "old servinfo4 0x%p, new servinfo4 0x%p, " 3310 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3311 (void*)mi, (void*)old, (void*)new, 3312 (void*)op, (void*)np)); 3313 ASSERT(op != NULL && np != NULL); 3314 3315 /* discard any delegations */ 3316 nfs4_deleg_discard(mi, op); 3317 3318 num_open = mi->mi_open_files; 3319 mi->mi_open_files = 0; 3320 op->state_ref_count -= num_open; 3321 ASSERT(op->state_ref_count >= 0); 3322 np->state_ref_count += num_open; 3323 nfs4_remove_mi_from_server_nolock(mi, op); 3324 mi->mi_open_files = num_open; 3325 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3326 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3327 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3328 3329 nfs4_add_mi_to_server(np, mi); 3330 3331 mutex_exit(&op->s_lock); 3332 nfs4_server_rele(op); 3333 mutex_exit(&np->s_lock); 3334 3335 return (np); 3336 } 3337 3338 /* 3339 * Need to have the nfs4_server_lst_lock. 3340 * Search the nfs4_server list to find a match on this servinfo4 3341 * based on its address. 3342 * 3343 * Returns NULL if no match is found. Otherwise returns a reference (which 3344 * must eventually be freed) to a locked nfs4_server. 3345 */ 3346 nfs4_server_t * 3347 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3348 { 3349 nfs4_server_t *np; 3350 zoneid_t zoneid = nfs_zoneid(); 3351 3352 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3353 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3354 if (np->zoneid == zoneid && 3355 np->saddr.len == srv_p->sv_addr.len && 3356 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3357 np->saddr.len) == 0 && 3358 np->s_thread_exit != NFS4_THREAD_EXIT) { 3359 mutex_enter(&np->s_lock); 3360 np->s_refcnt++; 3361 return (np); 3362 } 3363 } 3364 return (NULL); 3365 } 3366 3367 /* 3368 * Search the nfs4_server_lst to find a match based on clientid and 3369 * addr. 3370 * Locks the nfs4_server down if it is found and returns a reference that 3371 * must eventually be freed. 3372 * 3373 * Returns NULL it no match is found. This means one of two things: either 3374 * mi is in the process of being mounted, or mi has been unmounted. 3375 * 3376 * The caller should be holding mi->mi_recovlock, and it should continue to 3377 * hold the lock until done with the returned nfs4_server_t. Once 3378 * mi->mi_recovlock is released, there is no guarantee that the returned 3379 * mi->nfs4_server_t will continue to correspond to mi. 3380 */ 3381 nfs4_server_t * 3382 find_nfs4_server(mntinfo4_t *mi) 3383 { 3384 return (find_nfs4_server_all(mi, 0)); 3385 } 3386 3387 /* 3388 * Same as above, but takes an "all" parameter which can be 3389 * set to 1 if the caller wishes to find nfs4_server_t's which 3390 * have been marked for termination by the exit of the renew 3391 * thread. This should only be used by operations which are 3392 * cleaning up and will not cause an OTW op. 3393 */ 3394 nfs4_server_t * 3395 find_nfs4_server_all(mntinfo4_t *mi, int all) 3396 { 3397 nfs4_server_t *np; 3398 servinfo4_t *svp; 3399 zoneid_t zoneid = mi->mi_zone->zone_id; 3400 3401 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3402 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3403 /* 3404 * This can be called from nfs4_unmount() which can be called from the 3405 * global zone, hence it's legal for the global zone to muck with 3406 * another zone's server list, as long as it doesn't try to contact 3407 * them. 3408 */ 3409 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3410 nfs_global_client_only != 0); 3411 3412 /* 3413 * The nfs4_server_lst_lock global lock is held when we get a new 3414 * clientid (via SETCLIENTID OTW). Holding this global lock and 3415 * mi_recovlock (READER is fine) ensures that the nfs4_server 3416 * and this mntinfo4 can't get out of sync, so the following search is 3417 * always valid. 3418 */ 3419 mutex_enter(&nfs4_server_lst_lock); 3420 #ifdef DEBUG 3421 if (nfs4_server_t_debug) { 3422 /* mi->mi_clientid is unprotected, ok for debug output */ 3423 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3424 mi->mi_curr_serv); 3425 } 3426 #endif 3427 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3428 mutex_enter(&np->s_lock); 3429 svp = mi->mi_curr_serv; 3430 3431 if (np->zoneid == zoneid && 3432 np->clientid == mi->mi_clientid && 3433 np->saddr.len == svp->sv_addr.len && 3434 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3435 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3436 mutex_exit(&nfs4_server_lst_lock); 3437 np->s_refcnt++; 3438 return (np); 3439 } 3440 mutex_exit(&np->s_lock); 3441 } 3442 mutex_exit(&nfs4_server_lst_lock); 3443 3444 return (NULL); 3445 } 3446 3447 /* 3448 * Release the reference to sp and destroy it if that's the last one. 3449 */ 3450 3451 void 3452 nfs4_server_rele(nfs4_server_t *sp) 3453 { 3454 mutex_enter(&sp->s_lock); 3455 ASSERT(sp->s_refcnt > 0); 3456 sp->s_refcnt--; 3457 if (sp->s_refcnt > 0) { 3458 mutex_exit(&sp->s_lock); 3459 return; 3460 } 3461 mutex_exit(&sp->s_lock); 3462 3463 mutex_enter(&nfs4_server_lst_lock); 3464 mutex_enter(&sp->s_lock); 3465 if (sp->s_refcnt > 0) { 3466 mutex_exit(&sp->s_lock); 3467 mutex_exit(&nfs4_server_lst_lock); 3468 return; 3469 } 3470 remque(sp); 3471 sp->forw = sp->back = NULL; 3472 mutex_exit(&nfs4_server_lst_lock); 3473 destroy_nfs4_server(sp); 3474 } 3475 3476 static void 3477 destroy_nfs4_server(nfs4_server_t *sp) 3478 { 3479 ASSERT(MUTEX_HELD(&sp->s_lock)); 3480 ASSERT(sp->s_refcnt == 0); 3481 ASSERT(sp->s_otw_call_count == 0); 3482 3483 remove_all_mi(sp); 3484 3485 crfree(sp->s_cred); 3486 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3487 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3488 mutex_exit(&sp->s_lock); 3489 3490 /* destroy the nfs4_server */ 3491 nfs4callback_destroy(sp); 3492 list_destroy(&sp->s_deleg_list); 3493 mutex_destroy(&sp->s_lock); 3494 cv_destroy(&sp->cv_thread_exit); 3495 cv_destroy(&sp->s_cv_otw_count); 3496 cv_destroy(&sp->s_clientid_pend); 3497 cv_destroy(&sp->wait_cb_null); 3498 nfs_rw_destroy(&sp->s_recovlock); 3499 kmem_free(sp, sizeof (*sp)); 3500 } 3501 3502 /* 3503 * Lock sp, but only if it's still active (in the list and hasn't been 3504 * flagged as exiting) or 'all' is non-zero. 3505 * Returns TRUE if sp got locked and adds a reference to sp. 3506 */ 3507 bool_t 3508 nfs4_server_vlock(nfs4_server_t *sp, int all) 3509 { 3510 nfs4_server_t *np; 3511 3512 mutex_enter(&nfs4_server_lst_lock); 3513 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3514 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3515 all != 0)) { 3516 mutex_enter(&np->s_lock); 3517 np->s_refcnt++; 3518 mutex_exit(&nfs4_server_lst_lock); 3519 return (TRUE); 3520 } 3521 } 3522 mutex_exit(&nfs4_server_lst_lock); 3523 return (FALSE); 3524 } 3525 3526 /* 3527 * Fork off a thread to free the data structures for a mount. 3528 */ 3529 3530 static void 3531 async_free_mount(vfs_t *vfsp, cred_t *cr) 3532 { 3533 freemountargs_t *args; 3534 3535 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3536 args->fm_vfsp = vfsp; 3537 VFS_HOLD(vfsp); 3538 args->fm_cr = cr; 3539 crhold(cr); 3540 3541 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3542 minclsyspri); 3543 } 3544 3545 static void 3546 nfs4_free_mount_thread(freemountargs_t *args) 3547 { 3548 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3549 VFS_RELE(args->fm_vfsp); 3550 crfree(args->fm_cr); 3551 kmem_free(args, sizeof (freemountargs_t)); 3552 zthread_exit(); 3553 /* NOTREACHED */ 3554 } 3555 3556 /* 3557 * Thread to free the data structures for a given filesystem. 3558 */ 3559 static void 3560 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3561 { 3562 mntinfo4_t *mi = VFTOMI4(vfsp); 3563 nfs4_server_t *sp; 3564 callb_cpr_t cpr_info; 3565 kmutex_t cpr_lock; 3566 boolean_t async_thread; 3567 3568 /* 3569 * We need to participate in the CPR framework if this is a kernel 3570 * thread. 3571 */ 3572 async_thread = (curproc == nfs_zone()->zone_zsched); 3573 if (async_thread) { 3574 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3575 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3576 "nfsv4AsyncUnmount"); 3577 } 3578 3579 /* 3580 * We need to wait for all outstanding OTW calls 3581 * and recovery to finish before we remove the mi 3582 * from the nfs4_server_t, as current pending 3583 * calls might still need this linkage (in order 3584 * to find a nfs4_server_t from a mntinfo4_t). 3585 */ 3586 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3587 sp = find_nfs4_server(mi); 3588 nfs_rw_exit(&mi->mi_recovlock); 3589 3590 if (sp) { 3591 while (sp->s_otw_call_count != 0) { 3592 if (async_thread) { 3593 mutex_enter(&cpr_lock); 3594 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3595 mutex_exit(&cpr_lock); 3596 } 3597 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3598 if (async_thread) { 3599 mutex_enter(&cpr_lock); 3600 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3601 mutex_exit(&cpr_lock); 3602 } 3603 } 3604 mutex_exit(&sp->s_lock); 3605 nfs4_server_rele(sp); 3606 sp = NULL; 3607 } 3608 3609 3610 mutex_enter(&mi->mi_lock); 3611 while (mi->mi_in_recovery != 0) { 3612 if (async_thread) { 3613 mutex_enter(&cpr_lock); 3614 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3615 mutex_exit(&cpr_lock); 3616 } 3617 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3618 if (async_thread) { 3619 mutex_enter(&cpr_lock); 3620 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3621 mutex_exit(&cpr_lock); 3622 } 3623 } 3624 mutex_exit(&mi->mi_lock); 3625 3626 /* 3627 * The original purge of the dnlc via 'dounmount' 3628 * doesn't guarantee that another dnlc entry was not 3629 * added while we waitied for all outstanding OTW 3630 * and recovery calls to finish. So re-purge the 3631 * dnlc now. 3632 */ 3633 (void) dnlc_purge_vfsp(vfsp, 0); 3634 3635 /* 3636 * We need to explicitly stop the manager thread; the asyc worker 3637 * threads can timeout and exit on their own. 3638 */ 3639 nfs4_async_manager_stop(vfsp); 3640 3641 destroy_rtable4(vfsp, cr); 3642 3643 nfs4_remove_mi_from_server(mi, NULL); 3644 3645 if (mi->mi_io_kstats) { 3646 kstat_delete(mi->mi_io_kstats); 3647 mi->mi_io_kstats = NULL; 3648 } 3649 if (mi->mi_ro_kstats) { 3650 kstat_delete(mi->mi_ro_kstats); 3651 mi->mi_ro_kstats = NULL; 3652 } 3653 if (mi->mi_recov_ksp) { 3654 kstat_delete(mi->mi_recov_ksp); 3655 mi->mi_recov_ksp = NULL; 3656 } 3657 3658 if (async_thread) { 3659 mutex_enter(&cpr_lock); 3660 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3661 mutex_destroy(&cpr_lock); 3662 } 3663 } 3664