1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
 *	All Rights Reserved
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/time.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/dnlc.h>
#include <sys/list.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

/*
 * Arguments passed to thread to free data structures from forced unmount.
 *
 * The structure bundles the two values that the free-mount routines
 * declared below take as separate parameters (a vfs_t * and a cred_t *),
 * so that they can be handed to nfs4_free_mount_thread() as one pointer.
 */

typedef struct {
	vfs_t	*fm_vfsp;	/* presumably the vfs being torn down */
	cred_t	*fm_cr;		/* presumably the credentials to use */
} freemountargs_t;

/* Forward declarations of file-local helpers. */
static void	async_free_mount(vfs_t *, cred_t *);
static void	nfs4_free_mount(vfs_t *, cred_t *);
static void	nfs4_free_mount_thread(freemountargs_t *);
static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);

/*
 * From rpcsec module (common/rpcsec).
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * Names of the NFS version 4 operations, one string per operation.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v4_tmpl in nfs_stats.c
 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access", "close", "commit", "create",
	"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
	"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
	"read", "readdir", "readlink", "remove", "rename", "renew",
	"restorefh", "savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};

/*
 * nfs4_max_mount_retry is the number of times the client will redrive
 * a mount compound before giving up and returning failure.  The intent
 * is to redrive mount compounds which fail NFS4ERR_STALE so that
 * if a component of the server path being mounted goes stale, it can
 * "recover" by redriving the mount compound (LOOKUP ops).  This recovery
 * code is needed outside of the recovery framework because mount is a
 * special case.  The client doesn't create vnodes/rnodes for components
 * of the server path being mounted.  The recovery code recovers real
 * client objects, not STALE FHs which map to components of the server
 * path being mounted.
 *
 * We could just fail the mount on the first time, but that would
 * instantly trigger failover (from nfs4_mount), and the client should
 * try to re-lookup the STALE FH before doing failover.  The easiest
 * way to "re-lookup" is to simply redrive the mount compound.
 */
static int nfs4_max_mount_retry = 2;

/*
 * nfs4 vfs operations.
 *
 * These are the entry points that nfs4init() installs through its
 * vfs operations template.
 */
static int	nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	nfs4_unmount(vfs_t *, int, cred_t *);
static int	nfs4_root(vfs_t *, vnode_t **);
static int	nfs4_statvfs(vfs_t *, struct statvfs64 *);
static int	nfs4_sync(vfs_t *, short, cred_t *);
static int	nfs4_vget(vfs_t *, vnode_t **, fid_t *);
static int	nfs4_mountroot(vfs_t *, whymountroot_t);
static void	nfs4_freevfs(vfs_t *);

static int	nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
		    int, cred_t *, zone_t *);

/* Operations vector filled in by vfs_setfsops() from nfs4init(). */
vfsops_t	*nfs4_vfsops;

int		nfs4_vfsinit(void);
void		nfs4_vfsfini(void);
static void	nfs4setclientid_init(void);
static void	nfs4setclientid_fini(void);
static void	nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
		    struct nfs4_server *, nfs4_error_t *, int *);
static void	destroy_nfs4_server(nfs4_server_t *);
static void	remove_mi(nfs4_server_t *, mntinfo4_t *);

/*
 * Initialize the vfs structure
 */

/* File system type index handed to nfs4init(), saved there for later use. */
static int nfs4fstyp;


/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
168 */ 169 extern int rdma_debug; 170 171 int 172 nfs4init(int fstyp, char *name) 173 { 174 static const fs_operation_def_t nfs4_vfsops_template[] = { 175 VFSNAME_MOUNT, nfs4_mount, 176 VFSNAME_UNMOUNT, nfs4_unmount, 177 VFSNAME_ROOT, nfs4_root, 178 VFSNAME_STATVFS, nfs4_statvfs, 179 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 180 VFSNAME_VGET, nfs4_vget, 181 VFSNAME_MOUNTROOT, nfs4_mountroot, 182 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 183 NULL, NULL 184 }; 185 int error; 186 187 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 188 if (error != 0) { 189 zcmn_err(GLOBAL_ZONEID, CE_WARN, 190 "nfs4init: bad vfs ops template"); 191 return (error); 192 } 193 194 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 195 if (error != 0) { 196 (void) vfs_freevfsops_by_type(fstyp); 197 zcmn_err(GLOBAL_ZONEID, CE_WARN, 198 "nfs4init: bad vnode ops template"); 199 return (error); 200 } 201 202 nfs4fstyp = fstyp; 203 204 (void) nfs4_vfsinit(); 205 206 (void) nfs4_init_dot_entries(); 207 208 return (0); 209 } 210 211 void 212 nfs4fini(void) 213 { 214 (void) nfs4_destroy_dot_entries(); 215 nfs4_vfsfini(); 216 } 217 218 /* 219 * Create a new sec_data structure to store AUTH_DH related data: 220 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 221 * flag set for NFS V4 since we are avoiding to contact the rpcbind 222 * daemon and is using the IP time service (IPPORT_TIMESERVER). 223 * 224 * sec_data can be freed by sec_clnt_freeinfo(). 
225 */ 226 struct sec_data * 227 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 228 struct knetconfig *knconf) { 229 struct sec_data *secdata; 230 dh_k4_clntdata_t *data; 231 char *pf, *p; 232 233 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 234 return (NULL); 235 236 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 237 secdata->flags = 0; 238 239 data = kmem_alloc(sizeof (*data), KM_SLEEP); 240 241 data->syncaddr.maxlen = syncaddr->maxlen; 242 data->syncaddr.len = syncaddr->len; 243 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 244 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 245 246 /* 247 * duplicate the knconf information for the 248 * new opaque data. 249 */ 250 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 251 *data->knconf = *knconf; 252 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 253 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 254 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 255 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 256 data->knconf->knc_protofmly = pf; 257 data->knconf->knc_proto = p; 258 259 /* move server netname to the sec_data structure */ 260 data->netname = kmem_alloc(nlen, KM_SLEEP); 261 bcopy(netname, data->netname, nlen); 262 data->netnamelen = (int)nlen; 263 264 secdata->secmod = AUTH_DH; 265 secdata->rpcflavor = AUTH_DH; 266 secdata->data = (caddr_t)data; 267 268 return (secdata); 269 } 270 271 static int 272 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 273 { 274 servinfo4_t *si; 275 276 /* 277 * Iterate over the servinfo4 list to make sure 278 * we do not have a duplicate. 
Skip any servinfo4 279 * that has been marked "NOT IN USE" 280 */ 281 for (si = svp_head; si; si = si->sv_next) { 282 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 283 if (si->sv_flags & SV4_NOTINUSE) { 284 nfs_rw_exit(&si->sv_lock); 285 continue; 286 } 287 nfs_rw_exit(&si->sv_lock); 288 if (si == svp) 289 continue; 290 if (si->sv_addr.len == svp->sv_addr.len && 291 strcmp(si->sv_knconf->knc_protofmly, 292 svp->sv_knconf->knc_protofmly) == 0 && 293 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 294 si->sv_addr.len) == 0) { 295 /* it's a duplicate */ 296 return (1); 297 } 298 } 299 /* it's not a duplicate */ 300 return (0); 301 } 302 303 /* 304 * nfs mount vfsop 305 * Set up mount info record and attach it to vfs struct. 306 */ 307 static int 308 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 309 { 310 char *data = uap->dataptr; 311 int error; 312 vnode_t *rtvp; /* the server's root */ 313 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 314 size_t hlen; /* length of hostname */ 315 size_t nlen; /* length of netname */ 316 char netname[MAXNETNAMELEN+1]; /* server's netname */ 317 struct netbuf addr; /* server's address */ 318 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 319 struct knetconfig *knconf; /* transport knetconfig structure */ 320 struct knetconfig *rdma_knconf; /* rdma transport structure */ 321 rnode4_t *rp; 322 struct servinfo4 *svp; /* nfs server info */ 323 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 324 struct servinfo4 *svp_head; /* first nfs server info */ 325 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 326 struct sec_data *secdata; /* security data */ 327 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 328 STRUCT_DECL(knetconfig, knconf_tmp); 329 STRUCT_DECL(netbuf, addr_tmp); 330 int flags, addr_type; 331 char *p, *pf; 332 struct pathname pn; 333 char *userbufptr; 334 zone_t *zone = nfs_zone(); 335 nfs4_error_t n4e; 336 337 if (secpolicy_fs_mount(cr, mvp, vfsp) 
!= 0) 338 return (EPERM); 339 if (mvp->v_type != VDIR) 340 return (ENOTDIR); 341 /* 342 * get arguments 343 * 344 * nfs_args is now versioned and is extensible, so 345 * uap->datalen might be different from sizeof (args) 346 * in a compatible situation. 347 */ 348 more: 349 STRUCT_INIT(args, get_udatamodel()); 350 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 351 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 352 STRUCT_SIZE(args)))) 353 return (EFAULT); 354 355 flags = STRUCT_FGET(args, flags); 356 357 /* 358 * If the request changes the locking type, disallow the remount, 359 * because it's questionable whether we can transfer the 360 * locking state correctly. 361 */ 362 if (uap->flags & MS_REMOUNT) { 363 if ((mi = VFTOMI4(vfsp)) != NULL) { 364 uint_t new_mi_llock; 365 uint_t old_mi_llock; 366 367 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 368 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 369 if (old_mi_llock != new_mi_llock) 370 return (EBUSY); 371 } 372 return (0); 373 } 374 375 mutex_enter(&mvp->v_lock); 376 if (!(uap->flags & MS_OVERLAY) && 377 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 378 mutex_exit(&mvp->v_lock); 379 return (EBUSY); 380 } 381 mutex_exit(&mvp->v_lock); 382 383 /* make sure things are zeroed for errout: */ 384 rtvp = NULL; 385 mi = NULL; 386 addr.buf = NULL; 387 syncaddr.buf = NULL; 388 secdata = NULL; 389 390 /* 391 * A valid knetconfig structure is required. 392 */ 393 if (!(flags & NFSMNT_KNCONF)) 394 return (EINVAL); 395 396 /* 397 * Allocate a servinfo4 struct. 398 */ 399 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 400 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 401 if (svp_tail) { 402 svp_2ndlast = svp_tail; 403 svp_tail->sv_next = svp; 404 } else { 405 svp_head = svp; 406 svp_2ndlast = svp; 407 } 408 409 svp_tail = svp; 410 411 /* 412 * Allocate space for a knetconfig structure and 413 * its strings and copy in from user-land. 
414 */ 415 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 416 svp->sv_knconf = knconf; 417 STRUCT_INIT(knconf_tmp, get_udatamodel()); 418 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 419 STRUCT_SIZE(knconf_tmp))) { 420 sv4_free(svp_head); 421 return (EFAULT); 422 } 423 424 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 425 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 426 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 427 if (get_udatamodel() != DATAMODEL_LP64) { 428 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 429 } else { 430 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 431 } 432 433 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 434 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 435 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 436 if (error) { 437 kmem_free(pf, KNC_STRSIZE); 438 kmem_free(p, KNC_STRSIZE); 439 sv4_free(svp_head); 440 return (error); 441 } 442 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 443 if (error) { 444 kmem_free(pf, KNC_STRSIZE); 445 kmem_free(p, KNC_STRSIZE); 446 sv4_free(svp_head); 447 return (error); 448 } 449 if (strcmp(p, NC_UDP) == 0) { 450 kmem_free(pf, KNC_STRSIZE); 451 kmem_free(p, KNC_STRSIZE); 452 sv4_free(svp_head); 453 return (ENOTSUP); 454 } 455 knconf->knc_protofmly = pf; 456 knconf->knc_proto = p; 457 458 /* 459 * Get server address 460 */ 461 STRUCT_INIT(addr_tmp, get_udatamodel()); 462 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 463 STRUCT_SIZE(addr_tmp))) { 464 error = EFAULT; 465 goto errout; 466 } 467 468 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 469 addr.len = STRUCT_FGET(addr_tmp, len); 470 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 471 addr.maxlen = addr.len; 472 if (copyin(userbufptr, addr.buf, addr.len)) { 473 kmem_free(addr.buf, addr.len); 474 error = EFAULT; 475 goto errout; 476 } 477 478 svp->sv_addr = addr; 479 480 /* 481 * Get the root fhandle 482 */ 483 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 484 485 if (error) 486 goto errout; 487 488 /* Volatile fh: keep server paths, so use actual-size strings */ 489 svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 490 bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen); 491 svp->sv_path[pn.pn_pathlen] = '\0'; 492 svp->sv_pathlen = pn.pn_pathlen + 1; 493 pn_free(&pn); 494 495 /* 496 * Get server's hostname 497 */ 498 if (flags & NFSMNT_HOSTNAME) { 499 error = copyinstr(STRUCT_FGETP(args, hostname), 500 netname, sizeof (netname), &hlen); 501 if (error) 502 goto errout; 503 } else { 504 char *p = "unknown-host"; 505 hlen = strlen(p) + 1; 506 (void) strcpy(netname, p); 507 } 508 svp->sv_hostnamelen = hlen; 509 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 510 (void) strcpy(svp->sv_hostname, netname); 511 512 /* 513 * RDMA MOUNT SUPPORT FOR NFS v4. 514 * Establish, is it possible to use RDMA, if so overload the 515 * knconf with rdma specific knconf and free the orignal knconf. 516 */ 517 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 518 /* 519 * Determine the addr type for RDMA, IPv4 or v6. 520 */ 521 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 522 addr_type = AF_INET; 523 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 524 addr_type = AF_INET6; 525 526 if (rdma_reachable(addr_type, &svp->sv_addr, 527 &rdma_knconf) == 0) { 528 /* 529 * If successful, hijack the orignal knconf and 530 * replace with the new one, depending on the flags. 531 */ 532 svp->sv_origknconf = svp->sv_knconf; 533 svp->sv_knconf = rdma_knconf; 534 knconf = rdma_knconf; 535 } else { 536 if (flags & NFSMNT_TRYRDMA) { 537 #ifdef DEBUG 538 if (rdma_debug) 539 zcmn_err(getzoneid(), CE_WARN, 540 "no RDMA onboard, revert\n"); 541 #endif 542 } 543 544 if (flags & NFSMNT_DORDMA) { 545 /* 546 * If proto=rdma is specified and no RDMA 547 * path to this server is avialable then 548 * ditch this server. 
549 * This is not included in the mountable 550 * server list or the replica list. 551 * Check if more servers are specified; 552 * Failover case, otherwise bail out of mount. 553 */ 554 if (STRUCT_FGET(args, nfs_args_ext) == 555 NFS_ARGS_EXTB && STRUCT_FGETP(args, 556 nfs_ext_u.nfs_extB.next) != NULL) { 557 if (uap->flags & MS_RDONLY && 558 !(flags & NFSMNT_SOFT)) { 559 data = (char *) 560 STRUCT_FGETP(args, 561 nfs_ext_u.nfs_extB.next); 562 if (svp_head->sv_next == NULL) { 563 svp_tail = NULL; 564 svp_2ndlast = NULL; 565 sv4_free(svp_head); 566 goto more; 567 } else { 568 svp_tail = svp_2ndlast; 569 svp_2ndlast->sv_next = 570 NULL; 571 sv4_free(svp); 572 goto more; 573 } 574 } 575 } else { 576 /* 577 * This is the last server specified 578 * in the nfs_args list passed down 579 * and its not rdma capable. 580 */ 581 if (svp_head->sv_next == NULL) { 582 /* 583 * Is this the only one 584 */ 585 error = EINVAL; 586 #ifdef DEBUG 587 if (rdma_debug) 588 zcmn_err(getzoneid(), 589 CE_WARN, 590 "No RDMA srv"); 591 #endif 592 goto errout; 593 } else { 594 /* 595 * There is list, since some 596 * servers specified before 597 * this passed all requirements 598 */ 599 svp_tail = svp_2ndlast; 600 svp_2ndlast->sv_next = NULL; 601 sv4_free(svp); 602 goto proceed; 603 } 604 } 605 } 606 } 607 } 608 609 /* 610 * If there are syncaddr and netname data, load them in. This is 611 * to support data needed for NFSV4 when AUTH_DH is the negotiated 612 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 
613 */ 614 netname[0] = '\0'; 615 if (flags & NFSMNT_SECURE) { 616 617 /* get syncaddr */ 618 STRUCT_INIT(addr_tmp, get_udatamodel()); 619 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 620 STRUCT_SIZE(addr_tmp))) { 621 error = EINVAL; 622 goto errout; 623 } 624 userbufptr = STRUCT_FGETP(addr_tmp, buf); 625 syncaddr.len = STRUCT_FGET(addr_tmp, len); 626 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 627 syncaddr.maxlen = syncaddr.len; 628 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 629 kmem_free(syncaddr.buf, syncaddr.len); 630 error = EFAULT; 631 goto errout; 632 } 633 634 /* get server's netname */ 635 if (copyinstr(STRUCT_FGETP(args, netname), netname, 636 sizeof (netname), &nlen)) { 637 kmem_free(syncaddr.buf, syncaddr.len); 638 error = EFAULT; 639 goto errout; 640 } 641 netname[nlen] = '\0'; 642 643 svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr, 644 knconf); 645 } 646 647 /* 648 * Get the extention data which has the security data structure. 649 * This includes data for AUTH_SYS as well. 650 */ 651 if (flags & NFSMNT_NEWARGS) { 652 switch (STRUCT_FGET(args, nfs_args_ext)) { 653 case NFS_ARGS_EXTA: 654 case NFS_ARGS_EXTB: 655 /* 656 * Indicating the application is using the new 657 * sec_data structure to pass in the security 658 * data. 659 */ 660 if (STRUCT_FGETP(args, 661 nfs_ext_u.nfs_extA.secdata) == NULL) { 662 error = EINVAL; 663 } else { 664 error = sec_clnt_loadinfo( 665 (struct sec_data *)STRUCT_FGETP(args, 666 nfs_ext_u.nfs_extA.secdata), 667 &secdata, get_udatamodel()); 668 } 669 break; 670 671 default: 672 error = EINVAL; 673 break; 674 } 675 676 } else if (flags & NFSMNT_SECURE) { 677 /* 678 * NFSMNT_SECURE is deprecated but we keep it 679 * to support the rouge user generated application 680 * that may use this undocumented interface to do 681 * AUTH_DH security. 
682 */ 683 secdata = create_authdh_data(netname, nlen, &syncaddr, knconf); 684 685 } else { 686 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 687 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 688 secdata->data = NULL; 689 } 690 691 svp->sv_secdata = secdata; 692 693 /* syncaddr is no longer needed. */ 694 if (syncaddr.buf != NULL) 695 kmem_free(syncaddr.buf, syncaddr.len); 696 697 /* 698 * User does not explictly specify a flavor, and a user 699 * defined default flavor is passed down. 700 */ 701 if (flags & NFSMNT_SECDEFAULT) { 702 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 703 svp->sv_flags |= SV4_TRYSECDEFAULT; 704 nfs_rw_exit(&svp->sv_lock); 705 } 706 707 /* 708 * Failover support: 709 * 710 * We may have a linked list of nfs_args structures, 711 * which means the user is looking for failover. If 712 * the mount is either not "read-only" or "soft", 713 * we want to bail out with EINVAL. 714 */ 715 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 716 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 717 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 718 data = (char *)STRUCT_FGETP(args, 719 nfs_ext_u.nfs_extB.next); 720 goto more; 721 } 722 error = EINVAL; 723 goto errout; 724 } 725 726 /* 727 * Determine the zone we're being mounted into. 728 */ 729 if (getzoneid() == GLOBAL_ZONEID) { 730 zone_t *mntzone; 731 732 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 733 ASSERT(mntzone != NULL); 734 zone_rele(mntzone); 735 if (mntzone != zone) { 736 error = EBUSY; 737 goto errout; 738 } 739 } 740 741 /* 742 * Stop the mount from going any further if the zone is going away. 743 */ 744 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 745 error = EBUSY; 746 goto errout; 747 } 748 749 /* 750 * Get root vnode. 
751 */ 752 proceed: 753 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, zone); 754 755 if (error) 756 goto errout; 757 758 mi = VTOMI4(rtvp); 759 760 /* 761 * Send client id to the server, if necessary 762 */ 763 nfs4_error_zinit(&n4e); 764 nfs4setclientid(mi, cr, FALSE, &n4e); 765 error = n4e.error; 766 767 if (error) 768 goto errout; 769 770 /* 771 * Set option fields in the mount info record 772 */ 773 774 if (svp_head->sv_next) { 775 mutex_enter(&mi->mi_lock); 776 mi->mi_flags |= MI4_LLOCK; 777 mutex_exit(&mi->mi_lock); 778 } 779 780 error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 781 782 errout: 783 if (error) { 784 if (rtvp != NULL) { 785 rp = VTOR4(rtvp); 786 if (rp->r_flags & R4HASHED) 787 rp4_rmhash(rp); 788 } 789 if (mi != NULL) { 790 nfs4_async_stop(vfsp); 791 nfs4_async_manager_stop(vfsp); 792 nfs4_remove_mi_from_server(mi, NULL); 793 /* 794 * In this error path we need to sfh4_rele() before 795 * we free the mntinfo4_t as sfh4_rele() has a 796 * dependancy on mi_fh_lock. 797 */ 798 if (rtvp != NULL) 799 VN_RELE(rtvp); 800 if (mi->mi_io_kstats) { 801 kstat_delete(mi->mi_io_kstats); 802 mi->mi_io_kstats = NULL; 803 } 804 if (mi->mi_ro_kstats) { 805 kstat_delete(mi->mi_ro_kstats); 806 mi->mi_ro_kstats = NULL; 807 } 808 if (mi->mi_recov_ksp) { 809 kstat_delete(mi->mi_recov_ksp); 810 mi->mi_recov_ksp = NULL; 811 } 812 nfs_free_mi4(mi); 813 return (error); 814 } 815 sv4_free(svp_head); 816 } 817 818 if (rtvp != NULL) 819 VN_RELE(rtvp); 820 821 return (error); 822 } 823 824 #ifdef DEBUG 825 #define VERS_MSG "NFS4 server " 826 #else 827 #define VERS_MSG "NFS server " 828 #endif 829 830 #define READ_MSG \ 831 VERS_MSG "%s returned 0 for read transfer size" 832 #define WRITE_MSG \ 833 VERS_MSG "%s returned 0 for write transfer size" 834 #define SIZE_MSG \ 835 VERS_MSG "%s returned 0 for maximum file size" 836 837 /* 838 * Get the symbolic link text from the server for a given filehandle 839 * of that symlink. 
840 * 841 * (get symlink text) PUTFH READLINK 842 */ 843 static int 844 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 845 int flags) 846 { 847 COMPOUND4args_clnt args; 848 COMPOUND4res_clnt res; 849 int doqueue; 850 nfs_argop4 argop[2]; 851 nfs_resop4 *resop; 852 READLINK4res *lr_res; 853 uint_t len; 854 bool_t needrecov = FALSE; 855 nfs4_recov_state_t recov_state; 856 nfs4_sharedfh_t *sfh; 857 nfs4_error_t e; 858 int num_retry = nfs4_max_mount_retry; 859 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 860 861 sfh = sfh4_get(fh, mi); 862 recov_state.rs_flags = 0; 863 recov_state.rs_num_retry_despite_err = 0; 864 865 recov_retry: 866 nfs4_error_zinit(&e); 867 868 args.array_len = 2; 869 args.array = argop; 870 args.ctag = TAG_GET_SYMLINK; 871 872 if (! recovery) { 873 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 874 if (e.error) { 875 sfh4_rele(&sfh); 876 return (e.error); 877 } 878 } 879 880 /* 0. putfh symlink fh */ 881 argop[0].argop = OP_CPUTFH; 882 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 883 884 /* 1. readlink */ 885 argop[1].argop = OP_READLINK; 886 887 doqueue = 1; 888 889 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 890 891 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 892 893 if (needrecov && !recovery && num_retry-- > 0) { 894 895 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 896 "getlinktext_otw: initiating recovery\n")); 897 898 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 899 OP_READLINK, NULL) == FALSE) { 900 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 901 if (!e.error) 902 (void) xdr_free(xdr_COMPOUND4res_clnt, 903 (caddr_t)&res); 904 goto recov_retry; 905 } 906 } 907 908 /* 909 * If non-NFS4 pcol error and/or we weren't able to recover. 910 */ 911 if (e.error != 0) { 912 if (! 
recovery) 913 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 914 sfh4_rele(&sfh); 915 return (e.error); 916 } 917 918 if (res.status) { 919 e.error = geterrno4(res.status); 920 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 921 if (! recovery) 922 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 923 sfh4_rele(&sfh); 924 return (e.error); 925 } 926 927 /* res.status == NFS4_OK */ 928 ASSERT(res.status == NFS4_OK); 929 930 resop = &res.array[1]; /* readlink res */ 931 lr_res = &resop->nfs_resop4_u.opreadlink; 932 933 /* treat symlink name as data */ 934 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 935 936 if (! recovery) 937 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 938 sfh4_rele(&sfh); 939 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 940 941 return (0); 942 } 943 944 /* 945 * Skip over consecutive slashes and "/./" in a pathname. 946 */ 947 void 948 pathname_skipslashdot(struct pathname *pnp) 949 { 950 char *c1, *c2; 951 952 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 953 954 c1 = pnp->pn_path + 1; 955 c2 = pnp->pn_path + 2; 956 957 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 958 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 959 pnp->pn_pathlen = pnp->pn_pathlen - 2; 960 } else { 961 pnp->pn_path++; 962 pnp->pn_pathlen--; 963 } 964 } 965 } 966 967 /* 968 * Resolve a symbolic link path. The symlink is in the nth component of 969 * svp->sv_path and has an nfs4 file handle "fh". 970 * Upon return, the sv_path will point to the new path that has the nth 971 * component resolved to its symlink text. 972 */ 973 int 974 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 975 cred_t *cr, int flags) 976 { 977 char *oldpath; 978 char *symlink, *newpath; 979 struct pathname oldpn, newpn; 980 char component[MAXNAMELEN]; 981 int i, addlen, error = 0; 982 int oldpathlen; 983 984 /* Get the symbolic link text over the wire. 
*/ 985 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 986 987 if (error || symlink == NULL || strlen(symlink) == 0) 988 return (error); 989 990 /* 991 * Compose the new pathname. 992 * Note: 993 * - only the nth component is resolved for the pathname. 994 * - pathname.pn_pathlen does not count the ending null byte. 995 */ 996 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 997 oldpath = svp->sv_path; 998 oldpathlen = svp->sv_pathlen; 999 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1000 nfs_rw_exit(&svp->sv_lock); 1001 kmem_free(symlink, strlen(symlink) + 1); 1002 return (error); 1003 } 1004 nfs_rw_exit(&svp->sv_lock); 1005 pn_alloc(&newpn); 1006 1007 /* 1008 * Skip over previous components from the oldpath so that the 1009 * oldpn.pn_path will point to the symlink component. Skip 1010 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1011 * pn_getcompnent can get the component. 1012 */ 1013 for (i = 1; i < nth; i++) { 1014 pathname_skipslashdot(&oldpn); 1015 error = pn_getcomponent(&oldpn, component); 1016 if (error) 1017 goto out; 1018 } 1019 1020 /* 1021 * Copy the old path upto the component right before the symlink 1022 * if the symlink is not an absolute path. 1023 */ 1024 if (symlink[0] != '/') { 1025 addlen = oldpn.pn_path - oldpn.pn_buf; 1026 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1027 newpn.pn_pathlen += addlen; 1028 newpn.pn_path += addlen; 1029 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1030 newpn.pn_pathlen++; 1031 newpn.pn_path++; 1032 } 1033 1034 /* copy the resolved symbolic link text */ 1035 addlen = strlen(symlink); 1036 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1037 error = ENAMETOOLONG; 1038 goto out; 1039 } 1040 bcopy(symlink, newpn.pn_path, addlen); 1041 newpn.pn_pathlen += addlen; 1042 newpn.pn_path += addlen; 1043 1044 /* 1045 * Check if there is any remaining path after the symlink component. 1046 * First, skip the symlink component. 
1047 */ 1048 pathname_skipslashdot(&oldpn); 1049 if (error = pn_getcomponent(&oldpn, component)) 1050 goto out; 1051 1052 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1053 1054 /* 1055 * Copy the remaining path to the new pathname if there is any. 1056 */ 1057 if (addlen > 0) { 1058 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1059 error = ENAMETOOLONG; 1060 goto out; 1061 } 1062 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1063 newpn.pn_pathlen += addlen; 1064 } 1065 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1066 1067 /* get the newpath and store it in the servinfo4_t */ 1068 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1069 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1070 newpath[newpn.pn_pathlen] = '\0'; 1071 1072 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1073 svp->sv_path = newpath; 1074 svp->sv_pathlen = strlen(newpath) + 1; 1075 nfs_rw_exit(&svp->sv_lock); 1076 1077 kmem_free(oldpath, oldpathlen); 1078 out: 1079 kmem_free(symlink, strlen(symlink) + 1); 1080 pn_free(&newpn); 1081 pn_free(&oldpn); 1082 1083 return (error); 1084 } 1085 1086 /* 1087 * Get the root filehandle for the given filesystem and server, and update 1088 * svp. 1089 * 1090 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1091 * to coordinate with recovery. Otherwise, the caller is assumed to be 1092 * the recovery thread or have already done a start_fop. 1093 * 1094 * Errors are returned by the nfs4_error_t parameter. 
1095 */ 1096 1097 static void 1098 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1099 int flags, cred_t *cr, nfs4_error_t *ep) 1100 { 1101 COMPOUND4args_clnt args; 1102 COMPOUND4res_clnt res; 1103 int doqueue = 1; 1104 nfs_argop4 *argop; 1105 nfs_resop4 *resop; 1106 nfs4_ga_res_t *garp; 1107 int num_argops; 1108 lookup4_param_t lookuparg; 1109 nfs_fh4 *tmpfhp; 1110 nfs_fh4 *resfhp; 1111 bool_t needrecov = FALSE; 1112 nfs4_recov_state_t recov_state; 1113 int llndx; 1114 int nthcomp; 1115 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1116 1117 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1118 ASSERT(svp->sv_path != NULL); 1119 if (svp->sv_path[0] == '\0') { 1120 nfs_rw_exit(&svp->sv_lock); 1121 nfs4_error_init(ep, EINVAL); 1122 return; 1123 } 1124 nfs_rw_exit(&svp->sv_lock); 1125 1126 recov_state.rs_flags = 0; 1127 recov_state.rs_num_retry_despite_err = 0; 1128 recov_retry: 1129 nfs4_error_zinit(ep); 1130 1131 if (!recovery) { 1132 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1133 &recov_state, NULL); 1134 1135 /* 1136 * If recovery has been started and this request as 1137 * initiated by a mount, then we must wait for recovery 1138 * to finish before proceeding, otherwise, the error 1139 * cleanup would remove data structures needed by the 1140 * recovery thread. 1141 */ 1142 if (ep->error) { 1143 mutex_enter(&mi->mi_lock); 1144 if (mi->mi_flags & MI4_MOUNTING) { 1145 mi->mi_flags |= MI4_RECOV_FAIL; 1146 mi->mi_error = EIO; 1147 1148 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1149 "nfs4getfh_otw: waiting 4 recovery\n")); 1150 1151 while (mi->mi_flags & MI4_RECOV_ACTIV) 1152 cv_wait(&mi->mi_failover_cv, 1153 &mi->mi_lock); 1154 } 1155 mutex_exit(&mi->mi_lock); 1156 return; 1157 } 1158 1159 /* 1160 * If the client does not specify a specific flavor to use 1161 * and has not gotten a secinfo list from the server yet, 1162 * retrieve the secinfo list from the server and use a 1163 * flavor from the list to mount. 
1164 * 1165 * If fail to get the secinfo list from the server, then 1166 * try the default flavor. 1167 */ 1168 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1169 svp->sv_secinfo == NULL) { 1170 (void) nfs4_secinfo_path(mi, cr, FALSE); 1171 } 1172 } 1173 1174 if (recovery) 1175 args.ctag = TAG_REMAP_MOUNT; 1176 else 1177 args.ctag = TAG_MOUNT; 1178 1179 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1180 lookuparg.argsp = &args; 1181 lookuparg.resp = &res; 1182 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1183 lookuparg.trailer_len = 0; 1184 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1185 lookuparg.mi = mi; 1186 1187 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1188 ASSERT(svp->sv_path != NULL); 1189 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1190 nfs_rw_exit(&svp->sv_lock); 1191 1192 argop = args.array; 1193 num_argops = args.array_len; 1194 1195 /* choose public or root filehandle */ 1196 if (flags & NFS4_GETFH_PUBLIC) 1197 argop[0].argop = OP_PUTPUBFH; 1198 else 1199 argop[0].argop = OP_PUTROOTFH; 1200 1201 /* get fh */ 1202 argop[1].argop = OP_GETFH; 1203 1204 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1205 "nfs4getfh_otw: %s call, mi 0x%p", 1206 needrecov ? 
"recov" : "first", (void *)mi)); 1207 1208 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1209 1210 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1211 1212 if (needrecov) { 1213 bool_t abort; 1214 1215 if (recovery) { 1216 nfs4args_lookup_free(argop, num_argops); 1217 kmem_free(argop, 1218 lookuparg.arglen * sizeof (nfs_argop4)); 1219 if (!ep->error) 1220 (void) xdr_free(xdr_COMPOUND4res_clnt, 1221 (caddr_t)&res); 1222 return; 1223 } 1224 1225 NFS4_DEBUG(nfs4_client_recov_debug, 1226 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1227 1228 abort = nfs4_start_recovery(ep, mi, NULL, 1229 NULL, NULL, NULL, OP_GETFH, NULL); 1230 if (!ep->error) { 1231 ep->error = geterrno4(res.status); 1232 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1233 } 1234 nfs4args_lookup_free(argop, num_argops); 1235 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1236 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1237 /* have another go? */ 1238 if (abort == FALSE) 1239 goto recov_retry; 1240 return; 1241 } 1242 1243 /* 1244 * No recovery, but check if error is set. 1245 */ 1246 if (ep->error) { 1247 nfs4args_lookup_free(argop, num_argops); 1248 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1249 if (!recovery) 1250 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1251 needrecov); 1252 return; 1253 } 1254 1255 is_link_err: 1256 1257 /* for non-recovery errors */ 1258 if (res.status && res.status != NFS4ERR_SYMLINK) { 1259 if (!recovery) { 1260 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1261 needrecov); 1262 } 1263 nfs4args_lookup_free(argop, num_argops); 1264 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1265 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1266 return; 1267 } 1268 1269 /* 1270 * If any intermediate component in the path is a symbolic link, 1271 * resolve the symlink, then try mount again using the new path. 
1272 */ 1273 if (res.status == NFS4ERR_SYMLINK) { 1274 int where; 1275 1276 /* 1277 * This must be from OP_LOOKUP failure. The (cfh) for this 1278 * OP_LOOKUP is a symlink node. Found out where the 1279 * OP_GETFH is for the (cfh) that is a symlink node. 1280 * 1281 * Example: 1282 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1283 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1284 * 1285 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1286 * In this case, where = 7, nthcomp = 2. 1287 */ 1288 where = res.array_len - 2; 1289 ASSERT(where > 0); 1290 1291 resop = &res.array[where - 1]; 1292 ASSERT(resop->resop == OP_GETFH); 1293 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1294 nthcomp = res.array_len/3 - 1; 1295 1296 /* 1297 * Need to call nfs4_end_op before resolve_sympath to avoid 1298 * potential nfs4_start_op deadlock. 1299 */ 1300 if (!recovery) 1301 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1302 needrecov); 1303 1304 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1305 flags); 1306 1307 nfs4args_lookup_free(argop, num_argops); 1308 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1309 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1310 1311 if (ep->error) 1312 return; 1313 1314 goto recov_retry; 1315 } 1316 1317 /* getfh */ 1318 resop = &res.array[res.array_len - 2]; 1319 ASSERT(resop->resop == OP_GETFH); 1320 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1321 1322 /* getattr fsinfo res */ 1323 resop++; 1324 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1325 1326 *vtp = garp->n4g_va.va_type; 1327 1328 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1329 1330 mutex_enter(&mi->mi_lock); 1331 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1332 mi->mi_flags |= MI4_LINK; 1333 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1334 mi->mi_flags |= MI4_SYMLINK; 1335 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1336 mi->mi_flags |= MI4_ACL; 1337 mutex_exit(&mi->mi_lock); 
1338 1339 if (garp->n4g_ext_res->n4g_maxread == 0) 1340 mi->mi_tsize = 1341 MIN(MAXBSIZE, mi->mi_tsize); 1342 else 1343 mi->mi_tsize = 1344 MIN(garp->n4g_ext_res->n4g_maxread, 1345 mi->mi_tsize); 1346 1347 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1348 mi->mi_stsize = 1349 MIN(MAXBSIZE, mi->mi_stsize); 1350 else 1351 mi->mi_stsize = 1352 MIN(garp->n4g_ext_res->n4g_maxwrite, 1353 mi->mi_stsize); 1354 1355 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1356 mi->mi_maxfilesize = 1357 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1358 mi->mi_maxfilesize); 1359 1360 /* 1361 * If the final component is a a symbolic link, resolve the symlink, 1362 * then try mount again using the new path. 1363 * 1364 * Assume no symbolic link for root filesysm "/". 1365 */ 1366 if (*vtp == VLNK) { 1367 /* 1368 * nthcomp is the total result length minus 1369 * the 1st 2 OPs (PUTROOTFH, GETFH), 1370 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1371 * 1372 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1373 * LOOKUP 2nd-comp GETFH GETATTR 1374 * 1375 * (8 - 2)/3 = 2 1376 */ 1377 nthcomp = (res.array_len - 2)/3; 1378 1379 /* 1380 * Need to call nfs4_end_op before resolve_sympath to avoid 1381 * potential nfs4_start_op deadlock. See RFE 4777612. 1382 */ 1383 if (!recovery) 1384 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1385 needrecov); 1386 1387 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1388 flags); 1389 1390 nfs4args_lookup_free(argop, num_argops); 1391 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1392 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1393 1394 if (ep->error) 1395 return; 1396 1397 goto recov_retry; 1398 } 1399 1400 /* 1401 * We need to figure out where in the compound the getfh 1402 * for the parent directory is. If the object to be mounted is 1403 * the root, then there is no lookup at all: 1404 * PUTROOTFH, GETFH. 
1405 * If the object to be mounted is in the root, then the compound is: 1406 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1407 * In either of these cases, the index of the GETFH is 1. 1408 * If it is not at the root, then it's something like: 1409 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1410 * LOOKUP, GETFH, GETATTR 1411 * In this case, the index is llndx (last lookup index) - 2. 1412 */ 1413 if (llndx == -1 || llndx == 2) 1414 resop = &res.array[1]; 1415 else { 1416 ASSERT(llndx > 2); 1417 resop = &res.array[llndx-2]; 1418 } 1419 1420 ASSERT(resop->resop == OP_GETFH); 1421 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1422 1423 /* save the filehandles for the replica */ 1424 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1425 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1426 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1427 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1428 tmpfhp->nfs_fh4_len); 1429 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1430 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1431 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1432 1433 /* initialize fsid and supp_attrs for server fs */ 1434 svp->sv_fsid = garp->n4g_fsid; 1435 svp->sv_supp_attrs = 1436 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1437 1438 nfs_rw_exit(&svp->sv_lock); 1439 1440 nfs4args_lookup_free(argop, num_argops); 1441 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1442 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1443 if (!recovery) 1444 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1445 } 1446 1447 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1448 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1449 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1450 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1451 1452 /* 1453 * Remap the root filehandle for the given filesystem. 
1454 * 1455 * results returned via the nfs4_error_t parameter. 1456 */ 1457 void 1458 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1459 { 1460 struct servinfo4 *svp; 1461 vtype_t vtype; 1462 nfs_fh4 rootfh; 1463 int getfh_flags; 1464 char *orig_sv_path; 1465 int orig_sv_pathlen, num_retry; 1466 1467 mutex_enter(&mi->mi_lock); 1468 1469 remap_retry: 1470 svp = mi->mi_curr_serv; 1471 getfh_flags = 1472 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1473 getfh_flags |= 1474 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1475 mutex_exit(&mi->mi_lock); 1476 1477 /* 1478 * Just in case server path being mounted contains 1479 * symlinks and fails w/STALE, save the initial sv_path 1480 * so we can redrive the initial mount compound with the 1481 * initial sv_path -- not a symlink-expanded version. 1482 * 1483 * This could only happen if a symlink was expanded 1484 * and the expanded mount compound failed stale. Because 1485 * it could be the case that the symlink was removed at 1486 * the server (and replaced with another symlink/dir, 1487 * we need to use the initial sv_path when attempting 1488 * to re-lookup everything and recover. 1489 */ 1490 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1491 orig_sv_pathlen = svp->sv_pathlen; 1492 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1493 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1494 nfs_rw_exit(&svp->sv_lock); 1495 1496 num_retry = nfs4_max_mount_retry; 1497 1498 do { 1499 /* 1500 * Get the root fh from the server. Retry nfs4_max_mount_retry 1501 * (2) times if it fails with STALE since the recovery 1502 * infrastructure doesn't do STALE recovery for components 1503 * of the server path to the object being mounted. 1504 */ 1505 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1506 1507 if (ep->error == 0 && ep->stat == NFS4_OK) 1508 break; 1509 1510 /* 1511 * For some reason, the mount compound failed. 
Before 1512 * retrying, we need to restore the original sv_path 1513 * because it might have contained symlinks that were 1514 * expanded by nfsgetfh_otw before the failure occurred. 1515 * replace current sv_path with orig sv_path -- just in case 1516 * it changed due to embedded symlinks. 1517 */ 1518 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1519 if (orig_sv_pathlen != svp->sv_pathlen) { 1520 kmem_free(svp->sv_path, svp->sv_pathlen); 1521 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1522 svp->sv_pathlen = orig_sv_pathlen; 1523 } 1524 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1525 nfs_rw_exit(&svp->sv_lock); 1526 1527 } while (num_retry-- > 0); 1528 1529 kmem_free(orig_sv_path, orig_sv_pathlen); 1530 1531 if (ep->error != 0 || ep->stat != 0) { 1532 return; 1533 } 1534 1535 if (vtype != VNON && vtype != mi->mi_type) { 1536 /* shouldn't happen */ 1537 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1538 "nfs4_remap_root: server root vnode type (%d) doesn't " 1539 "match mount info (%d)", vtype, mi->mi_type); 1540 } 1541 1542 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1543 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1544 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1545 nfs_rw_exit(&svp->sv_lock); 1546 sfh4_update(mi->mi_rootfh, &rootfh); 1547 1548 /* 1549 * It's possible that recovery took place on the filesystem 1550 * and the server has been updated between the time we did 1551 * the nfs4getfh_otw and now. Re-drive the otw operation 1552 * to make sure we have a good fh. 
1553 */ 1554 mutex_enter(&mi->mi_lock); 1555 if (mi->mi_curr_serv != svp) 1556 goto remap_retry; 1557 1558 mutex_exit(&mi->mi_lock); 1559 } 1560 1561 static int 1562 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1563 int flags, cred_t *cr, zone_t *zone) 1564 { 1565 vnode_t *rtvp = NULL; 1566 mntinfo4_t *mi; 1567 dev_t nfs_dev; 1568 int error = 0; 1569 rnode4_t *rp; 1570 int i; 1571 struct vattr va; 1572 vtype_t vtype = VNON; 1573 vtype_t tmp_vtype = VNON; 1574 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1575 nfs4_oo_hash_bucket_t *bucketp; 1576 nfs_fh4 fh; 1577 char *droptext = ""; 1578 struct nfs_stats *nfsstatsp; 1579 nfs4_fname_t *mfname; 1580 nfs4_error_t e; 1581 char *orig_sv_path; 1582 int orig_sv_pathlen, num_retry; 1583 cred_t *lcr = NULL, *tcr = cr; 1584 1585 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1586 ASSERT(nfsstatsp != NULL); 1587 1588 ASSERT(nfs_zone() == zone); 1589 ASSERT(crgetref(cr)); 1590 1591 /* 1592 * Create a mount record and link it to the vfs struct. 
1593 */ 1594 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1595 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1596 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1597 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1598 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1599 1600 if (!(flags & NFSMNT_SOFT)) 1601 mi->mi_flags |= MI4_HARD; 1602 if ((flags & NFSMNT_NOPRINT)) 1603 mi->mi_flags |= MI4_NOPRINT; 1604 if (flags & NFSMNT_INT) 1605 mi->mi_flags |= MI4_INT; 1606 if (flags & NFSMNT_PUBLIC) 1607 mi->mi_flags |= MI4_PUBLIC; 1608 mi->mi_retrans = NFS_RETRIES; 1609 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1610 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1611 mi->mi_timeo = nfs4_cots_timeo; 1612 else 1613 mi->mi_timeo = NFS_TIMEO; 1614 mi->mi_prog = NFS_PROGRAM; 1615 mi->mi_vers = NFS_V4; 1616 mi->mi_rfsnames = rfsnames_v4; 1617 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1618 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1619 mi->mi_servers = svp; 1620 mi->mi_curr_serv = svp; 1621 mi->mi_acregmin = SEC2HR(ACREGMIN); 1622 mi->mi_acregmax = SEC2HR(ACREGMAX); 1623 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1624 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1625 mi->mi_fh_expire_type = FH4_PERSISTENT; 1626 mi->mi_clientid_next = NULL; 1627 mi->mi_clientid_prev = NULL; 1628 mi->mi_grace_wait = 0; 1629 mi->mi_error = 0; 1630 mi->mi_srvsettime = 0; 1631 1632 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1633 mi->mi_stsize = mi->mi_tsize; 1634 1635 if (flags & NFSMNT_DIRECTIO) 1636 mi->mi_flags |= MI4_DIRECTIO; 1637 1638 mi->mi_flags |= MI4_MOUNTING; 1639 1640 /* 1641 * Make a vfs struct for nfs. We do this here instead of below 1642 * because rtvp needs a vfs before we can do a getattr on it. 
1643 * 1644 * Assign a unique device id to the mount 1645 */ 1646 mutex_enter(&nfs_minor_lock); 1647 do { 1648 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1649 nfs_dev = makedevice(nfs_major, nfs_minor); 1650 } while (vfs_devismounted(nfs_dev)); 1651 mutex_exit(&nfs_minor_lock); 1652 1653 vfsp->vfs_dev = nfs_dev; 1654 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1655 vfsp->vfs_data = (caddr_t)mi; 1656 vfsp->vfs_fstype = nfsfstyp; 1657 vfsp->vfs_bsize = nfs4_bsize; 1658 1659 /* 1660 * Initialize fields used to support async putpage operations. 1661 */ 1662 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1663 mi->mi_async_clusters[i] = nfs4_async_clusters; 1664 mi->mi_async_init_clusters = nfs4_async_clusters; 1665 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1666 mi->mi_max_threads = nfs4_max_threads; 1667 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1668 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1669 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1670 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1671 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1672 1673 mi->mi_vfsp = vfsp; 1674 zone_hold(mi->mi_zone = zone); 1675 nfs4_mi_zonelist_add(mi); 1676 1677 /* 1678 * Initialize the <open owner/cred> hash table. 1679 */ 1680 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1681 bucketp = &(mi->mi_oo_list[i]); 1682 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1683 list_create(&bucketp->b_oo_hash_list, 1684 sizeof (nfs4_open_owner_t), 1685 offsetof(nfs4_open_owner_t, oo_hash_node)); 1686 } 1687 1688 /* 1689 * Initialize the freed open owner list. 
1690 */ 1691 mi->mi_foo_num = 0; 1692 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1693 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1694 offsetof(nfs4_open_owner_t, oo_foo_node)); 1695 1696 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1697 offsetof(nfs4_lost_rqst_t, lr_node)); 1698 1699 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1700 offsetof(nfs4_bseqid_entry_t, bs_node)); 1701 1702 /* 1703 * Initialize the msg buffer. 1704 */ 1705 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1706 offsetof(nfs4_debug_msg_t, msg_node)); 1707 mi->mi_msg_count = 0; 1708 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1709 1710 /* 1711 * Initialize kstats 1712 */ 1713 nfs4_mnt_kstat_init(vfsp); 1714 1715 /* 1716 * Initialize the shared filehandle pool, and get the fname for 1717 * the filesystem root. 1718 */ 1719 sfh4_createtab(&mi->mi_filehandles); 1720 mi->mi_fname = fn_get(NULL, "."); 1721 1722 /* 1723 * Save server path we're attempting to mount. 1724 */ 1725 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1726 orig_sv_pathlen = svp_head->sv_pathlen; 1727 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1728 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1729 nfs_rw_exit(&svp->sv_lock); 1730 1731 /* 1732 * Make the GETFH call to get root fh for each replica. 1733 */ 1734 if (svp_head->sv_next) 1735 droptext = ", dropping replica"; 1736 1737 /* 1738 * If the uid is set then set the creds for secure mounts 1739 * by proxy processes such as automountd. 
1740 */ 1741 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1742 if (svp->sv_secdata->uid != 0) { 1743 lcr = crdup(cr); 1744 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1745 tcr = lcr; 1746 } 1747 nfs_rw_exit(&svp->sv_lock); 1748 for (svp = svp_head; svp; svp = svp->sv_next) { 1749 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1750 nfs_cmn_err(error, CE_WARN, 1751 VERS_MSG "Host %s is a duplicate%s", 1752 svp->sv_hostname, droptext); 1753 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1754 svp->sv_flags |= SV4_NOTINUSE; 1755 nfs_rw_exit(&svp->sv_lock); 1756 continue; 1757 } 1758 mi->mi_curr_serv = svp; 1759 1760 /* 1761 * Just in case server path being mounted contains 1762 * symlinks and fails w/STALE, save the initial sv_path 1763 * so we can redrive the initial mount compound with the 1764 * initial sv_path -- not a symlink-expanded version. 1765 * 1766 * This could only happen if a symlink was expanded 1767 * and the expanded mount compound failed stale. Because 1768 * it could be the case that the symlink was removed at 1769 * the server (and replaced with another symlink/dir, 1770 * we need to use the initial sv_path when attempting 1771 * to re-lookup everything and recover. 1772 * 1773 * Other mount errors should evenutally be handled here also 1774 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1775 * failures will result in mount being redriven a few times. 1776 */ 1777 num_retry = nfs4_max_mount_retry; 1778 do { 1779 nfs4getfh_otw(mi, svp, &tmp_vtype, 1780 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1781 NFS4_GETFH_NEEDSOP, tcr, &e); 1782 1783 if (e.error == 0 && e.stat == NFS4_OK) 1784 break; 1785 1786 /* 1787 * replace current sv_path with orig sv_path -- just in 1788 * case it changed due to embedded symlinks. 
1789 */ 1790 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1791 if (orig_sv_pathlen != svp->sv_pathlen) { 1792 kmem_free(svp->sv_path, svp->sv_pathlen); 1793 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1794 KM_SLEEP); 1795 svp->sv_pathlen = orig_sv_pathlen; 1796 } 1797 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1798 nfs_rw_exit(&svp->sv_lock); 1799 1800 } while (num_retry-- > 0); 1801 1802 error = e.error ? e.error : geterrno4(e.stat); 1803 if (error) { 1804 nfs_cmn_err(error, CE_WARN, 1805 VERS_MSG "initial call to %s failed%s: %m", 1806 svp->sv_hostname, droptext); 1807 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1808 svp->sv_flags |= SV4_NOTINUSE; 1809 nfs_rw_exit(&svp->sv_lock); 1810 mi->mi_flags &= ~MI4_RECOV_FAIL; 1811 mi->mi_error = 0; 1812 continue; 1813 } 1814 1815 if (tmp_vtype == VBAD) { 1816 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1817 VERS_MSG "%s returned a bad file type for " 1818 "root%s", svp->sv_hostname, droptext); 1819 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1820 svp->sv_flags |= SV4_NOTINUSE; 1821 nfs_rw_exit(&svp->sv_lock); 1822 continue; 1823 } 1824 1825 if (vtype == VNON) { 1826 vtype = tmp_vtype; 1827 } else if (vtype != tmp_vtype) { 1828 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1829 VERS_MSG "%s returned a different file type " 1830 "for root%s", svp->sv_hostname, droptext); 1831 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1832 svp->sv_flags |= SV4_NOTINUSE; 1833 nfs_rw_exit(&svp->sv_lock); 1834 continue; 1835 } 1836 if (firstsvp == NULL) 1837 firstsvp = svp; 1838 } 1839 1840 kmem_free(orig_sv_path, orig_sv_pathlen); 1841 1842 if (firstsvp == NULL) { 1843 if (error == 0) 1844 error = ENOENT; 1845 goto bad; 1846 } 1847 1848 mi->mi_curr_serv = svp = firstsvp; 1849 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1850 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1851 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1852 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1853 mi->mi_rootfh = 
sfh4_get(&fh, mi); 1854 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1855 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1856 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1857 nfs_rw_exit(&svp->sv_lock); 1858 1859 /* 1860 * Make the root vnode without attributes. 1861 */ 1862 mfname = mi->mi_fname; 1863 fn_hold(mfname); 1864 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1865 &mfname, NULL, mi, cr, gethrtime()); 1866 rtvp->v_type = vtype; 1867 1868 mi->mi_curread = mi->mi_tsize; 1869 mi->mi_curwrite = mi->mi_stsize; 1870 1871 /* 1872 * Start the manager thread responsible for handling async worker 1873 * threads. 1874 */ 1875 VFS_HOLD(vfsp); /* add reference for thread */ 1876 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1877 vfsp, 0, minclsyspri); 1878 ASSERT(mi->mi_manager_thread != NULL); 1879 /* 1880 * Create the thread that handles over-the-wire calls for 1881 * VOP_INACTIVE. 1882 * This needs to happen after the manager thread is created. 1883 */ 1884 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1885 mi, 0, minclsyspri); 1886 ASSERT(mi->mi_inactive_thread != NULL); 1887 1888 /* If we didn't get a type, get one now */ 1889 if (rtvp->v_type == VNON) { 1890 va.va_mask = AT_TYPE; 1891 error = nfs4getattr(rtvp, &va, tcr); 1892 if (error) 1893 goto bad; 1894 rtvp->v_type = va.va_type; 1895 } 1896 1897 mi->mi_type = rtvp->v_type; 1898 1899 mutex_enter(&mi->mi_lock); 1900 mi->mi_flags &= ~MI4_MOUNTING; 1901 mutex_exit(&mi->mi_lock); 1902 1903 *rtvpp = rtvp; 1904 if (lcr != NULL) 1905 crfree(lcr); 1906 1907 return (0); 1908 bad: 1909 /* 1910 * An error occurred somewhere, need to clean up... 1911 * 1912 * XXX Should not svp be cleaned too? 1913 */ 1914 if (lcr != NULL) 1915 crfree(lcr); 1916 if (rtvp != NULL) { 1917 /* 1918 * We need to release our reference to the root vnode and 1919 * destroy the mntinfo4 struct that we just created. 
1920 */ 1921 rp = VTOR4(rtvp); 1922 if (rp->r_flags & R4HASHED) 1923 rp4_rmhash(rp); 1924 VN_RELE(rtvp); 1925 } 1926 nfs4_async_stop(vfsp); 1927 nfs4_async_manager_stop(vfsp); 1928 if (mi->mi_io_kstats) { 1929 kstat_delete(mi->mi_io_kstats); 1930 mi->mi_io_kstats = NULL; 1931 } 1932 if (mi->mi_ro_kstats) { 1933 kstat_delete(mi->mi_ro_kstats); 1934 mi->mi_ro_kstats = NULL; 1935 } 1936 if (mi->mi_recov_ksp) { 1937 kstat_delete(mi->mi_recov_ksp); 1938 mi->mi_recov_ksp = NULL; 1939 } 1940 nfs_free_mi4(mi); 1941 *rtvpp = NULL; 1942 return (error); 1943 } 1944 1945 /* 1946 * vfs operations 1947 */ 1948 static int 1949 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1950 { 1951 mntinfo4_t *mi; 1952 ushort_t omax; 1953 1954 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1955 return (EPERM); 1956 1957 mi = VFTOMI4(vfsp); 1958 1959 if (flag & MS_FORCE) { 1960 vfsp->vfs_flag |= VFS_UNMOUNTED; 1961 if (nfs_zone() != mi->mi_zone) { 1962 /* 1963 * If the request is coming from the wrong zone, 1964 * we don't want to create any new threads, and 1965 * performance is not a concern. Do everything 1966 * inline. 1967 */ 1968 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1969 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1970 (void *)vfsp)); 1971 nfs4_free_mount(vfsp, cr); 1972 } else { 1973 /* 1974 * Free data structures asynchronously, to avoid 1975 * blocking the current thread (for performance 1976 * reasons only). 1977 */ 1978 async_free_mount(vfsp, cr); 1979 } 1980 return (0); 1981 } 1982 /* 1983 * Wait until all asynchronous putpage operations on 1984 * this file system are complete before flushing rnodes 1985 * from the cache. 1986 */ 1987 omax = mi->mi_max_threads; 1988 if (nfs4_async_stop_sig(vfsp)) { 1989 return (EINTR); 1990 } 1991 r4flush(vfsp, cr); 1992 /* 1993 * If there are any active vnodes on this file system, 1994 * then the file system is busy and can't be umounted. 
1995 */ 1996 if (check_rtable4(vfsp)) { 1997 mutex_enter(&mi->mi_async_lock); 1998 mi->mi_max_threads = omax; 1999 mutex_exit(&mi->mi_async_lock); 2000 return (EBUSY); 2001 } 2002 /* 2003 * The unmount can't fail from now on, and there are no active 2004 * files that could require over-the-wire calls to the server, 2005 * so stop the async manager and the inactive thread. 2006 */ 2007 nfs4_async_manager_stop(vfsp); 2008 /* 2009 * Destroy all rnodes belonging to this file system from the 2010 * rnode hash queues and purge any resources allocated to 2011 * them. 2012 */ 2013 destroy_rtable4(vfsp, cr); 2014 vfsp->vfs_flag |= VFS_UNMOUNTED; 2015 nfs4_remove_mi_from_server(mi, NULL); 2016 if (mi->mi_io_kstats) { 2017 kstat_delete(mi->mi_io_kstats); 2018 mi->mi_io_kstats = NULL; 2019 } 2020 if (mi->mi_ro_kstats) { 2021 kstat_delete(mi->mi_ro_kstats); 2022 mi->mi_ro_kstats = NULL; 2023 } 2024 if (mi->mi_recov_ksp) { 2025 kstat_delete(mi->mi_recov_ksp); 2026 mi->mi_recov_ksp = NULL; 2027 } 2028 return (0); 2029 } 2030 2031 /* 2032 * find root of nfs 2033 */ 2034 static int 2035 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2036 { 2037 mntinfo4_t *mi; 2038 vnode_t *vp; 2039 nfs4_fname_t *mfname; 2040 servinfo4_t *svp; 2041 2042 mi = VFTOMI4(vfsp); 2043 2044 if (nfs_zone() != mi->mi_zone) 2045 return (EPERM); 2046 2047 svp = mi->mi_curr_serv; 2048 if (svp) { 2049 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2050 if (svp->sv_flags & SV4_ROOT_STALE) { 2051 nfs_rw_exit(&svp->sv_lock); 2052 2053 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2054 if (svp->sv_flags & SV4_ROOT_STALE) { 2055 svp->sv_flags &= ~SV4_ROOT_STALE; 2056 nfs_rw_exit(&svp->sv_lock); 2057 return (ENOENT); 2058 } 2059 nfs_rw_exit(&svp->sv_lock); 2060 } else 2061 nfs_rw_exit(&svp->sv_lock); 2062 } 2063 2064 mfname = mi->mi_fname; 2065 fn_hold(mfname); 2066 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2067 VFTOMI4(vfsp), CRED(), gethrtime()); 2068 2069 if (VTOR4(vp)->r_flags & R4STALE) { 
2070 VN_RELE(vp); 2071 return (ENOENT); 2072 } 2073 2074 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2075 2076 vp->v_type = mi->mi_type; 2077 2078 *vpp = vp; 2079 2080 return (0); 2081 } 2082 2083 static int 2084 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2085 { 2086 int error; 2087 nfs4_ga_res_t gar; 2088 nfs4_ga_ext_res_t ger; 2089 2090 gar.n4g_ext_res = &ger; 2091 2092 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2093 NFS4_STATFS_ATTR_MASK, cr)) 2094 return (error); 2095 2096 *sbp = gar.n4g_ext_res->n4g_sb; 2097 2098 return (0); 2099 } 2100 2101 /* 2102 * Get file system statistics. 2103 */ 2104 static int 2105 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2106 { 2107 int error; 2108 vnode_t *vp; 2109 cred_t *cr; 2110 2111 error = nfs4_root(vfsp, &vp); 2112 if (error) 2113 return (error); 2114 2115 cr = CRED(); 2116 2117 error = nfs4_statfs_otw(vp, sbp, cr); 2118 if (!error) { 2119 (void) strncpy(sbp->f_basetype, 2120 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2121 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2122 } else { 2123 nfs4_purge_stale_fh(error, vp, cr); 2124 } 2125 2126 VN_RELE(vp); 2127 2128 return (error); 2129 } 2130 2131 static kmutex_t nfs4_syncbusy; 2132 2133 /* 2134 * Flush dirty nfs files for file system vfsp. 2135 * If vfsp == NULL, all nfs files are flushed. 2136 * 2137 * SYNC_CLOSE in flag is passed to us to 2138 * indicate that we are shutting down and or 2139 * rebooting. 2140 */ 2141 static int 2142 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2143 { 2144 /* 2145 * Cross-zone calls are OK here, since this translates to a 2146 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2147 */ 2148 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2149 r4flush(vfsp, cr); 2150 mutex_exit(&nfs4_syncbusy); 2151 } 2152 2153 /* 2154 * if SYNC_CLOSE is set then we know that 2155 * the system is rebooting, mark the mntinfo 2156 * for later examination. 
2157 */ 2158 if (vfsp && (flag & SYNC_CLOSE)) { 2159 mntinfo4_t *mi; 2160 2161 mi = VFTOMI4(vfsp); 2162 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2163 mutex_enter(&mi->mi_lock); 2164 mi->mi_flags |= MI4_SHUTDOWN; 2165 mutex_exit(&mi->mi_lock); 2166 } 2167 } 2168 return (0); 2169 } 2170 2171 /* 2172 * vget is difficult, if not impossible, to support in v4 because we don't 2173 * know the parent directory or name, which makes it impossible to create a 2174 * useful shadow vnode. And we need the shadow vnode for things like 2175 * OPEN. 2176 */ 2177 2178 /* ARGSUSED */ 2179 /* 2180 * XXX Check nfs4_vget_pseudo() for dependency. 2181 */ 2182 static int 2183 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2184 { 2185 return (EREMOTE); 2186 } 2187 2188 /* 2189 * nfs4_mountroot get called in the case where we are diskless booting. All 2190 * we need from here is the ability to get the server info and from there we 2191 * can simply call nfs4_rootvp. 2192 */ 2193 /* ARGSUSED */ 2194 static int 2195 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2196 { 2197 vnode_t *rtvp; 2198 char root_hostname[SYS_NMLN+1]; 2199 struct servinfo4 *svp; 2200 int error; 2201 int vfsflags; 2202 size_t size; 2203 char *root_path; 2204 struct pathname pn; 2205 char *name; 2206 cred_t *cr; 2207 mntinfo4_t *mi; 2208 struct nfs_args args; /* nfs mount arguments */ 2209 static char token[10]; 2210 nfs4_error_t n4e; 2211 2212 bzero(&args, sizeof (args)); 2213 2214 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2215 clkset(-1L); /* hack for now - until we get time svc? */ 2216 2217 if (why == ROOT_REMOUNT) { 2218 /* 2219 * Shouldn't happen. 2220 */ 2221 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2222 } 2223 2224 if (why == ROOT_UNMOUNT) { 2225 /* 2226 * Nothing to do for NFS. 
2227 */ 2228 return (0); 2229 } 2230 2231 /* 2232 * why == ROOT_INIT 2233 */ 2234 2235 name = token; 2236 *name = 0; 2237 (void) getfsname("root", name, sizeof (token)); 2238 2239 pn_alloc(&pn); 2240 root_path = pn.pn_path; 2241 2242 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2243 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2244 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2245 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2246 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2247 2248 /* 2249 * Get server address 2250 * Get the root path 2251 * Get server's transport 2252 * Get server's hostname 2253 * Get options 2254 */ 2255 args.addr = &svp->sv_addr; 2256 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2257 args.fh = (char *)&svp->sv_fhandle; 2258 args.knconf = svp->sv_knconf; 2259 args.hostname = root_hostname; 2260 vfsflags = 0; 2261 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2262 &args, &vfsflags)) { 2263 if (error == EPROTONOSUPPORT) 2264 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2265 "mount_root failed: server doesn't support NFS V4"); 2266 else 2267 nfs_cmn_err(error, CE_WARN, 2268 "nfs4_mountroot: mount_root failed: %m"); 2269 nfs_rw_exit(&svp->sv_lock); 2270 sv4_free(svp); 2271 pn_free(&pn); 2272 return (error); 2273 } 2274 nfs_rw_exit(&svp->sv_lock); 2275 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2276 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2277 (void) strcpy(svp->sv_hostname, root_hostname); 2278 2279 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2280 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2281 (void) strcpy(svp->sv_path, root_path); 2282 2283 /* 2284 * Force root partition to always be mounted with AUTH_UNIX for now 2285 */ 2286 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2287 svp->sv_secdata->secmod = AUTH_UNIX; 2288 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2289 
svp->sv_secdata->data = NULL; 2290 2291 cr = crgetcred(); 2292 rtvp = NULL; 2293 2294 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2295 2296 if (error) { 2297 crfree(cr); 2298 pn_free(&pn); 2299 goto errout; 2300 } 2301 2302 mi = VTOMI4(rtvp); 2303 2304 /* 2305 * Send client id to the server, if necessary 2306 */ 2307 nfs4_error_zinit(&n4e); 2308 nfs4setclientid(mi, cr, FALSE, &n4e); 2309 error = n4e.error; 2310 2311 crfree(cr); 2312 2313 if (error) { 2314 pn_free(&pn); 2315 goto errout; 2316 } 2317 2318 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2319 if (error) { 2320 nfs_cmn_err(error, CE_WARN, 2321 "nfs4_mountroot: invalid root mount options"); 2322 pn_free(&pn); 2323 goto errout; 2324 } 2325 2326 (void) vfs_lock_wait(vfsp); 2327 vfs_add(NULL, vfsp, vfsflags); 2328 vfs_unlock(vfsp); 2329 2330 size = strlen(svp->sv_hostname); 2331 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2332 rootfs.bo_name[size] = ':'; 2333 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2334 2335 pn_free(&pn); 2336 2337 errout: 2338 if (error) { 2339 sv4_free(svp); 2340 nfs4_async_stop(vfsp); 2341 nfs4_async_manager_stop(vfsp); 2342 } 2343 2344 if (rtvp != NULL) 2345 VN_RELE(rtvp); 2346 2347 return (error); 2348 } 2349 2350 /* 2351 * Initialization routine for VFS routines. 
Should only be called once 2352 */ 2353 int 2354 nfs4_vfsinit(void) 2355 { 2356 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2357 nfs4setclientid_init(); 2358 return (0); 2359 } 2360 2361 void 2362 nfs4_vfsfini(void) 2363 { 2364 nfs4setclientid_fini(); 2365 mutex_destroy(&nfs4_syncbusy); 2366 } 2367 2368 void 2369 nfs4_freevfs(vfs_t *vfsp) 2370 { 2371 mntinfo4_t *mi; 2372 servinfo4_t *svp; 2373 2374 /* free up the resources */ 2375 mi = VFTOMI4(vfsp); 2376 svp = mi->mi_servers; 2377 mi->mi_servers = mi->mi_curr_serv = NULL; 2378 sv4_free(svp); 2379 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_freevfs: " 2380 "free mi %p", (void *)mi)); 2381 2382 /* 2383 * By this time we should have already deleted the 2384 * mi kstats in the unmount code. If they are still around 2385 * somethings wrong 2386 */ 2387 ASSERT(mi->mi_io_kstats == NULL); 2388 2389 nfs_free_mi4(mi); 2390 } 2391 2392 /* 2393 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2394 */ 2395 struct nfs4_server nfs4_server_lst = 2396 { &nfs4_server_lst, &nfs4_server_lst }; 2397 2398 kmutex_t nfs4_server_lst_lock; 2399 2400 static void 2401 nfs4setclientid_init(void) 2402 { 2403 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2404 } 2405 2406 static void 2407 nfs4setclientid_fini(void) 2408 { 2409 mutex_destroy(&nfs4_server_lst_lock); 2410 } 2411 2412 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2413 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2414 2415 /* 2416 * Set the clientid for the server for "mi". No-op if the clientid is 2417 * already set. 2418 * 2419 * The recovery boolean should be set to TRUE if this function was called 2420 * by the recovery code, and FALSE otherwise. This is used to determine 2421 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2422 * for adding a mntinfo4_t to a nfs4_server_t. 2423 * 2424 * Error is returned via 'n4ep'. 
If there was a 'n4ep->stat' error, then 2425 * 'n4ep->error' is set to geterrno4(n4ep->stat). 2426 */ 2427 void 2428 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2429 { 2430 struct nfs4_server *np; 2431 struct servinfo4 *svp = mi->mi_curr_serv; 2432 nfs4_recov_state_t recov_state; 2433 int num_retries = 0; 2434 bool_t retry = FALSE; 2435 cred_t *lcr = NULL; 2436 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2437 time_t lease_time = 0; 2438 2439 recov_state.rs_flags = 0; 2440 recov_state.rs_num_retry_despite_err = 0; 2441 ASSERT(n4ep != NULL); 2442 2443 recov_retry: 2444 nfs4_error_zinit(n4ep); 2445 if (!recovery) 2446 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2447 2448 /* This locks np if it is found */ 2449 np = servinfo4_to_nfs4_server(svp); 2450 ASSERT(np == NULL || MUTEX_HELD(&np->s_lock)); 2451 2452 /* 2453 * If we find the server already in the list, then just 2454 * return, we've already done SETCLIENTID to that server 2455 */ 2456 2457 if (np && (np->s_flags & N4S_CLIENTID_SET)) { 2458 /* 2459 * XXX - more is needed here. SETCLIENTID may not 2460 * be completed. A VFS lock may prevent multiple 2461 * mounts and provide needed serialization. 2462 */ 2463 /* add mi to np's mntinfo4_list */ 2464 nfs4_add_mi_to_server(np, mi); 2465 if (!recovery) 2466 nfs_rw_exit(&mi->mi_recovlock); 2467 mutex_exit(&np->s_lock); 2468 nfs4_server_rele(np); 2469 return; 2470 } 2471 2472 /* 2473 * Drop the mi_recovlock since nfs4_start_op will 2474 * acquire it again for us. 
2475 */ 2476 if (!recovery) 2477 nfs_rw_exit(&mi->mi_recovlock); 2478 2479 if (!np) 2480 np = new_nfs4_server(svp, cr); 2481 else 2482 mutex_exit(&np->s_lock); 2483 2484 if (!recovery) { 2485 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2486 if (n4ep->error) { 2487 nfs4_server_rele(np); 2488 return; 2489 } 2490 } 2491 2492 /* 2493 * Will potentially add np to global list, which transfers 2494 * ownership of the reference to the list. 2495 */ 2496 mutex_enter(&nfs4_server_lst_lock); 2497 mutex_enter(&np->s_lock); 2498 2499 /* 2500 * Reset the N4S_CB_PINGED flag. This is used to 2501 * indicate if we have received a CB_NULL from the 2502 * server. Also we reset the waiter flag. 2503 */ 2504 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2505 2506 if (np->s_flags & N4S_CLIENTID_SET) { 2507 /* XXX copied/pasted from above */ 2508 /* 2509 * XXX - more is needed here. SETCLIENTID may not 2510 * be completed. A VFS lock may prevent multiple 2511 * mounts and provide needed serialization. 2512 */ 2513 /* add mi to np's mntinfo4_list */ 2514 nfs4_add_mi_to_server(np, mi); 2515 mutex_exit(&np->s_lock); 2516 mutex_exit(&nfs4_server_lst_lock); 2517 nfs4_server_rele(np); 2518 if (!recovery) 2519 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2520 return; 2521 } 2522 2523 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2524 2525 if (n4ep->error == EACCES) { 2526 /* 2527 * If the uid is set then set the creds for secure mounts 2528 * by proxy processes such as automountd. 
2529 */ 2530 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2531 if (svp->sv_secdata->uid != 0) { 2532 lcr = crdup(cr); 2533 (void) crsetugid(lcr, svp->sv_secdata->uid, 2534 crgetgid(cr)); 2535 crfree(np->s_cred); 2536 np->s_cred = lcr; 2537 } 2538 nfs_rw_exit(&svp->sv_lock); 2539 2540 if (lcr != NULL) 2541 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2542 &retry_inuse); 2543 } 2544 lease_time = np->s_lease_time; 2545 mutex_exit(&np->s_lock); 2546 mutex_exit(&nfs4_server_lst_lock); 2547 2548 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2549 /* 2550 * Start recovery if failover is a possibility. If 2551 * invoked by the recovery thread itself, then just 2552 * return and let it handle the failover first. NB: 2553 * recovery is not allowed if the mount is in progress 2554 * since the infrastructure is not sufficiently setup 2555 * to allow it. Just return the error (after suitable 2556 * retries). 2557 */ 2558 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2559 (void) nfs4_start_recovery(n4ep, mi, NULL, 2560 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2561 /* 2562 * Don't retry here, just return and let 2563 * recovery take over. 2564 */ 2565 if (recovery) 2566 retry = FALSE; 2567 } else if (nfs4_rpc_retry_error(n4ep->error) || 2568 n4ep->stat == NFS4ERR_RESOURCE || 2569 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2570 2571 retry = TRUE; 2572 /* 2573 * Always retry if in recovery or once had 2574 * contact with the server (but now it's 2575 * overloaded). 
2576 */ 2577 if (recovery == TRUE || 2578 n4ep->error == ETIMEDOUT || 2579 n4ep->error == ECONNRESET) 2580 num_retries = 0; 2581 } else if (retry_inuse && n4ep->error == 0 && 2582 n4ep->stat == NFS4ERR_CLID_INUSE) { 2583 retry = TRUE; 2584 num_retries = 0; 2585 } 2586 } 2587 2588 if (!recovery) 2589 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2590 nfs4_server_rele(np); 2591 2592 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2593 if (retry_inuse) { 2594 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2595 retry_inuse = 0; 2596 } else 2597 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2598 goto recov_retry; 2599 } 2600 2601 if (n4ep->error == 0) 2602 n4ep->error = geterrno4(n4ep->stat); 2603 } 2604 2605 int nfs4setclientid_otw_debug = 0; 2606 2607 /* 2608 * This assumes np is locked down. 2609 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2610 * but nothing else; the calling function must be designed to handle those 2611 * other errors. 
2612 */ 2613 static void 2614 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2615 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2616 { 2617 COMPOUND4args_clnt args; 2618 COMPOUND4res_clnt res; 2619 nfs_argop4 argop[3]; 2620 SETCLIENTID4args *s_args; 2621 SETCLIENTID4resok *s_resok; 2622 int doqueue = 1; 2623 nfs4_ga_res_t *garp = NULL; 2624 timespec_t prop_time, after_time; 2625 verifier4 verf; 2626 clientid4 tmp_clientid; 2627 2628 ASSERT(MUTEX_HELD(&np->s_lock)); 2629 2630 args.ctag = TAG_SETCLIENTID; 2631 2632 args.array = argop; 2633 args.array_len = 3; 2634 2635 /* PUTROOTFH */ 2636 argop[0].argop = OP_PUTROOTFH; 2637 2638 /* GETATTR */ 2639 argop[1].argop = OP_GETATTR; 2640 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2641 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2642 2643 /* SETCLIENTID */ 2644 argop[2].argop = OP_SETCLIENTID; 2645 2646 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2647 2648 s_args->client.verifier = np->clidtosend.verifier; 2649 s_args->client.id_len = np->clidtosend.id_len; 2650 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2651 s_args->client.id_val = np->clidtosend.id_val; 2652 2653 /* 2654 * Callback needs to happen on non-RDMA transport 2655 * Check if we have saved the original knetconfig 2656 * if so, use that instead. 2657 */ 2658 if (svp->sv_origknconf != NULL) 2659 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2660 else 2661 nfs4_cb_args(np, svp->sv_knconf, s_args); 2662 2663 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 2664 2665 if (ep->error) 2666 return; 2667 2668 /* getattr lease_time res */ 2669 if (res.array_len >= 2) { 2670 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2671 2672 #ifndef _LP64 2673 /* 2674 * The 32 bit client cannot handle a lease time greater than 2675 * (INT32_MAX/1000000). This is due to the use of the 2676 * lease_time in calls to drv_usectohz() in 2677 * nfs4_renew_lease_thread(). 
The problem is that 2678 * drv_usectohz() takes a time_t (which is just a long = 4 2679 * bytes) as its parameter. The lease_time is multiplied by 2680 * 1000000 to convert seconds to usecs for the parameter. If 2681 * a number bigger than (INT32_MAX/1000000) is used then we 2682 * overflow on the 32bit client. 2683 */ 2684 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2685 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2686 } 2687 #endif 2688 2689 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2690 2691 /* 2692 * Keep track of the lease period for the mi's 2693 * mi_msg_list. We need an appropiate time 2694 * bound to associate past facts with a current 2695 * event. The lease period is perfect for this. 2696 */ 2697 mutex_enter(&mi->mi_msg_list_lock); 2698 mi->mi_lease_period = np->s_lease_time; 2699 mutex_exit(&mi->mi_msg_list_lock); 2700 } 2701 2702 2703 if (res.status == NFS4ERR_CLID_INUSE) { 2704 clientaddr4 *clid_inuse; 2705 2706 if (!(*retry_inusep)) { 2707 clid_inuse = &res.array->nfs_resop4_u. 2708 opsetclientid.SETCLIENTID4res_u.client_using; 2709 2710 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2711 "NFS4 mount (SETCLIENTID failed)." 2712 " nfs4_client_id.id is in" 2713 "use already by: r_netid<%s> r_addr<%s>", 2714 clid_inuse->r_netid, clid_inuse->r_addr); 2715 } 2716 2717 /* 2718 * XXX - The client should be more robust in its 2719 * handling of clientid in use errors (regen another 2720 * clientid and try again?) 2721 */ 2722 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2723 return; 2724 } 2725 2726 if (res.status) { 2727 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2728 return; 2729 } 2730 2731 s_resok = &res.array[2].nfs_resop4_u. 
2732 opsetclientid.SETCLIENTID4res_u.resok4; 2733 2734 tmp_clientid = s_resok->clientid; 2735 2736 verf = s_resok->setclientid_confirm; 2737 2738 #ifdef DEBUG 2739 if (nfs4setclientid_otw_debug) { 2740 union { 2741 clientid4 clientid; 2742 int foo[2]; 2743 } cid; 2744 2745 cid.clientid = s_resok->clientid; 2746 2747 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2748 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2749 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2750 } 2751 #endif 2752 2753 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2754 2755 /* Confirm the client id and get the lease_time attribute */ 2756 2757 args.ctag = TAG_SETCLIENTID_CF; 2758 2759 args.array = argop; 2760 args.array_len = 1; 2761 2762 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2763 2764 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2765 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2766 2767 /* used to figure out RTT for np */ 2768 gethrestime(&prop_time); 2769 2770 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2771 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2772 prop_time.tv_nsec)); 2773 2774 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2775 2776 gethrestime(&after_time); 2777 np->propagation_delay.tv_sec = 2778 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2779 2780 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2781 "finish time: %ld sec ", after_time.tv_sec)); 2782 2783 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2784 "propagation delay set to %ld sec", 2785 np->propagation_delay.tv_sec)); 2786 2787 if (ep->error) 2788 return; 2789 2790 if (res.status == NFS4ERR_CLID_INUSE) { 2791 clientaddr4 *clid_inuse; 2792 2793 if (!(*retry_inusep)) { 2794 clid_inuse = &res.array->nfs_resop4_u. 2795 opsetclientid.SETCLIENTID4res_u.client_using; 2796 2797 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2798 "SETCLIENTID_CONFIRM failed. 
" 2799 "nfs4_client_id.id is in use already by: " 2800 "r_netid<%s> r_addr<%s>", 2801 clid_inuse->r_netid, clid_inuse->r_addr); 2802 } 2803 2804 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2805 return; 2806 } 2807 2808 if (res.status) { 2809 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2810 return; 2811 } 2812 2813 if (!(np->s_flags & N4S_INSERTED)) { 2814 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 2815 insque(np, &nfs4_server_lst); 2816 ASSERT(MUTEX_HELD(&np->s_lock)); 2817 np->s_flags |= N4S_INSERTED; 2818 np->s_refcnt++; /* list gets a reference */ 2819 } 2820 2821 np->clientid = tmp_clientid; 2822 np->s_flags |= N4S_CLIENTID_SET; 2823 2824 /* Add mi to np's mntinfo4 list */ 2825 nfs4_add_mi_to_server(np, mi); 2826 2827 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2828 /* 2829 * Start lease management thread. 2830 * Keep trying until we succeed. 2831 */ 2832 2833 np->s_refcnt++; /* pass reference to thread */ 2834 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2835 minclsyspri); 2836 } 2837 2838 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2839 } 2840 2841 /* 2842 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2843 * mi's clientid the same as sp's. 2844 * Assumes sp is locked down. 
2845 */ 2846 void 2847 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 2848 { 2849 mntinfo4_t *tmi; 2850 int in_list = 0; 2851 2852 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 2853 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2854 ASSERT(sp != &nfs4_server_lst); 2855 ASSERT(MUTEX_HELD(&sp->s_lock)); 2856 2857 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2858 "nfs4_add_mi_to_server: add mi %p to sp %p", 2859 (void*)mi, (void*)sp)); 2860 2861 for (tmi = sp->mntinfo4_list; 2862 tmi != NULL; 2863 tmi = tmi->mi_clientid_next) { 2864 if (tmi == mi) { 2865 NFS4_DEBUG(nfs4_client_lease_debug, 2866 (CE_NOTE, 2867 "nfs4_add_mi_to_server: mi in list")); 2868 in_list = 1; 2869 } 2870 } 2871 2872 /* 2873 * First put a hold on the mntinfo4's vfsp so that references via 2874 * mntinfo4_list will be valid. 2875 */ 2876 if (!in_list) 2877 VFS_HOLD(mi->mi_vfsp); 2878 2879 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 2880 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 2881 2882 if (!in_list) { 2883 if (sp->mntinfo4_list) 2884 sp->mntinfo4_list->mi_clientid_prev = mi; 2885 mi->mi_clientid_next = sp->mntinfo4_list; 2886 sp->mntinfo4_list = mi; 2887 mi->mi_srvsettime = gethrestime_sec(); 2888 } 2889 2890 /* set mi's clientid to that of sp's for later matching */ 2891 mi->mi_clientid = sp->clientid; 2892 2893 /* 2894 * Update the clientid for any other mi's belonging to sp. This 2895 * must be done here while we hold sp->s_lock, so that 2896 * find_nfs4_server() continues to work. 2897 */ 2898 2899 for (tmi = sp->mntinfo4_list; 2900 tmi != NULL; 2901 tmi = tmi->mi_clientid_next) { 2902 if (tmi != mi) { 2903 tmi->mi_clientid = sp->clientid; 2904 } 2905 } 2906 } 2907 2908 /* 2909 * Remove the mi from sp's mntinfo4_list and release its reference. 2910 * Exception: if mi still has open files, flag it for later removal (when 2911 * all the files are closed). 
2912 * 2913 * If this is the last mntinfo4 in sp's list then tell the lease renewal 2914 * thread to exit. 2915 */ 2916 static void 2917 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 2918 { 2919 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2920 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 2921 (void*)mi, (void*)sp)); 2922 2923 ASSERT(sp != NULL); 2924 ASSERT(MUTEX_HELD(&sp->s_lock)); 2925 ASSERT(mi->mi_open_files >= 0); 2926 2927 /* 2928 * First make sure this mntinfo4 can be taken off of the list, 2929 * ie: it doesn't have any open files remaining. 2930 */ 2931 if (mi->mi_open_files > 0) { 2932 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2933 "nfs4_remove_mi_from_server_nolock: don't " 2934 "remove mi since it still has files open")); 2935 2936 mutex_enter(&mi->mi_lock); 2937 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 2938 mutex_exit(&mi->mi_lock); 2939 return; 2940 } 2941 2942 remove_mi(sp, mi); 2943 2944 if (sp->mntinfo4_list == NULL) { 2945 /* last fs unmounted, kill the thread */ 2946 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2947 "remove_mi_from_nfs4_server_nolock: kill the thread")); 2948 nfs4_mark_srv_dead(sp); 2949 } 2950 } 2951 2952 /* 2953 * Remove mi from sp's mntinfo4_list and release the vfs reference. 2954 */ 2955 static void 2956 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 2957 { 2958 ASSERT(MUTEX_HELD(&sp->s_lock)); 2959 2960 /* 2961 * We release a reference, and the caller must still have a 2962 * reference. 2963 */ 2964 ASSERT(mi->mi_vfsp->vfs_count >= 2); 2965 2966 if (mi->mi_clientid_prev) { 2967 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 2968 } else { 2969 /* This is the first mi in sp's mntinfo4_list */ 2970 /* 2971 * Make sure the first mntinfo4 in the list is the actual 2972 * mntinfo4 passed in. 
2973 */ 2974 ASSERT(sp->mntinfo4_list == mi); 2975 2976 sp->mntinfo4_list = mi->mi_clientid_next; 2977 } 2978 if (mi->mi_clientid_next) 2979 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 2980 2981 /* Now mark the mntinfo4's links as being removed */ 2982 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 2983 2984 VFS_RELE(mi->mi_vfsp); 2985 } 2986 2987 /* 2988 * Free all the entries in sp's mntinfo4_list. 2989 */ 2990 static void 2991 remove_all_mi(nfs4_server_t *sp) 2992 { 2993 mntinfo4_t *mi; 2994 2995 ASSERT(MUTEX_HELD(&sp->s_lock)); 2996 2997 while (sp->mntinfo4_list != NULL) { 2998 mi = sp->mntinfo4_list; 2999 /* 3000 * Grab a reference in case there is only one left (which 3001 * remove_mi() frees). 3002 */ 3003 VFS_HOLD(mi->mi_vfsp); 3004 remove_mi(sp, mi); 3005 VFS_RELE(mi->mi_vfsp); 3006 } 3007 } 3008 3009 /* 3010 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3011 * 3012 * This version can be called with a null nfs4_server_t arg, 3013 * and will either find the right one and handle locking, or 3014 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3015 */ 3016 void 3017 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3018 { 3019 nfs4_server_t *sp; 3020 3021 if (esp == NULL) { 3022 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3023 sp = find_nfs4_server_all(mi, 1); 3024 } else 3025 sp = esp; 3026 3027 if (sp != NULL) 3028 nfs4_remove_mi_from_server_nolock(mi, sp); 3029 3030 /* 3031 * If we had a valid esp as input, the calling function will be 3032 * responsible for unlocking the esp nfs4_server. 3033 */ 3034 if (esp == NULL) { 3035 if (sp != NULL) 3036 mutex_exit(&sp->s_lock); 3037 nfs_rw_exit(&mi->mi_recovlock); 3038 if (sp != NULL) 3039 nfs4_server_rele(sp); 3040 } 3041 } 3042 3043 /* 3044 * Return TRUE if the given server has any non-unmounted filesystems. 
3045 */ 3046 3047 bool_t 3048 nfs4_fs_active(nfs4_server_t *sp) 3049 { 3050 mntinfo4_t *mi; 3051 3052 ASSERT(MUTEX_HELD(&sp->s_lock)); 3053 3054 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3055 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3056 return (TRUE); 3057 } 3058 3059 return (FALSE); 3060 } 3061 3062 /* 3063 * Mark sp as finished and notify any waiters. 3064 */ 3065 3066 void 3067 nfs4_mark_srv_dead(nfs4_server_t *sp) 3068 { 3069 ASSERT(MUTEX_HELD(&sp->s_lock)); 3070 3071 sp->s_thread_exit = NFS4_THREAD_EXIT; 3072 cv_broadcast(&sp->cv_thread_exit); 3073 } 3074 3075 /* 3076 * Create a new nfs4_server_t structure. 3077 * Returns new node unlocked and not in list, but with a reference count of 3078 * 1. 3079 */ 3080 struct nfs4_server * 3081 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3082 { 3083 struct nfs4_server *np; 3084 timespec_t tt; 3085 union { 3086 struct { 3087 uint32_t sec; 3088 uint32_t subsec; 3089 } un_curtime; 3090 verifier4 un_verifier; 3091 } nfs4clientid_verifier; 3092 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3093 int len; 3094 3095 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3096 np->saddr.len = svp->sv_addr.len; 3097 np->saddr.maxlen = svp->sv_addr.maxlen; 3098 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3099 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3100 np->s_refcnt = 1; 3101 3102 /* 3103 * Build the nfs_client_id4 for this server mount. Ensure 3104 * the verifier is useful and that the identification is 3105 * somehow based on the server's address for the case of 3106 * multi-homed servers. 3107 */ 3108 nfs4clientid_verifier.un_verifier = 0; 3109 gethrestime(&tt); 3110 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3111 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3112 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3113 3114 /* 3115 * calculate the length of the opaque identifier. 
Subtract 2 3116 * for the "%s" and add the traditional +1 for null 3117 * termination. 3118 */ 3119 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3120 np->clidtosend.id_len = len + np->saddr.maxlen; 3121 3122 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3123 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3124 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3125 3126 np->s_flags = 0; 3127 np->mntinfo4_list = NULL; 3128 /* save cred for issuing rfs4calls inside the renew thread */ 3129 crhold(cr); 3130 np->s_cred = cr; 3131 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3132 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3133 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3134 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3135 offsetof(rnode4_t, r_deleg_link)); 3136 np->s_thread_exit = 0; 3137 np->state_ref_count = 0; 3138 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3139 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3140 np->s_otw_call_count = 0; 3141 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3142 np->zoneid = getzoneid(); 3143 np->zone_globals = nfs4_get_callback_globals(); 3144 ASSERT(np->zone_globals != NULL); 3145 return (np); 3146 } 3147 3148 /* 3149 * Create a new nfs4_server_t structure and add it to the list. 3150 * Returns new node locked; reference must eventually be freed. 
3151 */ 3152 static struct nfs4_server * 3153 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3154 { 3155 nfs4_server_t *sp; 3156 3157 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3158 sp = new_nfs4_server(svp, cr); 3159 mutex_enter(&sp->s_lock); 3160 insque(sp, &nfs4_server_lst); 3161 sp->s_refcnt++; /* list gets a reference */ 3162 sp->clientid = 0; 3163 sp->s_flags |= N4S_INSERTED; 3164 return (sp); 3165 } 3166 3167 int nfs4_server_t_debug = 0; 3168 3169 #ifdef lint 3170 extern void 3171 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3172 #endif 3173 3174 #ifndef lint 3175 #ifdef DEBUG 3176 void 3177 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3178 { 3179 int hash16(void *p, int len); 3180 nfs4_server_t *np; 3181 3182 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3183 "dumping nfs4_server_t list in %s", txt)); 3184 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3185 "mi 0x%p, want clientid %llx, addr %d/%04X", 3186 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3187 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3188 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3189 np = np->forw) { 3190 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3191 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3192 np, (longlong_t)np->clientid, np->saddr.len, 3193 hash16((void *)np->saddr.buf, np->saddr.len), 3194 np->state_ref_count)); 3195 if (np->saddr.len == srv_p->sv_addr.len && 3196 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3197 np->saddr.len) == 0) 3198 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3199 " - address matches")); 3200 if (np->clientid == clientid || np->clientid == 0) 3201 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3202 " - clientid matches")); 3203 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3204 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3205 " - thread not exiting")); 3206 } 3207 delay(hz); 3208 } 3209 #endif 3210 #endif 3211 3212 3213 /* 3214 * Move a mntinfo4_t from one server list to another. 
3215 * Locking of the two nfs4_server_t nodes will be done in list order. 3216 * 3217 * Returns NULL if the current nfs4_server_t for the filesystem could not 3218 * be found (e.g., due to forced unmount). Otherwise returns a reference 3219 * to the new nfs4_server_t, which must eventually be freed. 3220 */ 3221 nfs4_server_t * 3222 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3223 { 3224 nfs4_server_t *p, *op = NULL, *np = NULL; 3225 int num_open; 3226 zoneid_t zoneid = nfs_zoneid(); 3227 3228 ASSERT(nfs_zone() == mi->mi_zone); 3229 3230 mutex_enter(&nfs4_server_lst_lock); 3231 #ifdef DEBUG 3232 if (nfs4_server_t_debug) 3233 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3234 #endif 3235 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3236 if (p->zoneid != zoneid) 3237 continue; 3238 if (p->saddr.len == old->sv_addr.len && 3239 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3240 p->s_thread_exit != NFS4_THREAD_EXIT) { 3241 op = p; 3242 mutex_enter(&op->s_lock); 3243 op->s_refcnt++; 3244 } 3245 if (p->saddr.len == new->sv_addr.len && 3246 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3247 p->s_thread_exit != NFS4_THREAD_EXIT) { 3248 np = p; 3249 mutex_enter(&np->s_lock); 3250 } 3251 if (op != NULL && np != NULL) 3252 break; 3253 } 3254 if (op == NULL) { 3255 /* 3256 * Filesystem has been forcibly unmounted. Bail out. 
3257 */ 3258 if (np != NULL) 3259 mutex_exit(&np->s_lock); 3260 mutex_exit(&nfs4_server_lst_lock); 3261 return (NULL); 3262 } 3263 if (np != NULL) { 3264 np->s_refcnt++; 3265 } else { 3266 #ifdef DEBUG 3267 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3268 "nfs4_move_mi: no target nfs4_server, will create.")); 3269 #endif 3270 np = add_new_nfs4_server(new, kcred); 3271 } 3272 mutex_exit(&nfs4_server_lst_lock); 3273 3274 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3275 "nfs4_move_mi: for mi 0x%p, " 3276 "old servinfo4 0x%p, new servinfo4 0x%p, " 3277 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3278 (void*)mi, (void*)old, (void*)new, 3279 (void*)op, (void*)np)); 3280 ASSERT(op != NULL && np != NULL); 3281 3282 /* discard any delegations */ 3283 nfs4_deleg_discard(mi, op); 3284 3285 num_open = mi->mi_open_files; 3286 mi->mi_open_files = 0; 3287 op->state_ref_count -= num_open; 3288 ASSERT(op->state_ref_count >= 0); 3289 np->state_ref_count += num_open; 3290 nfs4_remove_mi_from_server_nolock(mi, op); 3291 mi->mi_open_files = num_open; 3292 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3293 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3294 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3295 3296 nfs4_add_mi_to_server(np, mi); 3297 3298 mutex_exit(&op->s_lock); 3299 nfs4_server_rele(op); 3300 mutex_exit(&np->s_lock); 3301 3302 return (np); 3303 } 3304 3305 /* 3306 * Search the nfs4_server list to find a match on this servinfo4 3307 * based on its address. 3308 * 3309 * Returns NULL if no match is found. Otherwise returns a reference (which 3310 * must eventually be freed) to a locked nfs4_server. 
3311 */ 3312 nfs4_server_t * 3313 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3314 { 3315 nfs4_server_t *np; 3316 zoneid_t zoneid = nfs_zoneid(); 3317 3318 mutex_enter(&nfs4_server_lst_lock); 3319 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3320 if (np->zoneid == zoneid && 3321 np->saddr.len == srv_p->sv_addr.len && 3322 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3323 np->saddr.len) == 0 && 3324 np->s_thread_exit != NFS4_THREAD_EXIT) { 3325 mutex_enter(&np->s_lock); 3326 np->s_refcnt++; 3327 mutex_exit(&nfs4_server_lst_lock); 3328 return (np); 3329 } 3330 } 3331 mutex_exit(&nfs4_server_lst_lock); 3332 return (NULL); 3333 } 3334 3335 /* 3336 * Search the nfs4_server_lst to find a match based on clientid and 3337 * addr. 3338 * Locks the nfs4_server down if it is found and returns a reference that 3339 * must eventually be freed. 3340 * 3341 * Returns NULL it no match is found. This means one of two things: either 3342 * mi is in the process of being mounted, or mi has been unmounted. 3343 * 3344 * The caller should be holding mi->mi_recovlock, and it should continue to 3345 * hold the lock until done with the returned nfs4_server_t. Once 3346 * mi->mi_recovlock is released, there is no guarantee that the returned 3347 * mi->nfs4_server_t will continue to correspond to mi. 3348 */ 3349 nfs4_server_t * 3350 find_nfs4_server(mntinfo4_t *mi) 3351 { 3352 return (find_nfs4_server_all(mi, 0)); 3353 } 3354 3355 /* 3356 * Same as above, but takes an "all" parameter which can be 3357 * set to 1 if the caller wishes to find nfs4_server_t's which 3358 * have been marked for termination by the exit of the renew 3359 * thread. This should only be used by operations which are 3360 * cleaning up and will not cause an OTW op. 
3361 */ 3362 nfs4_server_t * 3363 find_nfs4_server_all(mntinfo4_t *mi, int all) 3364 { 3365 nfs4_server_t *np; 3366 servinfo4_t *svp; 3367 zoneid_t zoneid = mi->mi_zone->zone_id; 3368 3369 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3370 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3371 /* 3372 * This can be called from nfs4_unmount() which can be called from the 3373 * global zone, hence it's legal for the global zone to muck with 3374 * another zone's server list, as long as it doesn't try to contact 3375 * them. 3376 */ 3377 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3378 nfs_global_client_only != 0); 3379 3380 /* 3381 * The nfs4_server_lst_lock global lock is held when we get a new 3382 * clientid (via SETCLIENTID OTW). Holding this global lock and 3383 * mi_recovlock (READER is fine) ensures that the nfs4_server 3384 * and this mntinfo4 can't get out of sync, so the following search is 3385 * always valid. 3386 */ 3387 mutex_enter(&nfs4_server_lst_lock); 3388 #ifdef DEBUG 3389 if (nfs4_server_t_debug) { 3390 /* mi->mi_clientid is unprotected, ok for debug output */ 3391 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3392 mi->mi_curr_serv); 3393 } 3394 #endif 3395 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3396 mutex_enter(&np->s_lock); 3397 svp = mi->mi_curr_serv; 3398 3399 if (np->zoneid == zoneid && 3400 np->clientid == mi->mi_clientid && 3401 np->saddr.len == svp->sv_addr.len && 3402 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3403 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3404 mutex_exit(&nfs4_server_lst_lock); 3405 np->s_refcnt++; 3406 return (np); 3407 } 3408 mutex_exit(&np->s_lock); 3409 } 3410 mutex_exit(&nfs4_server_lst_lock); 3411 3412 return (NULL); 3413 } 3414 3415 /* 3416 * Release the reference to sp and destroy it if that's the last one. 
3417 */ 3418 3419 void 3420 nfs4_server_rele(nfs4_server_t *sp) 3421 { 3422 mutex_enter(&sp->s_lock); 3423 ASSERT(sp->s_refcnt > 0); 3424 sp->s_refcnt--; 3425 if (sp->s_refcnt > 0) { 3426 mutex_exit(&sp->s_lock); 3427 return; 3428 } 3429 if (!(sp->s_flags & N4S_INSERTED)) { 3430 destroy_nfs4_server(sp); 3431 return; 3432 } 3433 mutex_exit(&sp->s_lock); 3434 mutex_enter(&nfs4_server_lst_lock); 3435 mutex_enter(&sp->s_lock); 3436 if (sp->s_refcnt > 0) { 3437 mutex_exit(&sp->s_lock); 3438 mutex_exit(&nfs4_server_lst_lock); 3439 return; 3440 } 3441 if (sp->s_flags & N4S_INSERTED) { 3442 remque(sp); 3443 sp->forw = sp->back = NULL; 3444 sp->s_flags &= ~N4S_INSERTED; 3445 } 3446 mutex_exit(&nfs4_server_lst_lock); 3447 destroy_nfs4_server(sp); 3448 } 3449 3450 static void 3451 destroy_nfs4_server(nfs4_server_t *sp) 3452 { 3453 ASSERT(MUTEX_HELD(&sp->s_lock)); 3454 ASSERT(!(sp->s_flags & N4S_INSERTED)); 3455 ASSERT(sp->s_refcnt == 0); 3456 ASSERT(sp->s_otw_call_count == 0); 3457 3458 remove_all_mi(sp); 3459 3460 crfree(sp->s_cred); 3461 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3462 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3463 mutex_exit(&sp->s_lock); 3464 3465 /* destroy the nfs4_server */ 3466 nfs4callback_destroy(sp); 3467 list_destroy(&sp->s_deleg_list); 3468 mutex_destroy(&sp->s_lock); 3469 cv_destroy(&sp->cv_thread_exit); 3470 cv_destroy(&sp->s_cv_otw_count); 3471 cv_destroy(&sp->wait_cb_null); 3472 nfs_rw_destroy(&sp->s_recovlock); 3473 kmem_free(sp, sizeof (*sp)); 3474 } 3475 3476 /* 3477 * Lock sp, but only if it's still active (in the list and hasn't been 3478 * flagged as exiting) or 'all' is non-zero. 3479 * Returns TRUE if sp got locked and adds a reference to sp. 
3480 */ 3481 bool_t 3482 nfs4_server_vlock(nfs4_server_t *sp, int all) 3483 { 3484 nfs4_server_t *np; 3485 3486 mutex_enter(&nfs4_server_lst_lock); 3487 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3488 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3489 all != 0)) { 3490 mutex_enter(&np->s_lock); 3491 np->s_refcnt++; 3492 mutex_exit(&nfs4_server_lst_lock); 3493 return (TRUE); 3494 } 3495 } 3496 mutex_exit(&nfs4_server_lst_lock); 3497 return (FALSE); 3498 } 3499 3500 /* 3501 * Fork off a thread to free the data structures for a mount. 3502 */ 3503 3504 static void 3505 async_free_mount(vfs_t *vfsp, cred_t *cr) 3506 { 3507 freemountargs_t *args; 3508 3509 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3510 args->fm_vfsp = vfsp; 3511 VFS_HOLD(vfsp); 3512 args->fm_cr = cr; 3513 crhold(cr); 3514 3515 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3516 minclsyspri); 3517 } 3518 3519 static void 3520 nfs4_free_mount_thread(freemountargs_t *args) 3521 { 3522 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3523 VFS_RELE(args->fm_vfsp); 3524 crfree(args->fm_cr); 3525 kmem_free(args, sizeof (freemountargs_t)); 3526 zthread_exit(); 3527 /* NOTREACHED */ 3528 } 3529 3530 /* 3531 * Thread to free the data structures for a given filesystem. 3532 */ 3533 static void 3534 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3535 { 3536 mntinfo4_t *mi = VFTOMI4(vfsp); 3537 nfs4_server_t *sp; 3538 callb_cpr_t cpr_info; 3539 kmutex_t cpr_lock; 3540 boolean_t async_thread; 3541 3542 /* 3543 * We need to participate in the CPR framework if this is a kernel 3544 * thread. 
3545 */ 3546 async_thread = (curproc == nfs_zone()->zone_zsched); 3547 if (async_thread) { 3548 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3549 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3550 "nfsv4AsyncUnmount"); 3551 } 3552 3553 /* 3554 * We need to wait for all outstanding OTW calls 3555 * and recovery to finish before we remove the mi 3556 * from the nfs4_server_t, as current pending 3557 * calls might still need this linkage (in order 3558 * to find a nfs4_server_t from a mntinfo4_t). 3559 */ 3560 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3561 sp = find_nfs4_server(mi); 3562 nfs_rw_exit(&mi->mi_recovlock); 3563 3564 if (sp) { 3565 while (sp->s_otw_call_count != 0) { 3566 if (async_thread) { 3567 mutex_enter(&cpr_lock); 3568 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3569 mutex_exit(&cpr_lock); 3570 } 3571 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3572 if (async_thread) { 3573 mutex_enter(&cpr_lock); 3574 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3575 mutex_exit(&cpr_lock); 3576 } 3577 } 3578 mutex_exit(&sp->s_lock); 3579 nfs4_server_rele(sp); 3580 sp = NULL; 3581 } 3582 3583 3584 mutex_enter(&mi->mi_lock); 3585 while (mi->mi_in_recovery != 0) { 3586 if (async_thread) { 3587 mutex_enter(&cpr_lock); 3588 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3589 mutex_exit(&cpr_lock); 3590 } 3591 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3592 if (async_thread) { 3593 mutex_enter(&cpr_lock); 3594 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3595 mutex_exit(&cpr_lock); 3596 } 3597 } 3598 mutex_exit(&mi->mi_lock); 3599 3600 /* 3601 * The original purge of the dnlc via 'dounmount' 3602 * doesn't guarantee that another dnlc entry was not 3603 * added while we waitied for all outstanding OTW 3604 * and recovery calls to finish. So re-purge the 3605 * dnlc now. 3606 */ 3607 (void) dnlc_purge_vfsp(vfsp, 0); 3608 3609 /* 3610 * We need to explicitly stop the manager thread; the asyc worker 3611 * threads can timeout and exit on their own. 
3612 */ 3613 nfs4_async_manager_stop(vfsp); 3614 3615 destroy_rtable4(vfsp, cr); 3616 3617 nfs4_remove_mi_from_server(mi, NULL); 3618 3619 if (mi->mi_io_kstats) { 3620 kstat_delete(mi->mi_io_kstats); 3621 mi->mi_io_kstats = NULL; 3622 } 3623 if (mi->mi_ro_kstats) { 3624 kstat_delete(mi->mi_ro_kstats); 3625 mi->mi_ro_kstats = NULL; 3626 } 3627 if (mi->mi_recov_ksp) { 3628 kstat_delete(mi->mi_recov_ksp); 3629 mi->mi_recov_ksp = NULL; 3630 } 3631 3632 if (async_thread) { 3633 mutex_enter(&cpr_lock); 3634 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3635 mutex_destroy(&cpr_lock); 3636 } 3637 } 3638