1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
 * All Rights Reserved
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/time.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/dnlc.h>
#include <sys/list.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

/*
 * Arguments passed to thread to free data structures from forced unmount.
 */

typedef struct {
	vfs_t *fm_vfsp;		/* file system being torn down */
	cred_t *fm_cr;		/* credentials used for the teardown */
} freemountargs_t;

static void async_free_mount(vfs_t *, cred_t *);
static void nfs4_free_mount(vfs_t *, cred_t *);
static void nfs4_free_mount_thread(freemountargs_t *);
static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);

/*
 * From rpcsec module (common/rpcsec).
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * Operation names for per-filesystem RPC statistics.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v4_tmpl in nfs_stats.c
 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access", "close", "commit", "create",
	"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
	"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
	"read", "readdir", "readlink", "remove", "rename", "renew",
	"restorefh", "savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};

/*
 * nfs4_max_mount_retry is the number of times the client will redrive
 * a mount compound before giving up and returning failure.  The intent
 * is to redrive mount compounds which fail NFS4ERR_STALE so that
 * if a component of the server path being mounted goes stale, it can
 * "recover" by redriving the mount compound (LOOKUP ops).  This recovery
 * code is needed outside of the recovery framework because mount is a
 * special case.  The client doesn't create vnodes/rnodes for components
 * of the server path being mounted.  The recovery code recovers real
 * client objects, not STALE FHs which map to components of the server
 * path being mounted.
 *
 * We could just fail the mount on the first time, but that would
 * instantly trigger failover (from nfs4_mount), and the client should
 * try to re-lookup the STALE FH before doing failover.  The easiest
 * way to "re-lookup" is to simply redrive the mount compound.
 */
static int nfs4_max_mount_retry = 2;

/*
 * nfs4 vfs operations.
 */
static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int nfs4_unmount(vfs_t *, int, cred_t *);
static int nfs4_root(vfs_t *, vnode_t **);
static int nfs4_statvfs(vfs_t *, struct statvfs64 *);
static int nfs4_sync(vfs_t *, short, cred_t *);
static int nfs4_vget(vfs_t *, vnode_t **, fid_t *);
static int nfs4_mountroot(vfs_t *, whymountroot_t);
static void nfs4_freevfs(vfs_t *);

static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
	int, cred_t *, zone_t *);

vfsops_t *nfs4_vfsops;

int nfs4_vfsinit(void);
void nfs4_vfsfini(void);
static void nfs4setclientid_init(void);
static void nfs4setclientid_fini(void);
static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
	struct nfs4_server *, nfs4_error_t *, int *);
static void destroy_nfs4_server(nfs4_server_t *);
static void remove_mi(nfs4_server_t *, mntinfo4_t *);

/*
 * Initialize the vfs structure
 */

/* File system type index assigned to us by nfs4init(). */
static int nfs4fstyp;


/*
 * Debug variable to check for rdma based
 * transport startup and cleanup.  Controlled
 * through /etc/system.  Off by default.
168 */ 169 extern int rdma_debug; 170 171 int 172 nfs4init(int fstyp, char *name) 173 { 174 static const fs_operation_def_t nfs4_vfsops_template[] = { 175 VFSNAME_MOUNT, nfs4_mount, 176 VFSNAME_UNMOUNT, nfs4_unmount, 177 VFSNAME_ROOT, nfs4_root, 178 VFSNAME_STATVFS, nfs4_statvfs, 179 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 180 VFSNAME_VGET, nfs4_vget, 181 VFSNAME_MOUNTROOT, nfs4_mountroot, 182 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 183 NULL, NULL 184 }; 185 int error; 186 187 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 188 if (error != 0) { 189 zcmn_err(GLOBAL_ZONEID, CE_WARN, 190 "nfs4init: bad vfs ops template"); 191 return (error); 192 } 193 194 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 195 if (error != 0) { 196 (void) vfs_freevfsops_by_type(fstyp); 197 zcmn_err(GLOBAL_ZONEID, CE_WARN, 198 "nfs4init: bad vnode ops template"); 199 return (error); 200 } 201 202 nfs4fstyp = fstyp; 203 204 (void) nfs4_vfsinit(); 205 206 (void) nfs4_init_dot_entries(); 207 208 return (0); 209 } 210 211 void 212 nfs4fini(void) 213 { 214 (void) nfs4_destroy_dot_entries(); 215 nfs4_vfsfini(); 216 } 217 218 /* 219 * Create a new sec_data structure to store AUTH_DH related data: 220 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 221 * flag set for NFS V4 since we are avoiding to contact the rpcbind 222 * daemon and is using the IP time service (IPPORT_TIMESERVER). 223 * 224 * sec_data can be freed by sec_clnt_freeinfo(). 
225 */ 226 struct sec_data * 227 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 228 struct knetconfig *knconf) { 229 struct sec_data *secdata; 230 dh_k4_clntdata_t *data; 231 char *pf, *p; 232 233 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 234 return (NULL); 235 236 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 237 secdata->flags = 0; 238 239 data = kmem_alloc(sizeof (*data), KM_SLEEP); 240 241 data->syncaddr.maxlen = syncaddr->maxlen; 242 data->syncaddr.len = syncaddr->len; 243 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 244 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 245 246 /* 247 * duplicate the knconf information for the 248 * new opaque data. 249 */ 250 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 251 *data->knconf = *knconf; 252 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 253 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 254 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 255 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 256 data->knconf->knc_protofmly = pf; 257 data->knconf->knc_proto = p; 258 259 /* move server netname to the sec_data structure */ 260 data->netname = kmem_alloc(nlen, KM_SLEEP); 261 bcopy(netname, data->netname, nlen); 262 data->netnamelen = (int)nlen; 263 264 secdata->secmod = AUTH_DH; 265 secdata->rpcflavor = AUTH_DH; 266 secdata->data = (caddr_t)data; 267 268 return (secdata); 269 } 270 271 static int 272 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 273 { 274 servinfo4_t *si; 275 276 /* 277 * Iterate over the servinfo4 list to make sure 278 * we do not have a duplicate. 
Skip any servinfo4 279 * that has been marked "NOT IN USE" 280 */ 281 for (si = svp_head; si; si = si->sv_next) { 282 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 283 if (si->sv_flags & SV4_NOTINUSE) { 284 nfs_rw_exit(&si->sv_lock); 285 continue; 286 } 287 nfs_rw_exit(&si->sv_lock); 288 if (si == svp) 289 continue; 290 if (si->sv_addr.len == svp->sv_addr.len && 291 strcmp(si->sv_knconf->knc_protofmly, 292 svp->sv_knconf->knc_protofmly) == 0 && 293 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 294 si->sv_addr.len) == 0) { 295 /* it's a duplicate */ 296 return (1); 297 } 298 } 299 /* it's not a duplicate */ 300 return (0); 301 } 302 303 /* 304 * nfs mount vfsop 305 * Set up mount info record and attach it to vfs struct. 306 */ 307 static int 308 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 309 { 310 char *data = uap->dataptr; 311 int error; 312 vnode_t *rtvp; /* the server's root */ 313 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 314 size_t hlen; /* length of hostname */ 315 size_t nlen; /* length of netname */ 316 char netname[MAXNETNAMELEN+1]; /* server's netname */ 317 struct netbuf addr; /* server's address */ 318 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 319 struct knetconfig *knconf; /* transport knetconfig structure */ 320 struct knetconfig *rdma_knconf; /* rdma transport structure */ 321 rnode4_t *rp; 322 struct servinfo4 *svp; /* nfs server info */ 323 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 324 struct servinfo4 *svp_head; /* first nfs server info */ 325 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 326 struct sec_data *secdata; /* security data */ 327 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 328 STRUCT_DECL(knetconfig, knconf_tmp); 329 STRUCT_DECL(netbuf, addr_tmp); 330 int flags, addr_type; 331 char *p, *pf; 332 struct pathname pn; 333 char *userbufptr; 334 zone_t *zone = curproc->p_zone; 335 nfs4_error_t n4e; 336 337 if (secpolicy_fs_mount(cr, mvp, 
vfsp) != 0) 338 return (EPERM); 339 if (mvp->v_type != VDIR) 340 return (ENOTDIR); 341 /* 342 * get arguments 343 * 344 * nfs_args is now versioned and is extensible, so 345 * uap->datalen might be different from sizeof (args) 346 * in a compatible situation. 347 */ 348 more: 349 STRUCT_INIT(args, get_udatamodel()); 350 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 351 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 352 STRUCT_SIZE(args)))) 353 return (EFAULT); 354 355 flags = STRUCT_FGET(args, flags); 356 357 /* 358 * If the request changes the locking type, disallow the remount, 359 * because it's questionable whether we can transfer the 360 * locking state correctly. 361 */ 362 if (uap->flags & MS_REMOUNT) { 363 if ((mi = VFTOMI4(vfsp)) != NULL) { 364 uint_t new_mi_llock; 365 uint_t old_mi_llock; 366 367 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 368 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 369 if (old_mi_llock != new_mi_llock) 370 return (EBUSY); 371 } 372 return (0); 373 } 374 375 mutex_enter(&mvp->v_lock); 376 if (!(uap->flags & MS_OVERLAY) && 377 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 378 mutex_exit(&mvp->v_lock); 379 return (EBUSY); 380 } 381 mutex_exit(&mvp->v_lock); 382 383 /* make sure things are zeroed for errout: */ 384 rtvp = NULL; 385 mi = NULL; 386 addr.buf = NULL; 387 syncaddr.buf = NULL; 388 secdata = NULL; 389 390 /* 391 * A valid knetconfig structure is required. 392 */ 393 if (!(flags & NFSMNT_KNCONF)) 394 return (EINVAL); 395 396 /* 397 * Allocate a servinfo4 struct. 398 */ 399 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 400 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 401 if (svp_tail) { 402 svp_2ndlast = svp_tail; 403 svp_tail->sv_next = svp; 404 } else { 405 svp_head = svp; 406 svp_2ndlast = svp; 407 } 408 409 svp_tail = svp; 410 411 /* 412 * Allocate space for a knetconfig structure and 413 * its strings and copy in from user-land. 
414 */ 415 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 416 svp->sv_knconf = knconf; 417 STRUCT_INIT(knconf_tmp, get_udatamodel()); 418 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 419 STRUCT_SIZE(knconf_tmp))) { 420 sv4_free(svp_head); 421 return (EFAULT); 422 } 423 424 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 425 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 426 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 427 if (get_udatamodel() != DATAMODEL_LP64) { 428 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 429 } else { 430 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 431 } 432 433 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 434 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 435 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 436 if (error) { 437 kmem_free(pf, KNC_STRSIZE); 438 kmem_free(p, KNC_STRSIZE); 439 sv4_free(svp_head); 440 return (error); 441 } 442 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 443 if (error) { 444 kmem_free(pf, KNC_STRSIZE); 445 kmem_free(p, KNC_STRSIZE); 446 sv4_free(svp_head); 447 return (error); 448 } 449 if (strcmp(p, NC_UDP) == 0) { 450 kmem_free(pf, KNC_STRSIZE); 451 kmem_free(p, KNC_STRSIZE); 452 sv4_free(svp_head); 453 return (ENOTSUP); 454 } 455 knconf->knc_protofmly = pf; 456 knconf->knc_proto = p; 457 458 /* 459 * Get server address 460 */ 461 STRUCT_INIT(addr_tmp, get_udatamodel()); 462 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 463 STRUCT_SIZE(addr_tmp))) { 464 error = EFAULT; 465 goto errout; 466 } 467 468 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 469 addr.len = STRUCT_FGET(addr_tmp, len); 470 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 471 addr.maxlen = addr.len; 472 if (copyin(userbufptr, addr.buf, addr.len)) { 473 kmem_free(addr.buf, addr.len); 474 error = EFAULT; 475 goto errout; 476 } 477 478 svp->sv_addr = addr; 479 480 /* 481 * Get the root fhandle 482 */ 483 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 484 485 if (error) 486 goto errout; 487 488 /* Volatile fh: keep server paths, so use actual-size strings */ 489 svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 490 bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen); 491 svp->sv_path[pn.pn_pathlen] = '\0'; 492 svp->sv_pathlen = pn.pn_pathlen + 1; 493 pn_free(&pn); 494 495 /* 496 * Get server's hostname 497 */ 498 if (flags & NFSMNT_HOSTNAME) { 499 error = copyinstr(STRUCT_FGETP(args, hostname), 500 netname, sizeof (netname), &hlen); 501 if (error) 502 goto errout; 503 } else { 504 char *p = "unknown-host"; 505 hlen = strlen(p) + 1; 506 (void) strcpy(netname, p); 507 } 508 svp->sv_hostnamelen = hlen; 509 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 510 (void) strcpy(svp->sv_hostname, netname); 511 512 /* 513 * RDMA MOUNT SUPPORT FOR NFS v4. 514 * Establish, is it possible to use RDMA, if so overload the 515 * knconf with rdma specific knconf and free the orignal knconf. 516 */ 517 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 518 /* 519 * Determine the addr type for RDMA, IPv4 or v6. 520 */ 521 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 522 addr_type = AF_INET; 523 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 524 addr_type = AF_INET6; 525 526 if (rdma_reachable(addr_type, &svp->sv_addr, 527 &rdma_knconf) == 0) { 528 /* 529 * If successful, hijack the orignal knconf and 530 * replace with the new one, depending on the flags. 531 */ 532 svp->sv_origknconf = svp->sv_knconf; 533 svp->sv_knconf = rdma_knconf; 534 knconf = rdma_knconf; 535 } else { 536 if (flags & NFSMNT_TRYRDMA) { 537 #ifdef DEBUG 538 if (rdma_debug) 539 zcmn_err(getzoneid(), CE_WARN, 540 "no RDMA onboard, revert\n"); 541 #endif 542 } 543 544 if (flags & NFSMNT_DORDMA) { 545 /* 546 * If proto=rdma is specified and no RDMA 547 * path to this server is avialable then 548 * ditch this server. 
549 * This is not included in the mountable 550 * server list or the replica list. 551 * Check if more servers are specified; 552 * Failover case, otherwise bail out of mount. 553 */ 554 if (STRUCT_FGET(args, nfs_args_ext) == 555 NFS_ARGS_EXTB && STRUCT_FGETP(args, 556 nfs_ext_u.nfs_extB.next) != NULL) { 557 if (uap->flags & MS_RDONLY && 558 !(flags & NFSMNT_SOFT)) { 559 data = (char *) 560 STRUCT_FGETP(args, 561 nfs_ext_u.nfs_extB.next); 562 if (svp_head->sv_next == NULL) { 563 svp_tail = NULL; 564 svp_2ndlast = NULL; 565 sv4_free(svp_head); 566 goto more; 567 } else { 568 svp_tail = svp_2ndlast; 569 svp_2ndlast->sv_next = 570 NULL; 571 sv4_free(svp); 572 goto more; 573 } 574 } 575 } else { 576 /* 577 * This is the last server specified 578 * in the nfs_args list passed down 579 * and its not rdma capable. 580 */ 581 if (svp_head->sv_next == NULL) { 582 /* 583 * Is this the only one 584 */ 585 error = EINVAL; 586 #ifdef DEBUG 587 if (rdma_debug) 588 zcmn_err(getzoneid(), 589 CE_WARN, 590 "No RDMA srv"); 591 #endif 592 goto errout; 593 } else { 594 /* 595 * There is list, since some 596 * servers specified before 597 * this passed all requirements 598 */ 599 svp_tail = svp_2ndlast; 600 svp_2ndlast->sv_next = NULL; 601 sv4_free(svp); 602 goto proceed; 603 } 604 } 605 } 606 } 607 } 608 609 /* 610 * If there are syncaddr and netname data, load them in. This is 611 * to support data needed for NFSV4 when AUTH_DH is the negotiated 612 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 
613 */ 614 netname[0] = '\0'; 615 if (flags & NFSMNT_SECURE) { 616 617 /* get syncaddr */ 618 STRUCT_INIT(addr_tmp, get_udatamodel()); 619 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 620 STRUCT_SIZE(addr_tmp))) { 621 error = EINVAL; 622 goto errout; 623 } 624 userbufptr = STRUCT_FGETP(addr_tmp, buf); 625 syncaddr.len = STRUCT_FGET(addr_tmp, len); 626 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 627 syncaddr.maxlen = syncaddr.len; 628 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 629 kmem_free(syncaddr.buf, syncaddr.len); 630 error = EFAULT; 631 goto errout; 632 } 633 634 /* get server's netname */ 635 if (copyinstr(STRUCT_FGETP(args, netname), netname, 636 sizeof (netname), &nlen)) { 637 kmem_free(syncaddr.buf, syncaddr.len); 638 error = EFAULT; 639 goto errout; 640 } 641 netname[nlen] = '\0'; 642 643 svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr, 644 knconf); 645 } 646 647 /* 648 * Get the extention data which has the security data structure. 649 * This includes data for AUTH_SYS as well. 650 */ 651 if (flags & NFSMNT_NEWARGS) { 652 switch (STRUCT_FGET(args, nfs_args_ext)) { 653 case NFS_ARGS_EXTA: 654 case NFS_ARGS_EXTB: 655 /* 656 * Indicating the application is using the new 657 * sec_data structure to pass in the security 658 * data. 659 */ 660 if (STRUCT_FGETP(args, 661 nfs_ext_u.nfs_extA.secdata) == NULL) { 662 error = EINVAL; 663 } else { 664 error = sec_clnt_loadinfo( 665 (struct sec_data *)STRUCT_FGETP(args, 666 nfs_ext_u.nfs_extA.secdata), 667 &secdata, get_udatamodel()); 668 } 669 break; 670 671 default: 672 error = EINVAL; 673 break; 674 } 675 676 } else if (flags & NFSMNT_SECURE) { 677 /* 678 * NFSMNT_SECURE is deprecated but we keep it 679 * to support the rouge user generated application 680 * that may use this undocumented interface to do 681 * AUTH_DH security. 
682 */ 683 secdata = create_authdh_data(netname, nlen, &syncaddr, knconf); 684 685 } else { 686 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 687 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 688 secdata->data = NULL; 689 } 690 691 svp->sv_secdata = secdata; 692 693 /* syncaddr is no longer needed. */ 694 if (syncaddr.buf != NULL) 695 kmem_free(syncaddr.buf, syncaddr.len); 696 697 /* 698 * User does not explictly specify a flavor, and a user 699 * defined default flavor is passed down. 700 */ 701 if (flags & NFSMNT_SECDEFAULT) { 702 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 703 svp->sv_flags |= SV4_TRYSECDEFAULT; 704 nfs_rw_exit(&svp->sv_lock); 705 } 706 707 /* 708 * Failover support: 709 * 710 * We may have a linked list of nfs_args structures, 711 * which means the user is looking for failover. If 712 * the mount is either not "read-only" or "soft", 713 * we want to bail out with EINVAL. 714 */ 715 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 716 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 717 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 718 data = (char *)STRUCT_FGETP(args, 719 nfs_ext_u.nfs_extB.next); 720 goto more; 721 } 722 error = EINVAL; 723 goto errout; 724 } 725 726 /* 727 * Determine the zone we're being mounted into. 728 */ 729 if (getzoneid() == GLOBAL_ZONEID) { 730 zone_t *mntzone; 731 732 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 733 ASSERT(mntzone != NULL); 734 zone_rele(mntzone); 735 if (mntzone != zone) { 736 error = EBUSY; 737 goto errout; 738 } 739 } 740 741 /* 742 * Stop the mount from going any further if the zone is going away. 743 */ 744 if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) { 745 error = EBUSY; 746 goto errout; 747 } 748 749 /* 750 * Get root vnode. 
751 */ 752 proceed: 753 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, zone); 754 755 if (error) 756 goto errout; 757 758 mi = VTOMI4(rtvp); 759 760 /* 761 * Send client id to the server, if necessary 762 */ 763 nfs4_error_zinit(&n4e); 764 nfs4setclientid(mi, cr, FALSE, &n4e); 765 error = n4e.error; 766 767 if (error) 768 goto errout; 769 770 /* 771 * Set option fields in the mount info record 772 */ 773 774 if (svp_head->sv_next) { 775 mutex_enter(&mi->mi_lock); 776 mi->mi_flags |= MI4_LLOCK; 777 mutex_exit(&mi->mi_lock); 778 } 779 780 error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 781 782 errout: 783 if (error) { 784 if (rtvp != NULL) { 785 rp = VTOR4(rtvp); 786 if (rp->r_flags & R4HASHED) 787 rp4_rmhash(rp); 788 if (rp->r_flags & R4FILEIDMAP) 789 rp4_fileid_map_remove(rp); 790 } 791 if (mi != NULL) { 792 nfs4_async_stop(vfsp); 793 nfs4_async_manager_stop(vfsp); 794 nfs4_remove_mi_from_server(mi, NULL); 795 /* 796 * In this error path we need to sfh4_rele() before 797 * we free the mntinfo4_t as sfh4_rele() has a 798 * dependancy on mi_fh_lock. 
799 */ 800 if (rtvp != NULL) 801 VN_RELE(rtvp); 802 if (mi->mi_io_kstats) { 803 kstat_delete(mi->mi_io_kstats); 804 mi->mi_io_kstats = NULL; 805 } 806 if (mi->mi_ro_kstats) { 807 kstat_delete(mi->mi_ro_kstats); 808 mi->mi_ro_kstats = NULL; 809 } 810 if (mi->mi_recov_ksp) { 811 kstat_delete(mi->mi_recov_ksp); 812 mi->mi_recov_ksp = NULL; 813 } 814 nfs_free_mi4(mi); 815 return (error); 816 } 817 sv4_free(svp_head); 818 } 819 820 if (rtvp != NULL) 821 VN_RELE(rtvp); 822 823 return (error); 824 } 825 826 #ifdef DEBUG 827 #define VERS_MSG "NFS4 server " 828 #else 829 #define VERS_MSG "NFS server " 830 #endif 831 832 #define READ_MSG \ 833 VERS_MSG "%s returned 0 for read transfer size" 834 #define WRITE_MSG \ 835 VERS_MSG "%s returned 0 for write transfer size" 836 #define SIZE_MSG \ 837 VERS_MSG "%s returned 0 for maximum file size" 838 839 /* 840 * Get the symbolic link text from the server for a given filehandle 841 * of that symlink. 842 * 843 * (get symlink text) PUTFH READLINK 844 */ 845 static int 846 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 847 int flags) 848 { 849 COMPOUND4args_clnt args; 850 COMPOUND4res_clnt res; 851 int doqueue; 852 nfs_argop4 argop[2]; 853 nfs_resop4 *resop; 854 READLINK4res *lr_res; 855 uint_t len; 856 bool_t needrecov = FALSE; 857 nfs4_recov_state_t recov_state; 858 nfs4_sharedfh_t *sfh; 859 nfs4_error_t e; 860 int num_retry = nfs4_max_mount_retry; 861 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 862 863 sfh = sfh4_get(fh, mi); 864 recov_state.rs_flags = 0; 865 recov_state.rs_num_retry_despite_err = 0; 866 867 recov_retry: 868 nfs4_error_zinit(&e); 869 870 args.array_len = 2; 871 args.array = argop; 872 args.ctag = TAG_GET_SYMLINK; 873 874 if (! recovery) { 875 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 876 if (e.error) { 877 sfh4_rele(&sfh); 878 return (e.error); 879 } 880 } 881 882 /* 0. 
putfh symlink fh */ 883 argop[0].argop = OP_CPUTFH; 884 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 885 886 /* 1. readlink */ 887 argop[1].argop = OP_READLINK; 888 889 doqueue = 1; 890 891 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 892 893 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 894 895 if (needrecov && !recovery && num_retry-- > 0) { 896 897 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 898 "getlinktext_otw: initiating recovery\n")); 899 900 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 901 OP_READLINK, NULL) == FALSE) { 902 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 903 if (!e.error) 904 (void) xdr_free(xdr_COMPOUND4res_clnt, 905 (caddr_t)&res); 906 goto recov_retry; 907 } 908 } 909 910 /* 911 * If non-NFS4 pcol error and/or we weren't able to recover. 912 */ 913 if (e.error != 0) { 914 if (! recovery) 915 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 916 sfh4_rele(&sfh); 917 return (e.error); 918 } 919 920 if (res.status) { 921 e.error = geterrno4(res.status); 922 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 923 if (! recovery) 924 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 925 sfh4_rele(&sfh); 926 return (e.error); 927 } 928 929 /* res.status == NFS4_OK */ 930 ASSERT(res.status == NFS4_OK); 931 932 resop = &res.array[1]; /* readlink res */ 933 lr_res = &resop->nfs_resop4_u.opreadlink; 934 935 /* treat symlink name as data */ 936 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 937 938 if (! recovery) 939 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 940 sfh4_rele(&sfh); 941 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 942 943 return (0); 944 } 945 946 /* 947 * Skip over consecutive slashes and "/./" in a pathname. 948 */ 949 void 950 pathname_skipslashdot(struct pathname *pnp) 951 { 952 char *c1, *c2; 953 954 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 955 956 c1 = pnp->pn_path + 1; 957 c2 = pnp->pn_path + 2; 958 959 if (*c1 == '.' 
&& (*c2 == '/' || *c2 == '\0')) { 960 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 961 pnp->pn_pathlen = pnp->pn_pathlen - 2; 962 } else { 963 pnp->pn_path++; 964 pnp->pn_pathlen--; 965 } 966 } 967 } 968 969 /* 970 * Resolve a symbolic link path. The symlink is in the nth component of 971 * svp->sv_path and has an nfs4 file handle "fh". 972 * Upon return, the sv_path will point to the new path that has the nth 973 * component resolved to its symlink text. 974 */ 975 int 976 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 977 cred_t *cr, int flags) 978 { 979 char *oldpath; 980 char *symlink, *newpath; 981 struct pathname oldpn, newpn; 982 char component[MAXNAMELEN]; 983 int i, addlen, error = 0; 984 int oldpathlen; 985 986 /* Get the symbolic link text over the wire. */ 987 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 988 989 if (error || symlink == NULL || strlen(symlink) == 0) 990 return (error); 991 992 /* 993 * Compose the new pathname. 994 * Note: 995 * - only the nth component is resolved for the pathname. 996 * - pathname.pn_pathlen does not count the ending null byte. 997 */ 998 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 999 oldpath = svp->sv_path; 1000 oldpathlen = svp->sv_pathlen; 1001 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1002 nfs_rw_exit(&svp->sv_lock); 1003 kmem_free(symlink, strlen(symlink) + 1); 1004 return (error); 1005 } 1006 nfs_rw_exit(&svp->sv_lock); 1007 pn_alloc(&newpn); 1008 1009 /* 1010 * Skip over previous components from the oldpath so that the 1011 * oldpn.pn_path will point to the symlink component. Skip 1012 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1013 * pn_getcompnent can get the component. 
1014 */ 1015 for (i = 1; i < nth; i++) { 1016 pathname_skipslashdot(&oldpn); 1017 error = pn_getcomponent(&oldpn, component); 1018 if (error) 1019 goto out; 1020 } 1021 1022 /* 1023 * Copy the old path upto the component right before the symlink 1024 * if the symlink is not an absolute path. 1025 */ 1026 if (symlink[0] != '/') { 1027 addlen = oldpn.pn_path - oldpn.pn_buf; 1028 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1029 newpn.pn_pathlen += addlen; 1030 newpn.pn_path += addlen; 1031 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1032 newpn.pn_pathlen++; 1033 newpn.pn_path++; 1034 } 1035 1036 /* copy the resolved symbolic link text */ 1037 addlen = strlen(symlink); 1038 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1039 error = ENAMETOOLONG; 1040 goto out; 1041 } 1042 bcopy(symlink, newpn.pn_path, addlen); 1043 newpn.pn_pathlen += addlen; 1044 newpn.pn_path += addlen; 1045 1046 /* 1047 * Check if there is any remaining path after the symlink component. 1048 * First, skip the symlink component. 1049 */ 1050 pathname_skipslashdot(&oldpn); 1051 if (error = pn_getcomponent(&oldpn, component)) 1052 goto out; 1053 1054 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1055 1056 /* 1057 * Copy the remaining path to the new pathname if there is any. 
1058 */ 1059 if (addlen > 0) { 1060 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1061 error = ENAMETOOLONG; 1062 goto out; 1063 } 1064 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1065 newpn.pn_pathlen += addlen; 1066 } 1067 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1068 1069 /* get the newpath and store it in the servinfo4_t */ 1070 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1071 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1072 newpath[newpn.pn_pathlen] = '\0'; 1073 1074 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1075 svp->sv_path = newpath; 1076 svp->sv_pathlen = strlen(newpath) + 1; 1077 nfs_rw_exit(&svp->sv_lock); 1078 1079 kmem_free(oldpath, oldpathlen); 1080 out: 1081 kmem_free(symlink, strlen(symlink) + 1); 1082 pn_free(&newpn); 1083 pn_free(&oldpn); 1084 1085 return (error); 1086 } 1087 1088 /* 1089 * Get the root filehandle for the given filesystem and server, and update 1090 * svp. 1091 * 1092 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1093 * to coordinate with recovery. Otherwise, the caller is assumed to be 1094 * the recovery thread or have already done a start_fop. 1095 * 1096 * Errors are returned by the nfs4_error_t parameter. 
1097 */ 1098 1099 static void 1100 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1101 int flags, cred_t *cr, nfs4_error_t *ep) 1102 { 1103 COMPOUND4args_clnt args; 1104 COMPOUND4res_clnt res; 1105 int doqueue = 1; 1106 nfs_argop4 *argop; 1107 nfs_resop4 *resop; 1108 nfs4_ga_res_t *garp; 1109 int num_argops; 1110 lookup4_param_t lookuparg; 1111 nfs_fh4 *tmpfhp; 1112 nfs_fh4 *resfhp; 1113 bool_t needrecov = FALSE; 1114 nfs4_recov_state_t recov_state; 1115 int llndx; 1116 int nthcomp; 1117 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1118 1119 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1120 ASSERT(svp->sv_path != NULL); 1121 if (svp->sv_path[0] == '\0') { 1122 nfs_rw_exit(&svp->sv_lock); 1123 nfs4_error_init(ep, EINVAL); 1124 return; 1125 } 1126 nfs_rw_exit(&svp->sv_lock); 1127 1128 recov_state.rs_flags = 0; 1129 recov_state.rs_num_retry_despite_err = 0; 1130 recov_retry: 1131 nfs4_error_zinit(ep); 1132 1133 if (!recovery) { 1134 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1135 &recov_state, NULL); 1136 1137 /* 1138 * If recovery has been started and this request as 1139 * initiated by a mount, then we must wait for recovery 1140 * to finish before proceeding, otherwise, the error 1141 * cleanup would remove data structures needed by the 1142 * recovery thread. 1143 */ 1144 if (ep->error) { 1145 mutex_enter(&mi->mi_lock); 1146 if (mi->mi_flags & MI4_MOUNTING) { 1147 mi->mi_flags |= MI4_RECOV_FAIL; 1148 mi->mi_error = EIO; 1149 1150 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1151 "nfs4getfh_otw: waiting 4 recovery\n")); 1152 1153 while (mi->mi_flags & MI4_RECOV_ACTIV) 1154 cv_wait(&mi->mi_failover_cv, 1155 &mi->mi_lock); 1156 } 1157 mutex_exit(&mi->mi_lock); 1158 return; 1159 } 1160 1161 /* 1162 * If the client does not specify a specific flavor to use 1163 * and has not gotten a secinfo list from the server yet, 1164 * retrieve the secinfo list from the server and use a 1165 * flavor from the list to mount. 
1166 * 1167 * If fail to get the secinfo list from the server, then 1168 * try the default flavor. 1169 */ 1170 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1171 svp->sv_secinfo == NULL) { 1172 (void) nfs4_secinfo_path(mi, cr, FALSE); 1173 } 1174 } 1175 1176 if (recovery) 1177 args.ctag = TAG_REMAP_MOUNT; 1178 else 1179 args.ctag = TAG_MOUNT; 1180 1181 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1182 lookuparg.argsp = &args; 1183 lookuparg.resp = &res; 1184 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1185 lookuparg.trailer_len = 0; 1186 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1187 lookuparg.mi = mi; 1188 1189 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1190 ASSERT(svp->sv_path != NULL); 1191 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1192 nfs_rw_exit(&svp->sv_lock); 1193 1194 argop = args.array; 1195 num_argops = args.array_len; 1196 1197 /* choose public or root filehandle */ 1198 if (flags & NFS4_GETFH_PUBLIC) 1199 argop[0].argop = OP_PUTPUBFH; 1200 else 1201 argop[0].argop = OP_PUTROOTFH; 1202 1203 /* get fh */ 1204 argop[1].argop = OP_GETFH; 1205 1206 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1207 "nfs4getfh_otw: %s call, mi 0x%p", 1208 needrecov ? 
"recov" : "first", (void *)mi)); 1209 1210 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1211 1212 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1213 1214 if (needrecov) { 1215 bool_t abort; 1216 1217 if (recovery) { 1218 nfs4args_lookup_free(argop, num_argops); 1219 kmem_free(argop, 1220 lookuparg.arglen * sizeof (nfs_argop4)); 1221 if (!ep->error) 1222 (void) xdr_free(xdr_COMPOUND4res_clnt, 1223 (caddr_t)&res); 1224 return; 1225 } 1226 1227 NFS4_DEBUG(nfs4_client_recov_debug, 1228 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1229 1230 abort = nfs4_start_recovery(ep, mi, NULL, 1231 NULL, NULL, NULL, OP_GETFH, NULL); 1232 if (!ep->error) { 1233 ep->error = geterrno4(res.status); 1234 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1235 } 1236 nfs4args_lookup_free(argop, num_argops); 1237 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1238 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1239 /* have another go? */ 1240 if (abort == FALSE) 1241 goto recov_retry; 1242 return; 1243 } 1244 1245 /* 1246 * No recovery, but check if error is set. 1247 */ 1248 if (ep->error) { 1249 nfs4args_lookup_free(argop, num_argops); 1250 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1251 if (!recovery) 1252 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1253 needrecov); 1254 return; 1255 } 1256 1257 is_link_err: 1258 1259 /* for non-recovery errors */ 1260 if (res.status && res.status != NFS4ERR_SYMLINK) { 1261 if (!recovery) { 1262 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1263 needrecov); 1264 } 1265 nfs4args_lookup_free(argop, num_argops); 1266 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1267 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1268 return; 1269 } 1270 1271 /* 1272 * If any intermediate component in the path is a symbolic link, 1273 * resolve the symlink, then try mount again using the new path. 
1274 */ 1275 if (res.status == NFS4ERR_SYMLINK) { 1276 int where; 1277 1278 /* 1279 * This must be from OP_LOOKUP failure. The (cfh) for this 1280 * OP_LOOKUP is a symlink node. Found out where the 1281 * OP_GETFH is for the (cfh) that is a symlink node. 1282 * 1283 * Example: 1284 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1285 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1286 * 1287 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1288 * In this case, where = 7, nthcomp = 2. 1289 */ 1290 where = res.array_len - 2; 1291 ASSERT(where > 0); 1292 1293 resop = &res.array[where - 1]; 1294 ASSERT(resop->resop == OP_GETFH); 1295 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1296 nthcomp = res.array_len/3 - 1; 1297 1298 /* 1299 * Need to call nfs4_end_op before resolve_sympath to avoid 1300 * potential nfs4_start_op deadlock. 1301 */ 1302 if (!recovery) 1303 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1304 needrecov); 1305 1306 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1307 flags); 1308 1309 nfs4args_lookup_free(argop, num_argops); 1310 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1311 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1312 1313 if (ep->error) 1314 return; 1315 1316 goto recov_retry; 1317 } 1318 1319 /* getfh */ 1320 resop = &res.array[res.array_len - 2]; 1321 ASSERT(resop->resop == OP_GETFH); 1322 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1323 1324 /* getattr fsinfo res */ 1325 resop++; 1326 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1327 1328 *vtp = garp->n4g_va.va_type; 1329 1330 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1331 1332 mutex_enter(&mi->mi_lock); 1333 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1334 mi->mi_flags |= MI4_LINK; 1335 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1336 mi->mi_flags |= MI4_SYMLINK; 1337 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1338 mi->mi_flags |= MI4_ACL; 1339 mutex_exit(&mi->mi_lock); 
1340 1341 if (garp->n4g_ext_res->n4g_maxread == 0) 1342 mi->mi_tsize = 1343 MIN(MAXBSIZE, mi->mi_tsize); 1344 else 1345 mi->mi_tsize = 1346 MIN(garp->n4g_ext_res->n4g_maxread, 1347 mi->mi_tsize); 1348 1349 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1350 mi->mi_stsize = 1351 MIN(MAXBSIZE, mi->mi_stsize); 1352 else 1353 mi->mi_stsize = 1354 MIN(garp->n4g_ext_res->n4g_maxwrite, 1355 mi->mi_stsize); 1356 1357 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1358 mi->mi_maxfilesize = 1359 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1360 mi->mi_maxfilesize); 1361 1362 /* 1363 * If the final component is a a symbolic link, resolve the symlink, 1364 * then try mount again using the new path. 1365 * 1366 * Assume no symbolic link for root filesysm "/". 1367 */ 1368 if (*vtp == VLNK) { 1369 /* 1370 * nthcomp is the total result length minus 1371 * the 1st 2 OPs (PUTROOTFH, GETFH), 1372 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1373 * 1374 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1375 * LOOKUP 2nd-comp GETFH GETATTR 1376 * 1377 * (8 - 2)/3 = 2 1378 */ 1379 nthcomp = (res.array_len - 2)/3; 1380 1381 /* 1382 * Need to call nfs4_end_op before resolve_sympath to avoid 1383 * potential nfs4_start_op deadlock. See RFE 4777612. 1384 */ 1385 if (!recovery) 1386 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1387 needrecov); 1388 1389 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1390 flags); 1391 1392 nfs4args_lookup_free(argop, num_argops); 1393 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1394 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1395 1396 if (ep->error) 1397 return; 1398 1399 goto recov_retry; 1400 } 1401 1402 /* 1403 * We need to figure out where in the compound the getfh 1404 * for the parent directory is. If the object to be mounted is 1405 * the root, then there is no lookup at all: 1406 * PUTROOTFH, GETFH. 
1407 * If the object to be mounted is in the root, then the compound is: 1408 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1409 * In either of these cases, the index of the GETFH is 1. 1410 * If it is not at the root, then it's something like: 1411 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1412 * LOOKUP, GETFH, GETATTR 1413 * In this case, the index is llndx (last lookup index) - 2. 1414 */ 1415 if (llndx == -1 || llndx == 2) 1416 resop = &res.array[1]; 1417 else { 1418 ASSERT(llndx > 2); 1419 resop = &res.array[llndx-2]; 1420 } 1421 1422 ASSERT(resop->resop == OP_GETFH); 1423 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1424 1425 /* save the filehandles for the replica */ 1426 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1427 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1428 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1429 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1430 tmpfhp->nfs_fh4_len); 1431 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1432 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1433 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1434 1435 /* initialize fsid and supp_attrs for server fs */ 1436 svp->sv_fsid = garp->n4g_fsid; 1437 svp->sv_supp_attrs = 1438 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1439 1440 nfs_rw_exit(&svp->sv_lock); 1441 1442 nfs4args_lookup_free(argop, num_argops); 1443 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1444 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1445 if (!recovery) 1446 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1447 } 1448 1449 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1450 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1451 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1452 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1453 1454 /* 1455 * Remap the root filehandle for the given filesystem. 
1456 * 1457 * results returned via the nfs4_error_t parameter. 1458 */ 1459 void 1460 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1461 { 1462 struct servinfo4 *svp; 1463 vtype_t vtype; 1464 nfs_fh4 rootfh; 1465 int getfh_flags; 1466 char *orig_sv_path; 1467 int orig_sv_pathlen, num_retry; 1468 1469 mutex_enter(&mi->mi_lock); 1470 svp = mi->mi_curr_serv; 1471 getfh_flags = 1472 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1473 getfh_flags |= 1474 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1475 mutex_exit(&mi->mi_lock); 1476 1477 /* 1478 * Just in case server path being mounted contains 1479 * symlinks and fails w/STALE, save the initial sv_path 1480 * so we can redrive the initial mount compound with the 1481 * initial sv_path -- not a symlink-expanded version. 1482 * 1483 * This could only happen if a symlink was expanded 1484 * and the expanded mount compound failed stale. Because 1485 * it could be the case that the symlink was removed at 1486 * the server (and replaced with another symlink/dir, 1487 * we need to use the initial sv_path when attempting 1488 * to re-lookup everything and recover. 1489 */ 1490 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1491 orig_sv_pathlen = svp->sv_pathlen; 1492 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1493 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1494 nfs_rw_exit(&svp->sv_lock); 1495 1496 num_retry = nfs4_max_mount_retry; 1497 1498 do { 1499 /* 1500 * Get the root fh from the server. Retry nfs4_max_mount_retry 1501 * (2) times if it fails with STALE since the recovery 1502 * infrastructure doesn't do STALE recovery for components 1503 * of the server path to the object being mounted. 1504 */ 1505 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1506 1507 if (ep->error == 0 && ep->stat == NFS4_OK) 1508 break; 1509 1510 /* 1511 * For some reason, the mount compound failed. 
Before 1512 * retrying, we need to restore the original sv_path 1513 * because it might have contained symlinks that were 1514 * expanded by nfsgetfh_otw before the failure occurred. 1515 * replace current sv_path with orig sv_path -- just in case 1516 * it changed due to embedded symlinks. 1517 */ 1518 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1519 if (orig_sv_pathlen != svp->sv_pathlen) { 1520 kmem_free(svp->sv_path, svp->sv_pathlen); 1521 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1522 svp->sv_pathlen = orig_sv_pathlen; 1523 1524 } 1525 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1526 nfs_rw_exit(&svp->sv_lock); 1527 1528 } while (num_retry-- > 0); 1529 1530 kmem_free(orig_sv_path, orig_sv_pathlen); 1531 1532 if (ep->error != 0 || ep->stat != 0) { 1533 return; 1534 } 1535 1536 if (vtype != VNON && vtype != mi->mi_type) { 1537 /* shouldn't happen */ 1538 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1539 "nfs4_remap_root: server root vnode type (%d) doesn't " 1540 "match mount info (%d)", vtype, mi->mi_type); 1541 } 1542 1543 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1544 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1545 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1546 nfs_rw_exit(&svp->sv_lock); 1547 sfh4_update(mi->mi_rootfh, &rootfh); 1548 1549 #ifdef DEBUG 1550 /* 1551 * There shouldn't have been any other recovery activity on the 1552 * filesystem. 
1553 */ 1554 mutex_enter(&mi->mi_lock); 1555 ASSERT(mi->mi_curr_serv == svp); 1556 mutex_exit(&mi->mi_lock); 1557 #endif 1558 } 1559 1560 static int 1561 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1562 int flags, cred_t *cr, zone_t *zone) 1563 { 1564 vnode_t *rtvp = NULL; 1565 mntinfo4_t *mi; 1566 dev_t nfs_dev; 1567 int error = 0; 1568 rnode4_t *rp; 1569 int i; 1570 struct vattr va; 1571 vtype_t vtype = VNON; 1572 vtype_t tmp_vtype = VNON; 1573 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1574 nfs4_oo_hash_bucket_t *bucketp; 1575 nfs_fh4 fh; 1576 char *droptext = ""; 1577 struct nfs_stats *nfsstatsp; 1578 nfs4_fname_t *mfname; 1579 nfs4_error_t e; 1580 char *orig_sv_path; 1581 int orig_sv_pathlen, num_retry; 1582 cred_t *lcr = NULL, *tcr = cr; 1583 1584 nfsstatsp = zone_getspecific(nfsstat_zone_key, curproc->p_zone); 1585 ASSERT(nfsstatsp != NULL); 1586 1587 ASSERT(curproc->p_zone == zone); 1588 ASSERT(crgetref(cr)); 1589 1590 /* 1591 * Create a mount record and link it to the vfs struct. 
1592 */ 1593 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1594 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1595 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1596 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1597 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1598 1599 if (!(flags & NFSMNT_SOFT)) 1600 mi->mi_flags |= MI4_HARD; 1601 if ((flags & NFSMNT_NOPRINT)) 1602 mi->mi_flags |= MI4_NOPRINT; 1603 if (flags & NFSMNT_INT) 1604 mi->mi_flags |= MI4_INT; 1605 if (flags & NFSMNT_PUBLIC) 1606 mi->mi_flags |= MI4_PUBLIC; 1607 mi->mi_retrans = NFS_RETRIES; 1608 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1609 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1610 mi->mi_timeo = nfs4_cots_timeo; 1611 else 1612 mi->mi_timeo = NFS_TIMEO; 1613 mi->mi_prog = NFS_PROGRAM; 1614 mi->mi_vers = NFS_V4; 1615 mi->mi_rfsnames = rfsnames_v4; 1616 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1617 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1618 mi->mi_servers = svp; 1619 mi->mi_curr_serv = svp; 1620 mi->mi_acregmin = SEC2HR(ACREGMIN); 1621 mi->mi_acregmax = SEC2HR(ACREGMAX); 1622 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1623 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1624 mi->mi_fh_expire_type = FH4_PERSISTENT; 1625 mi->mi_clientid_next = NULL; 1626 mi->mi_clientid_prev = NULL; 1627 mi->mi_grace_wait = 0; 1628 mi->mi_error = 0; 1629 mi->mi_srvsettime = 0; 1630 1631 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1632 mi->mi_stsize = mi->mi_tsize; 1633 1634 if (flags & NFSMNT_DIRECTIO) 1635 mi->mi_flags |= MI4_DIRECTIO; 1636 1637 mi->mi_flags |= MI4_MOUNTING; 1638 1639 /* 1640 * Make a vfs struct for nfs. We do this here instead of below 1641 * because rtvp needs a vfs before we can do a getattr on it. 
1642 * 1643 * Assign a unique device id to the mount 1644 */ 1645 mutex_enter(&nfs_minor_lock); 1646 do { 1647 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1648 nfs_dev = makedevice(nfs_major, nfs_minor); 1649 } while (vfs_devismounted(nfs_dev)); 1650 mutex_exit(&nfs_minor_lock); 1651 1652 vfsp->vfs_dev = nfs_dev; 1653 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1654 vfsp->vfs_data = (caddr_t)mi; 1655 vfsp->vfs_fstype = nfsfstyp; 1656 vfsp->vfs_bsize = nfs4_bsize; 1657 1658 /* 1659 * Initialize fields used to support async putpage operations. 1660 */ 1661 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1662 mi->mi_async_clusters[i] = nfs4_async_clusters; 1663 mi->mi_async_init_clusters = nfs4_async_clusters; 1664 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1665 mi->mi_max_threads = nfs4_max_threads; 1666 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1667 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1668 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1669 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1670 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1671 1672 mi->mi_vfsp = vfsp; 1673 zone_hold(mi->mi_zone = zone); 1674 nfs4_mi_zonelist_add(mi); 1675 1676 /* 1677 * Initialize the <open owner/cred> hash table. 1678 */ 1679 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1680 bucketp = &(mi->mi_oo_list[i]); 1681 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1682 list_create(&bucketp->b_oo_hash_list, 1683 sizeof (nfs4_open_owner_t), 1684 offsetof(nfs4_open_owner_t, oo_hash_node)); 1685 } 1686 1687 /* 1688 * Initialize the freed open owner list. 
1689 */ 1690 mi->mi_foo_num = 0; 1691 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1692 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1693 offsetof(nfs4_open_owner_t, oo_foo_node)); 1694 1695 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1696 offsetof(nfs4_lost_rqst_t, lr_node)); 1697 1698 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1699 offsetof(nfs4_bseqid_entry_t, bs_node)); 1700 1701 /* 1702 * Initialize the msg buffer. 1703 */ 1704 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1705 offsetof(nfs4_debug_msg_t, msg_node)); 1706 mi->mi_msg_count = 0; 1707 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1708 1709 /* 1710 * Initialize kstats 1711 */ 1712 nfs4_mnt_kstat_init(vfsp); 1713 1714 /* 1715 * Initialize the shared filehandle pool, and get the fname for 1716 * the filesystem root. 1717 */ 1718 sfh4_createtab(&mi->mi_filehandles); 1719 mi->mi_fname = fn_get(NULL, "."); 1720 1721 /* 1722 * Initialize the fileid map. 1723 */ 1724 mutex_init(&mi->mi_fileid_lock, NULL, MUTEX_DEFAULT, NULL); 1725 rp4_fileid_map_init(&mi->mi_fileid_map); 1726 1727 /* 1728 * Save server path we're attempting to mount. 1729 */ 1730 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1731 orig_sv_pathlen = svp_head->sv_pathlen; 1732 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1733 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1734 nfs_rw_exit(&svp->sv_lock); 1735 1736 /* 1737 * Make the GETFH call to get root fh for each replica. 1738 */ 1739 if (svp_head->sv_next) 1740 droptext = ", dropping replica"; 1741 1742 /* 1743 * If the uid is set then set the creds for secure mounts 1744 * by proxy processes such as automountd. 
1745 */ 1746 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1747 if (svp->sv_secdata->uid != 0) { 1748 lcr = crdup(cr); 1749 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1750 tcr = lcr; 1751 } 1752 nfs_rw_exit(&svp->sv_lock); 1753 for (svp = svp_head; svp; svp = svp->sv_next) { 1754 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1755 nfs_cmn_err(error, CE_WARN, 1756 VERS_MSG "Host %s is a duplicate%s", 1757 svp->sv_hostname, droptext); 1758 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1759 svp->sv_flags |= SV4_NOTINUSE; 1760 nfs_rw_exit(&svp->sv_lock); 1761 continue; 1762 } 1763 mi->mi_curr_serv = svp; 1764 1765 /* 1766 * Just in case server path being mounted contains 1767 * symlinks and fails w/STALE, save the initial sv_path 1768 * so we can redrive the initial mount compound with the 1769 * initial sv_path -- not a symlink-expanded version. 1770 * 1771 * This could only happen if a symlink was expanded 1772 * and the expanded mount compound failed stale. Because 1773 * it could be the case that the symlink was removed at 1774 * the server (and replaced with another symlink/dir, 1775 * we need to use the initial sv_path when attempting 1776 * to re-lookup everything and recover. 1777 * 1778 * Other mount errors should evenutally be handled here also 1779 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1780 * failures will result in mount being redriven a few times. 1781 */ 1782 num_retry = nfs4_max_mount_retry; 1783 do { 1784 nfs4getfh_otw(mi, svp, &tmp_vtype, 1785 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1786 NFS4_GETFH_NEEDSOP, tcr, &e); 1787 1788 if (e.error == 0 && e.stat == NFS4_OK) 1789 break; 1790 1791 /* 1792 * replace current sv_path with orig sv_path -- just in 1793 * case it changed due to embedded symlinks. 
1794 */ 1795 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1796 if (orig_sv_pathlen != svp->sv_pathlen) { 1797 kmem_free(svp->sv_path, svp->sv_pathlen); 1798 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1799 KM_SLEEP); 1800 svp->sv_pathlen = orig_sv_pathlen; 1801 } 1802 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1803 nfs_rw_exit(&svp->sv_lock); 1804 1805 } while (num_retry-- > 0); 1806 1807 error = e.error ? e.error : geterrno4(e.stat); 1808 if (error) { 1809 nfs_cmn_err(error, CE_WARN, 1810 VERS_MSG "initial call to %s failed%s: %m", 1811 svp->sv_hostname, droptext); 1812 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1813 svp->sv_flags |= SV4_NOTINUSE; 1814 nfs_rw_exit(&svp->sv_lock); 1815 mi->mi_flags &= ~MI4_RECOV_FAIL; 1816 mi->mi_error = 0; 1817 continue; 1818 } 1819 1820 if (tmp_vtype == VBAD) { 1821 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1822 VERS_MSG "%s returned a bad file type for " 1823 "root%s", svp->sv_hostname, droptext); 1824 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1825 svp->sv_flags |= SV4_NOTINUSE; 1826 nfs_rw_exit(&svp->sv_lock); 1827 continue; 1828 } 1829 1830 if (vtype == VNON) { 1831 vtype = tmp_vtype; 1832 } else if (vtype != tmp_vtype) { 1833 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1834 VERS_MSG "%s returned a different file type " 1835 "for root%s", svp->sv_hostname, droptext); 1836 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1837 svp->sv_flags |= SV4_NOTINUSE; 1838 nfs_rw_exit(&svp->sv_lock); 1839 continue; 1840 } 1841 if (firstsvp == NULL) 1842 firstsvp = svp; 1843 } 1844 1845 kmem_free(orig_sv_path, orig_sv_pathlen); 1846 1847 if (firstsvp == NULL) { 1848 if (error == 0) 1849 error = ENOENT; 1850 goto bad; 1851 } 1852 1853 mi->mi_curr_serv = svp = firstsvp; 1854 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1855 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1856 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1857 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1858 mi->mi_rootfh = 
sfh4_get(&fh, mi); 1859 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1860 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1861 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1862 nfs_rw_exit(&svp->sv_lock); 1863 1864 /* 1865 * Make the root vnode without attributes. 1866 */ 1867 mfname = mi->mi_fname; 1868 fn_hold(mfname); 1869 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1870 &mfname, NULL, mi, cr, gethrtime()); 1871 rtvp->v_type = vtype; 1872 1873 mi->mi_curread = mi->mi_tsize; 1874 mi->mi_curwrite = mi->mi_stsize; 1875 1876 /* 1877 * Start the manager thread responsible for handling async worker 1878 * threads. 1879 */ 1880 VFS_HOLD(vfsp); /* add reference for thread */ 1881 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1882 vfsp, 0, minclsyspri); 1883 ASSERT(mi->mi_manager_thread != NULL); 1884 /* 1885 * Create the thread that handles over-the-wire calls for 1886 * VOP_INACTIVE. 1887 * This needs to happen after the manager thread is created. 1888 */ 1889 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1890 mi, 0, minclsyspri); 1891 ASSERT(mi->mi_inactive_thread != NULL); 1892 1893 /* If we didn't get a type, get one now */ 1894 if (rtvp->v_type == VNON) { 1895 va.va_mask = AT_TYPE; 1896 error = nfs4getattr(rtvp, &va, tcr); 1897 if (error) 1898 goto bad; 1899 rtvp->v_type = va.va_type; 1900 } 1901 1902 mi->mi_type = rtvp->v_type; 1903 1904 mutex_enter(&mi->mi_lock); 1905 mi->mi_flags &= ~MI4_MOUNTING; 1906 mutex_exit(&mi->mi_lock); 1907 1908 *rtvpp = rtvp; 1909 if (lcr != NULL) 1910 crfree(lcr); 1911 1912 return (0); 1913 bad: 1914 /* 1915 * An error occurred somewhere, need to clean up... 1916 * 1917 * XXX Should not svp be cleaned too? 1918 */ 1919 if (lcr != NULL) 1920 crfree(lcr); 1921 if (rtvp != NULL) { 1922 /* 1923 * We need to release our reference to the root vnode and 1924 * destroy the mntinfo4 struct that we just created. 
1925 */ 1926 rp = VTOR4(rtvp); 1927 if (rp->r_flags & R4HASHED) 1928 rp4_rmhash(rp); 1929 if (rp->r_flags & R4FILEIDMAP) 1930 rp4_fileid_map_remove(rp); 1931 VN_RELE(rtvp); 1932 } 1933 nfs4_async_stop(vfsp); 1934 nfs4_async_manager_stop(vfsp); 1935 if (mi->mi_io_kstats) { 1936 kstat_delete(mi->mi_io_kstats); 1937 mi->mi_io_kstats = NULL; 1938 } 1939 if (mi->mi_ro_kstats) { 1940 kstat_delete(mi->mi_ro_kstats); 1941 mi->mi_ro_kstats = NULL; 1942 } 1943 if (mi->mi_recov_ksp) { 1944 kstat_delete(mi->mi_recov_ksp); 1945 mi->mi_recov_ksp = NULL; 1946 } 1947 nfs_free_mi4(mi); 1948 *rtvpp = NULL; 1949 return (error); 1950 } 1951 1952 /* 1953 * vfs operations 1954 */ 1955 static int 1956 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1957 { 1958 mntinfo4_t *mi; 1959 ushort_t omax; 1960 1961 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1962 return (EPERM); 1963 1964 mi = VFTOMI4(vfsp); 1965 1966 if (flag & MS_FORCE) { 1967 vfsp->vfs_flag |= VFS_UNMOUNTED; 1968 if (curproc->p_zone != mi->mi_zone) { 1969 /* 1970 * If the request is coming from the wrong zone, 1971 * we don't want to create any new threads, and 1972 * performance is not a concern. Do everything 1973 * inline. 1974 */ 1975 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1976 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1977 (void *)vfsp)); 1978 nfs4_free_mount(vfsp, cr); 1979 } else { 1980 /* 1981 * Free data structures asynchronously, to avoid 1982 * blocking the current thread (for performance 1983 * reasons only). 1984 */ 1985 async_free_mount(vfsp, cr); 1986 } 1987 return (0); 1988 } 1989 /* 1990 * Wait until all asynchronous putpage operations on 1991 * this file system are complete before flushing rnodes 1992 * from the cache. 1993 */ 1994 omax = mi->mi_max_threads; 1995 if (nfs4_async_stop_sig(vfsp)) { 1996 return (EINTR); 1997 } 1998 r4flush(vfsp, cr); 1999 /* 2000 * If there are any active vnodes on this file system, 2001 * then the file system is busy and can't be umounted. 
2002 */ 2003 if (check_rtable4(vfsp)) { 2004 mutex_enter(&mi->mi_async_lock); 2005 mi->mi_max_threads = omax; 2006 mutex_exit(&mi->mi_async_lock); 2007 return (EBUSY); 2008 } 2009 /* 2010 * The unmount can't fail from now on, and there are no active 2011 * files that could require over-the-wire calls to the server, 2012 * so stop the async manager and the inactive thread. 2013 */ 2014 nfs4_async_manager_stop(vfsp); 2015 /* 2016 * Destroy all rnodes belonging to this file system from the 2017 * rnode hash queues and purge any resources allocated to 2018 * them. 2019 */ 2020 destroy_fileid_map(vfsp); 2021 destroy_rtable4(vfsp, cr); 2022 vfsp->vfs_flag |= VFS_UNMOUNTED; 2023 nfs4_remove_mi_from_server(mi, NULL); 2024 if (mi->mi_io_kstats) { 2025 kstat_delete(mi->mi_io_kstats); 2026 mi->mi_io_kstats = NULL; 2027 } 2028 if (mi->mi_ro_kstats) { 2029 kstat_delete(mi->mi_ro_kstats); 2030 mi->mi_ro_kstats = NULL; 2031 } 2032 if (mi->mi_recov_ksp) { 2033 kstat_delete(mi->mi_recov_ksp); 2034 mi->mi_recov_ksp = NULL; 2035 } 2036 return (0); 2037 } 2038 2039 /* 2040 * find root of nfs 2041 */ 2042 static int 2043 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2044 { 2045 mntinfo4_t *mi; 2046 vnode_t *vp; 2047 nfs4_fname_t *mfname; 2048 servinfo4_t *svp; 2049 2050 mi = VFTOMI4(vfsp); 2051 2052 if (curproc->p_zone != mi->mi_zone) 2053 return (EPERM); 2054 2055 svp = mi->mi_curr_serv; 2056 if (svp) { 2057 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2058 if (svp->sv_flags & SV4_ROOT_STALE) { 2059 nfs_rw_exit(&svp->sv_lock); 2060 2061 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2062 if (svp->sv_flags & SV4_ROOT_STALE) { 2063 svp->sv_flags &= ~SV4_ROOT_STALE; 2064 nfs_rw_exit(&svp->sv_lock); 2065 return (ENOENT); 2066 } 2067 nfs_rw_exit(&svp->sv_lock); 2068 } else 2069 nfs_rw_exit(&svp->sv_lock); 2070 } 2071 2072 mfname = mi->mi_fname; 2073 fn_hold(mfname); 2074 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2075 VFTOMI4(vfsp), CRED(), gethrtime()); 2076 2077 
if (VTOR4(vp)->r_flags & R4STALE) { 2078 VN_RELE(vp); 2079 return (ENOENT); 2080 } 2081 2082 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2083 2084 vp->v_type = mi->mi_type; 2085 2086 *vpp = vp; 2087 2088 return (0); 2089 } 2090 2091 static int 2092 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2093 { 2094 int error; 2095 nfs4_ga_res_t gar; 2096 nfs4_ga_ext_res_t ger; 2097 2098 gar.n4g_ext_res = &ger; 2099 2100 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2101 NFS4_STATFS_ATTR_MASK, cr)) 2102 return (error); 2103 2104 *sbp = gar.n4g_ext_res->n4g_sb; 2105 2106 return (0); 2107 } 2108 2109 /* 2110 * Get file system statistics. 2111 */ 2112 static int 2113 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2114 { 2115 int error; 2116 vnode_t *vp; 2117 cred_t *cr; 2118 2119 error = nfs4_root(vfsp, &vp); 2120 if (error) 2121 return (error); 2122 2123 cr = CRED(); 2124 2125 error = nfs4_statfs_otw(vp, sbp, cr); 2126 if (!error) { 2127 (void) strncpy(sbp->f_basetype, 2128 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2129 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2130 } else { 2131 nfs4_purge_stale_fh(error, vp, cr); 2132 } 2133 2134 VN_RELE(vp); 2135 2136 return (error); 2137 } 2138 2139 static kmutex_t nfs4_syncbusy; 2140 2141 /* 2142 * Flush dirty nfs files for file system vfsp. 2143 * If vfsp == NULL, all nfs files are flushed. 2144 * 2145 * SYNC_CLOSE in flag is passed to us to 2146 * indicate that we are shutting down and or 2147 * rebooting. 2148 */ 2149 static int 2150 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2151 { 2152 /* 2153 * Cross-zone calls are OK here, since this translates to a 2154 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2155 */ 2156 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2157 r4flush(vfsp, cr); 2158 mutex_exit(&nfs4_syncbusy); 2159 } 2160 2161 /* 2162 * if SYNC_CLOSE is set then we know that 2163 * the system is rebooting, mark the mntinfo 2164 * for later examination. 
 */
	if (vfsp && (flag & SYNC_CLOSE)) {
		mntinfo4_t *mi;

		/*
		 * A SYNC_CLOSE sync means the filesystem is going away;
		 * latch MI4_SHUTDOWN (under mi_lock) so other paths can
		 * see that this mount is being torn down.
		 */
		mi = VFTOMI4(vfsp);
		if (!(mi->mi_flags & MI4_SHUTDOWN)) {
			mutex_enter(&mi->mi_lock);
			mi->mi_flags |= MI4_SHUTDOWN;
			mutex_exit(&mi->mi_lock);
		}
	}
	return (0);
}

/*
 * vget is difficult, if not impossible, to support in v4 because we don't
 * know the parent directory or name, which makes it impossible to create a
 * useful shadow vnode.  And we need the shadow vnode for things like
 * OPEN.
 */

/* ARGSUSED */
/*
 * XXX Check nfs4_vget_pseudo() for dependency.
 */
static int
nfs4_mountroot(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
{
	return (EREMOTE);
}

/*
 * nfs4_mountroot get called in the case where we are diskless booting.  All
 * we need from here is the ability to get the server info and from there we
 * can simply call nfs4_rootvp.
 *
 * Only ROOT_INIT does real work; ROOT_REMOUNT panics (should never
 * happen for NFS root) and ROOT_UNMOUNT is a no-op.
 */
/* ARGSUSED */
static int
nfs4_mountroot(vfs_t *vfsp, whymountroot_t why)
{
	vnode_t *rtvp;			/* root vnode from nfs4rootvp() */
	char root_hostname[SYS_NMLN+1];
	struct servinfo4 *svp;		/* server info for the root server */
	int error;
	int vfsflags;
	size_t size;
	char *root_path;
	struct pathname pn;
	char *name;
	cred_t *cr;
	mntinfo4_t *mi;
	struct nfs_args args;		/* nfs mount arguments */
	static char token[10];
	nfs4_error_t n4e;

	bzero(&args, sizeof (args));

	/* do this BEFORE getfile which causes xid stamps to be initialized */
	clkset(-1L);		/* hack for now - until we get time svc? */

	if (why == ROOT_REMOUNT) {
		/*
		 * Shouldn't happen.
		 */
		panic("nfs4_mountroot: why == ROOT_REMOUNT");
	}

	if (why == ROOT_UNMOUNT) {
		/*
		 * Nothing to do for NFS.
		 */
		return (0);
	}

	/*
	 * why == ROOT_INIT
	 */

	name = token;
	*name = 0;
	(void) getfsname("root", name, sizeof (token));

	pn_alloc(&pn);
	root_path = pn.pn_path;

	/* Allocate and initialize the servinfo4 describing the root server */
	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
	svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
	svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);

	/*
	 * Get server address
	 * Get the root path
	 * Get server's transport
	 * Get server's hostname
	 * Get options
	 */
	args.addr = &svp->sv_addr;
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	args.fh = (char *)&svp->sv_fhandle;
	args.knconf = svp->sv_knconf;
	args.hostname = root_hostname;
	vfsflags = 0;
	if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
	    &args, &vfsflags)) {
		if (error == EPROTONOSUPPORT)
			nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: "
			    "mount_root failed: server doesn't support NFS V4");
		else
			nfs_cmn_err(error, CE_WARN,
			    "nfs4_mountroot: mount_root failed: %m");
		nfs_rw_exit(&svp->sv_lock);
		sv4_free(svp);
		pn_free(&pn);
		return (error);
	}
	nfs_rw_exit(&svp->sv_lock);
	svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
	svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
	(void) strcpy(svp->sv_hostname, root_hostname);

	svp->sv_pathlen = (int)(strlen(root_path) + 1);
	svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
	(void) strcpy(svp->sv_path, root_path);

	/*
	 * Force root partition to always be mounted with AUTH_UNIX for now
	 */
	svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
	svp->sv_secdata->secmod = AUTH_UNIX;
	svp->sv_secdata->rpcflavor = AUTH_UNIX;
	svp->sv_secdata->data = NULL;

	cr = crgetcred();
	rtvp = NULL;

	error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);

	if (error) {
		crfree(cr);
		pn_free(&pn);
		goto errout;
	}

	mi = VTOMI4(rtvp);

	/*
	 * Send client id to the server, if necessary
	 */
	nfs4_error_zinit(&n4e);
	nfs4setclientid(mi, cr, FALSE, &n4e);
	error = n4e.error;

	crfree(cr);

	if (error) {
		pn_free(&pn);
		goto errout;
	}

	error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args);
	if (error) {
		nfs_cmn_err(error, CE_WARN,
		    "nfs4_mountroot: invalid root mount options");
		pn_free(&pn);
		goto errout;
	}

	(void) vfs_lock_wait(vfsp);
	vfs_add(NULL, vfsp, vfsflags);
	vfs_unlock(vfsp);

	/* Record "host:/path" as the boot filesystem name */
	size = strlen(svp->sv_hostname);
	(void) strcpy(rootfs.bo_name, svp->sv_hostname);
	rootfs.bo_name[size] = ':';
	(void) strcpy(&rootfs.bo_name[size + 1], root_path);

	pn_free(&pn);

errout:
	/*
	 * Shared exit path: on error, free the servinfo4 and stop the
	 * async worker threads started by nfs4rootvp().
	 */
	if (error) {
		sv4_free(svp);
		nfs4_async_stop(vfsp);
		nfs4_async_manager_stop(vfsp);
	}

	/*
	 * Drop the root vnode reference obtained from nfs4rootvp() on
	 * both the success and error paths.
	 */
	if (rtvp != NULL)
		VN_RELE(rtvp);

	return (error);
}

/*
 * Initialization routine for VFS routines.
 * Should only be called once
 */
int
nfs4_vfsinit(void)
{
	mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL);
	nfs4setclientid_init();
	return (0);
}

/*
 * Teardown counterpart of nfs4_vfsinit().
 */
void
nfs4_vfsfini(void)
{
	nfs4setclientid_fini();
	mutex_destroy(&nfs4_syncbusy);
}

/*
 * VFS_FREEVFS entry point: release the per-mount resources (server list
 * and mntinfo4) once the last reference to the vfs is dropped.
 */
void
nfs4_freevfs(vfs_t *vfsp)
{
	mntinfo4_t *mi;
	servinfo4_t *svp;

	/* free up the resources */
	mi = VFTOMI4(vfsp);
	svp = mi->mi_servers;
	mi->mi_servers = mi->mi_curr_serv = NULL;
	sv4_free(svp);
	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_freevfs: "
	    "free mi %p", (void *)mi));

	/*
	 * By this time we should have already deleted the
	 * mi kstats in the unmount code.  If they are still around
	 * something's wrong.
	 */
	ASSERT(mi->mi_io_kstats == NULL);

	nfs_free_mi4(mi);
}

/*
 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
 */

/*
 * Global doubly-linked list of nfs4_server_t nodes; the head is a dummy
 * node whose forw/back initially point to itself (empty circular list).
 * Protected by nfs4_server_lst_lock.
 */
struct nfs4_server nfs4_server_lst =
	{ &nfs4_server_lst, &nfs4_server_lst };

kmutex_t nfs4_server_lst_lock;

static void
nfs4setclientid_init(void)
{
	mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
nfs4setclientid_fini(void)
{
	mutex_destroy(&nfs4_server_lst_lock);
}

/* Tunables controlling SETCLIENTID retry behavior */
int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY;
int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES;

/*
 * Set the clientid for the server for "mi".  No-op if the clientid is
 * already set.
 *
 * The recovery boolean should be set to TRUE if this function was called
 * by the recovery code, and FALSE otherwise.
 *
 * Error is returned via 'n4ep'.  If there was a 'n4ep->stat' error, then
 * 'n4ep->error' is set to geterrno4(n4ep->stat).
 */
void
nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep)
{
	struct nfs4_server *np;
	struct servinfo4 *svp = mi->mi_curr_serv;
	nfs4_recov_state_t recov_state;
	int num_retries = 0;
	bool_t retry = FALSE;
	cred_t *lcr = NULL;
	int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */
	time_t lease_time = 0;

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;
	ASSERT(n4ep != NULL);

recov_retry:
	nfs4_error_zinit(n4ep);
	/* This locks np if it is found */
	np = servinfo4_to_nfs4_server(svp);
	ASSERT(np == NULL || MUTEX_HELD(&np->s_lock));

	/*
	 * If we find the server already in the list, then just
	 * return, we've already done SETCLIENTID to that server
	 */

	if (np && (np->s_flags & N4S_CLIENTID_SET)) {
		/*
		 * XXX - more is needed here.  SETCLIENTID may not
		 * be completed.  A VFS lock may prevent multiple
		 * mounts and provide needed serialization.
		 */
		/* add mi to np's mntinfo4_list */
		nfs4_add_mi_to_server(np, mi);
		mutex_exit(&np->s_lock);
		nfs4_server_rele(np);
		return;
	}

	/*
	 * No existing node: create a fresh (unlocked, not-in-list) one.
	 * Otherwise drop np's lock before nfs4_start_op to respect lock
	 * ordering; we reacquire it below under nfs4_server_lst_lock.
	 */
	if (!np)
		np = new_nfs4_server(svp, cr);
	else
		mutex_exit(&np->s_lock);

	if (!recovery) {
		n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state);
		if (n4ep->error) {
			nfs4_server_rele(np);
			return;
		}
	}

	/*
	 * Will potentially add np to global list, which transfers
	 * ownership of the reference to the list.
	 * Lock order: nfs4_server_lst_lock before np->s_lock.
	 */
	mutex_enter(&nfs4_server_lst_lock);
	mutex_enter(&np->s_lock);

	/*
	 * Reset the N4S_CB_PINGED flag.  This is used to
	 * indicate if we have received a CB_NULL from the
	 * server.  Also we reset the waiter flag.
	 */
	np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);

	/* Re-check under the list lock: someone may have raced us. */
	if (np->s_flags & N4S_CLIENTID_SET) {
		/* XXX copied/pasted from above */
		/*
		 * XXX - more is needed here.  SETCLIENTID may not
		 * be completed.  A VFS lock may prevent multiple
		 * mounts and provide needed serialization.
		 */
		/* add mi to np's mntinfo4_list */
		nfs4_add_mi_to_server(np, mi);
		mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		nfs4_server_rele(np);
		if (!recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
		return;
	}

	nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);

	if (n4ep->error == EACCES) {
		/*
		 * If the uid is set then set the creds for secure mounts
		 * by proxy processes such as automountd.
		 */
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
		if (svp->sv_secdata->uid != 0) {
			lcr = crdup(cr);
			(void) crsetugid(lcr, svp->sv_secdata->uid,
			    crgetgid(cr));
			/* np->s_cred takes over the new cred's reference */
			crfree(np->s_cred);
			np->s_cred = lcr;
		}
		nfs_rw_exit(&svp->sv_lock);

		/* retry the OTW call once with the proxy credentials */
		if (lcr != NULL)
			nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
			    &retry_inuse);
	}
	/* snapshot the lease time before dropping np's lock */
	lease_time = np->s_lease_time;
	mutex_exit(&np->s_lock);
	mutex_exit(&nfs4_server_lst_lock);

	if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
		/*
		 * Start recovery if failover is a possibility.  If
		 * invoked by the recovery thread itself, then just
		 * return and let it handle the failover first.  NB:
		 * recovery is not allowed if the mount is in progress
		 * since the infrastructure is not sufficiently setup
		 * to allow it.  Just return the error (after suitable
		 * retries).
		 */
		if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
			(void) nfs4_start_recovery(n4ep, mi, NULL,
			    NULL, NULL, NULL, OP_SETCLIENTID, NULL);
			/*
			 * Don't retry here, just return and let
			 * recovery take over.
			 */
			if (recovery)
				retry = FALSE;
		} else if (nfs4_rpc_retry_error(n4ep->error) ||
		    n4ep->stat == NFS4ERR_RESOURCE ||
		    n4ep->stat == NFS4ERR_STALE_CLIENTID) {

			retry = TRUE;
			/*
			 * Always retry if in recovery or once had
			 * contact with the server (but now it's
			 * overloaded).
			 */
			if (recovery == TRUE ||
			    n4ep->error == ETIMEDOUT ||
			    n4ep->error == ECONNRESET)
				num_retries = 0;
		} else if (retry_inuse && n4ep->error == 0 &&
		    n4ep->stat == NFS4ERR_CLID_INUSE) {
			retry = TRUE;
			num_retries = 0;
		}
	}

	if (!recovery)
		nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
	nfs4_server_rele(np);

	if (retry && num_retries++ < nfs4_num_sclid_retries) {
		/*
		 * For CLID_INUSE, wait out a full lease period before the
		 * single allowed retry; otherwise just the fixed delay.
		 */
		if (retry_inuse) {
			delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
			retry_inuse = 0;
		} else
			delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
		goto recov_retry;
	}

	/* map any remaining NFS status to an errno for the caller */
	if (n4ep->error == 0)
		n4ep->error = geterrno4(n4ep->stat);
}

int nfs4setclientid_otw_debug = 0;

/*
 * This assumes np is locked down.
 * This function handles the recovery of STALE_CLIENTID for
 * SETCLIENTID_CONFIRM, but nothing else; the calling function must be
 * designed to handle those other errors.
2606 */ 2607 static void 2608 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2609 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2610 { 2611 COMPOUND4args_clnt args; 2612 COMPOUND4res_clnt res; 2613 nfs_argop4 argop[3]; 2614 SETCLIENTID4args *s_args; 2615 SETCLIENTID4resok *s_resok; 2616 int doqueue = 1; 2617 nfs4_ga_res_t *garp = NULL; 2618 timespec_t prop_time, after_time; 2619 verifier4 verf; 2620 clientid4 tmp_clientid; 2621 2622 ASSERT(MUTEX_HELD(&np->s_lock)); 2623 2624 args.ctag = TAG_SETCLIENTID; 2625 2626 args.array = argop; 2627 args.array_len = 3; 2628 2629 /* PUTROOTFH */ 2630 argop[0].argop = OP_PUTROOTFH; 2631 2632 /* GETATTR */ 2633 argop[1].argop = OP_GETATTR; 2634 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2635 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2636 2637 /* SETCLIENTID */ 2638 argop[2].argop = OP_SETCLIENTID; 2639 2640 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2641 2642 s_args->client.verifier = np->clidtosend.verifier; 2643 s_args->client.id_len = np->clidtosend.id_len; 2644 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2645 s_args->client.id_val = np->clidtosend.id_val; 2646 2647 /* 2648 * Callback needs to happen on non-RDMA transport 2649 * Check if we have saved the original knetconfig 2650 * if so, use that instead. 2651 */ 2652 if (svp->sv_origknconf != NULL) 2653 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2654 else 2655 nfs4_cb_args(np, svp->sv_knconf, s_args); 2656 2657 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 2658 2659 if (ep->error) 2660 return; 2661 2662 /* getattr lease_time res */ 2663 if (res.array_len >= 2) { 2664 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2665 2666 #ifndef _LP64 2667 /* 2668 * The 32 bit client cannot handle a lease time greater than 2669 * (INT32_MAX/1000000). This is due to the use of the 2670 * lease_time in calls to drv_usectohz() in 2671 * nfs4_renew_lease_thread(). 
The problem is that 2672 * drv_usectohz() takes a time_t (which is just a long = 4 2673 * bytes) as its parameter. The lease_time is multiplied by 2674 * 1000000 to convert seconds to usecs for the parameter. If 2675 * a number bigger than (INT32_MAX/1000000) is used then we 2676 * overflow on the 32bit client. 2677 */ 2678 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2679 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2680 } 2681 #endif 2682 2683 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2684 2685 /* 2686 * Keep track of the lease period for the mi's 2687 * mi_msg_list. We need an appropiate time 2688 * bound to associate past facts with a current 2689 * event. The lease period is perfect for this. 2690 */ 2691 mutex_enter(&mi->mi_msg_list_lock); 2692 mi->mi_lease_period = np->s_lease_time; 2693 mutex_exit(&mi->mi_msg_list_lock); 2694 } 2695 2696 2697 if (res.status == NFS4ERR_CLID_INUSE) { 2698 clientaddr4 *clid_inuse; 2699 2700 if (!(*retry_inusep)) { 2701 clid_inuse = &res.array->nfs_resop4_u. 2702 opsetclientid.SETCLIENTID4res_u.client_using; 2703 2704 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2705 "NFS4 mount (SETCLIENTID failed)." 2706 " nfs4_client_id.id is in" 2707 "use already by: r_netid<%s> r_addr<%s>", 2708 clid_inuse->r_netid, clid_inuse->r_addr); 2709 } 2710 2711 /* 2712 * XXX - The client should be more robust in its 2713 * handling of clientid in use errors (regen another 2714 * clientid and try again?) 2715 */ 2716 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2717 return; 2718 } 2719 2720 if (res.status) { 2721 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2722 return; 2723 } 2724 2725 s_resok = &res.array[2].nfs_resop4_u. 
2726 opsetclientid.SETCLIENTID4res_u.resok4; 2727 2728 tmp_clientid = s_resok->clientid; 2729 2730 verf = s_resok->setclientid_confirm; 2731 2732 #ifdef DEBUG 2733 if (nfs4setclientid_otw_debug) { 2734 union { 2735 clientid4 clientid; 2736 int foo[2]; 2737 } cid; 2738 2739 cid.clientid = s_resok->clientid; 2740 2741 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2742 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2743 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2744 } 2745 #endif 2746 2747 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2748 2749 /* Confirm the client id and get the lease_time attribute */ 2750 2751 args.ctag = TAG_SETCLIENTID_CF; 2752 2753 args.array = argop; 2754 args.array_len = 1; 2755 2756 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2757 2758 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2759 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2760 2761 /* used to figure out RTT for np */ 2762 gethrestime(&prop_time); 2763 2764 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2765 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2766 prop_time.tv_nsec)); 2767 2768 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2769 2770 gethrestime(&after_time); 2771 np->propagation_delay.tv_sec = 2772 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2773 2774 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2775 "finish time: %ld sec ", after_time.tv_sec)); 2776 2777 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2778 "propagation delay set to %ld sec", 2779 np->propagation_delay.tv_sec)); 2780 2781 if (ep->error) 2782 return; 2783 2784 if (res.status == NFS4ERR_CLID_INUSE) { 2785 clientaddr4 *clid_inuse; 2786 2787 if (!(*retry_inusep)) { 2788 clid_inuse = &res.array->nfs_resop4_u. 2789 opsetclientid.SETCLIENTID4res_u.client_using; 2790 2791 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2792 "SETCLIENTID_CONFIRM failed. 
" 2793 "nfs4_client_id.id is in use already by: " 2794 "r_netid<%s> r_addr<%s>", 2795 clid_inuse->r_netid, clid_inuse->r_addr); 2796 } 2797 2798 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2799 return; 2800 } 2801 2802 if (res.status) { 2803 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2804 return; 2805 } 2806 2807 if (!(np->s_flags & N4S_INSERTED)) { 2808 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 2809 insque(np, &nfs4_server_lst); 2810 ASSERT(MUTEX_HELD(&np->s_lock)); 2811 np->s_flags |= N4S_INSERTED; 2812 np->s_refcnt++; /* list gets a reference */ 2813 } 2814 2815 np->clientid = tmp_clientid; 2816 np->s_flags |= N4S_CLIENTID_SET; 2817 2818 /* Add mi to np's mntinfo4 list */ 2819 nfs4_add_mi_to_server(np, mi); 2820 2821 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2822 /* 2823 * Start lease management thread. 2824 * Keep trying until we succeed. 2825 */ 2826 2827 np->s_refcnt++; /* pass reference to thread */ 2828 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2829 minclsyspri); 2830 } 2831 2832 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2833 } 2834 2835 /* 2836 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2837 * mi's clientid the same as sp's. 2838 * Assumes sp is locked down. 2839 */ 2840 void 2841 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 2842 { 2843 mntinfo4_t *tmi; 2844 int in_list = 0; 2845 2846 ASSERT(sp != &nfs4_server_lst); 2847 ASSERT(MUTEX_HELD(&sp->s_lock)); 2848 2849 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2850 "nfs4_add_mi_to_server: add mi %p to sp %p", 2851 (void*)mi, (void*)sp)); 2852 2853 for (tmi = sp->mntinfo4_list; 2854 tmi != NULL; 2855 tmi = tmi->mi_clientid_next) { 2856 if (tmi == mi) { 2857 NFS4_DEBUG(nfs4_client_lease_debug, 2858 (CE_NOTE, 2859 "nfs4_add_mi_to_server: mi in list")); 2860 in_list = 1; 2861 } 2862 } 2863 2864 /* 2865 * First put a hold on the mntinfo4's vfsp so that references via 2866 * mntinfo4_list will be valid. 
2867 */ 2868 if (!in_list) 2869 VFS_HOLD(mi->mi_vfsp); 2870 2871 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 2872 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 2873 2874 if (!in_list) { 2875 if (sp->mntinfo4_list) 2876 sp->mntinfo4_list->mi_clientid_prev = mi; 2877 mi->mi_clientid_next = sp->mntinfo4_list; 2878 sp->mntinfo4_list = mi; 2879 mi->mi_srvsettime = gethrestime_sec(); 2880 } 2881 2882 /* set mi's clientid to that of sp's for later matching */ 2883 mi->mi_clientid = sp->clientid; 2884 2885 /* 2886 * Update the clientid for any other mi's belonging to sp. This 2887 * must be done here while we hold sp->s_lock, so that 2888 * find_nfs4_server() continues to work. 2889 */ 2890 2891 for (tmi = sp->mntinfo4_list; 2892 tmi != NULL; 2893 tmi = tmi->mi_clientid_next) { 2894 if (tmi != mi) { 2895 tmi->mi_clientid = sp->clientid; 2896 } 2897 } 2898 } 2899 2900 /* 2901 * Remove the mi from sp's mntinfo4_list and release its reference. 2902 * Exception: if mi still has open files, flag it for later removal (when 2903 * all the files are closed). 2904 * 2905 * If this is the last mntinfo4 in sp's list then tell the lease renewal 2906 * thread to exit. 2907 */ 2908 static void 2909 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 2910 { 2911 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2912 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 2913 (void*)mi, (void*)sp)); 2914 2915 ASSERT(sp != NULL); 2916 ASSERT(MUTEX_HELD(&sp->s_lock)); 2917 ASSERT(mi->mi_open_files >= 0); 2918 2919 /* 2920 * First make sure this mntinfo4 can be taken off of the list, 2921 * ie: it doesn't have any open files remaining. 
2922 */ 2923 if (mi->mi_open_files > 0) { 2924 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2925 "nfs4_remove_mi_from_server_nolock: don't " 2926 "remove mi since it still has files open")); 2927 2928 mutex_enter(&mi->mi_lock); 2929 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 2930 mutex_exit(&mi->mi_lock); 2931 return; 2932 } 2933 2934 remove_mi(sp, mi); 2935 2936 if (sp->mntinfo4_list == NULL) { 2937 /* last fs unmounted, kill the thread */ 2938 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2939 "remove_mi_from_nfs4_server_nolock: kill the thread")); 2940 nfs4_mark_srv_dead(sp); 2941 } 2942 } 2943 2944 /* 2945 * Remove mi from sp's mntinfo4_list and release the vfs reference. 2946 */ 2947 static void 2948 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 2949 { 2950 ASSERT(MUTEX_HELD(&sp->s_lock)); 2951 2952 /* 2953 * We release a reference, and the caller must still have a 2954 * reference. 2955 */ 2956 ASSERT(mi->mi_vfsp->vfs_count >= 2); 2957 2958 if (mi->mi_clientid_prev) { 2959 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 2960 } else { 2961 /* This is the first mi in sp's mntinfo4_list */ 2962 /* 2963 * Make sure the first mntinfo4 in the list is the actual 2964 * mntinfo4 passed in. 2965 */ 2966 ASSERT(sp->mntinfo4_list == mi); 2967 2968 sp->mntinfo4_list = mi->mi_clientid_next; 2969 } 2970 if (mi->mi_clientid_next) 2971 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 2972 2973 /* Now mark the mntinfo4's links as being removed */ 2974 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 2975 2976 VFS_RELE(mi->mi_vfsp); 2977 } 2978 2979 /* 2980 * Free all the entries in sp's mntinfo4_list. 2981 */ 2982 static void 2983 remove_all_mi(nfs4_server_t *sp) 2984 { 2985 mntinfo4_t *mi; 2986 2987 ASSERT(MUTEX_HELD(&sp->s_lock)); 2988 2989 while (sp->mntinfo4_list != NULL) { 2990 mi = sp->mntinfo4_list; 2991 /* 2992 * Grab a reference in case there is only one left (which 2993 * remove_mi() frees). 
2994 */ 2995 VFS_HOLD(mi->mi_vfsp); 2996 remove_mi(sp, mi); 2997 VFS_RELE(mi->mi_vfsp); 2998 } 2999 } 3000 3001 /* 3002 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3003 * 3004 * This version can be called with a null nfs4_server_t arg, 3005 * and will either find the right one and handle locking, or 3006 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3007 */ 3008 void 3009 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3010 { 3011 nfs4_server_t *sp; 3012 3013 if (esp == NULL) { 3014 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3015 sp = find_nfs4_server_all(mi, 1); 3016 } else 3017 sp = esp; 3018 3019 if (sp != NULL) 3020 nfs4_remove_mi_from_server_nolock(mi, sp); 3021 3022 /* 3023 * If we had a valid esp as input, the calling function will be 3024 * responsible for unlocking the esp nfs4_server. 3025 */ 3026 if (esp == NULL) { 3027 if (sp != NULL) 3028 mutex_exit(&sp->s_lock); 3029 nfs_rw_exit(&mi->mi_recovlock); 3030 if (sp != NULL) 3031 nfs4_server_rele(sp); 3032 } 3033 } 3034 3035 /* 3036 * Return TRUE if the given server has any non-unmounted filesystems. 3037 */ 3038 3039 bool_t 3040 nfs4_fs_active(nfs4_server_t *sp) 3041 { 3042 mntinfo4_t *mi; 3043 3044 ASSERT(MUTEX_HELD(&sp->s_lock)); 3045 3046 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3047 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3048 return (TRUE); 3049 } 3050 3051 return (FALSE); 3052 } 3053 3054 /* 3055 * Mark sp as finished and notify any waiters. 3056 */ 3057 3058 void 3059 nfs4_mark_srv_dead(nfs4_server_t *sp) 3060 { 3061 ASSERT(MUTEX_HELD(&sp->s_lock)); 3062 3063 sp->s_thread_exit = NFS4_THREAD_EXIT; 3064 cv_broadcast(&sp->cv_thread_exit); 3065 } 3066 3067 /* 3068 * Create a new nfs4_server_t structure. 3069 * Returns new node unlocked and not in list, but with a reference count of 3070 * 1. 
3071 */ 3072 struct nfs4_server * 3073 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3074 { 3075 struct nfs4_server *np; 3076 timespec_t tt; 3077 union { 3078 struct { 3079 uint32_t sec; 3080 uint32_t subsec; 3081 } un_curtime; 3082 verifier4 un_verifier; 3083 } nfs4clientid_verifier; 3084 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3085 int len; 3086 3087 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3088 np->saddr.len = svp->sv_addr.len; 3089 np->saddr.maxlen = svp->sv_addr.maxlen; 3090 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3091 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3092 np->s_refcnt = 1; 3093 3094 /* 3095 * Build the nfs_client_id4 for this server mount. Ensure 3096 * the verifier is useful and that the identification is 3097 * somehow based on the server's address for the case of 3098 * multi-homed servers. 3099 */ 3100 nfs4clientid_verifier.un_verifier = 0; 3101 gethrestime(&tt); 3102 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3103 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3104 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3105 3106 /* 3107 * calculate the length of the opaque identifier. Subtract 2 3108 * for the "%s" and add the traditional +1 for null 3109 * termination. 
3110 */ 3111 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3112 np->clidtosend.id_len = len + np->saddr.maxlen; 3113 3114 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3115 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3116 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3117 3118 np->s_flags = 0; 3119 np->mntinfo4_list = NULL; 3120 /* save cred for issuing rfs4calls inside the renew thread */ 3121 crhold(cr); 3122 np->s_cred = cr; 3123 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3124 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3125 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3126 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3127 offsetof(rnode4_t, r_deleg_link)); 3128 np->s_thread_exit = 0; 3129 np->state_ref_count = 0; 3130 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3131 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3132 np->s_otw_call_count = 0; 3133 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3134 np->zoneid = getzoneid(); 3135 np->zone_globals = nfs4_get_callback_globals(); 3136 ASSERT(np->zone_globals != NULL); 3137 return (np); 3138 } 3139 3140 /* 3141 * Create a new nfs4_server_t structure and add it to the list. 3142 * Returns new node locked; reference must eventually be freed. 
 */
static struct nfs4_server *
add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	nfs4_server_t *sp;

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	sp = new_nfs4_server(svp, cr);
	mutex_enter(&sp->s_lock);
	insque(sp, &nfs4_server_lst);
	sp->s_refcnt++;		/* list gets a reference */
	sp->clientid = 0;	/* no clientid established yet */
	sp->s_flags |= N4S_INSERTED;
	return (sp);
}

/* Tunable: non-zero enables dumpnfs4slist() debug output */
int nfs4_server_t_debug = 0;

#ifdef lint
extern void
dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
#endif

#ifndef lint
#ifdef DEBUG
/*
 * Debug helper: dump the global nfs4_server_t list, annotating nodes
 * that match the given servinfo4 address and/or clientid.
 */
void
dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
{
	int hash16(void *p, int len);
	nfs4_server_t *np;

	NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
	    "dumping nfs4_server_t list in %s", txt));
	NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
	    "mi 0x%p, want clientid %llx, addr %d/%04X",
	    mi, (longlong_t)clientid, srv_p->sv_addr.len,
	    hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst;
	    np = np->forw) {
		NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
		    "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
		    np, (longlong_t)np->clientid, np->saddr.len,
		    hash16((void *)np->saddr.buf, np->saddr.len),
		    np->state_ref_count));
		if (np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - address matches"));
		if (np->clientid == clientid || np->clientid == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - clientid matches"));
		if (np->s_thread_exit != NFS4_THREAD_EXIT)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - thread not exiting"));
	}
	delay(hz);
}
#endif
#endif


/*
 * Move a mntinfo4_t from one server list to another.
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = getzoneid();

	ASSERT(curproc->p_zone == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	/*
	 * Single pass over the list; because both nodes are locked as
	 * they are encountered, lock acquisition is in list order.
	 */
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		/* returns the new node locked, with a reference */
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void*)mi, (void*)old, (void*)new,
	    (void*)op, (void*)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	/*
	 * Transfer mi's open-file count from the old server's
	 * state_ref_count to the new server's.  mi_open_files is
	 * temporarily zeroed so nfs4_remove_mi_from_server_nolock()
	 * doesn't defer the removal, then restored.
	 */
	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	/* drop the old node; return the new one (unlocked) to the caller */
	mutex_exit(&op->s_lock);
	nfs4_server_rele(op);
	mutex_exit(&np->s_lock);

	return (np);
}

/*
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
3303 */ 3304 nfs4_server_t * 3305 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3306 { 3307 nfs4_server_t *np; 3308 zoneid_t zoneid = getzoneid(); 3309 3310 mutex_enter(&nfs4_server_lst_lock); 3311 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3312 if (np->zoneid == zoneid && 3313 np->saddr.len == srv_p->sv_addr.len && 3314 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3315 np->saddr.len) == 0 && 3316 np->s_thread_exit != NFS4_THREAD_EXIT) { 3317 mutex_enter(&np->s_lock); 3318 np->s_refcnt++; 3319 mutex_exit(&nfs4_server_lst_lock); 3320 return (np); 3321 } 3322 } 3323 mutex_exit(&nfs4_server_lst_lock); 3324 return (NULL); 3325 } 3326 3327 /* 3328 * Search the nfs4_server_lst to find a match based on clientid and 3329 * addr. 3330 * Locks the nfs4_server down if it is found and returns a reference that 3331 * must eventually be freed. 3332 * 3333 * Returns NULL it no match is found. This means one of two things: either 3334 * mi is in the process of being mounted, or mi has been unmounted. 3335 * 3336 * The caller should be holding mi->mi_recovlock, and it should continue to 3337 * hold the lock until done with the returned nfs4_server_t. Once 3338 * mi->mi_recovlock is released, there is no guarantee that the returned 3339 * mi->nfs4_server_t will continue to correspond to mi. 3340 */ 3341 nfs4_server_t * 3342 find_nfs4_server(mntinfo4_t *mi) 3343 { 3344 return (find_nfs4_server_all(mi, 0)); 3345 } 3346 3347 /* 3348 * Same as above, but takes an "all" parameter which can be 3349 * set to 1 if the caller wishes to find nfs4_server_t's which 3350 * have been marked for termination by the exit of the renew 3351 * thread. This should only be used by operations which are 3352 * cleaning up and will not cause an OTW op. 
3353 */ 3354 nfs4_server_t * 3355 find_nfs4_server_all(mntinfo4_t *mi, int all) 3356 { 3357 nfs4_server_t *np; 3358 servinfo4_t *svp; 3359 zoneid_t zoneid = mi->mi_zone->zone_id; 3360 3361 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3362 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3363 /* 3364 * This can be called from nfs4_unmount() which can be called from the 3365 * global zone, hence it's legal for the global zone to muck with 3366 * another zone's server list, as long as it doesn't try to contact 3367 * them. 3368 */ 3369 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID); 3370 3371 /* 3372 * The nfs4_server_lst_lock global lock is held when we get a new 3373 * clientid (via SETCLIENTID OTW). Holding this global lock and 3374 * mi_recovlock (READER is fine) ensures that the nfs4_server 3375 * and this mntinfo4 can't get out of sync, so the following search is 3376 * always valid. 3377 */ 3378 mutex_enter(&nfs4_server_lst_lock); 3379 #ifdef DEBUG 3380 if (nfs4_server_t_debug) { 3381 /* mi->mi_clientid is unprotected, ok for debug output */ 3382 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3383 mi->mi_curr_serv); 3384 } 3385 #endif 3386 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3387 mutex_enter(&np->s_lock); 3388 svp = mi->mi_curr_serv; 3389 3390 if (np->zoneid == zoneid && 3391 np->clientid == mi->mi_clientid && 3392 np->saddr.len == svp->sv_addr.len && 3393 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3394 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3395 mutex_exit(&nfs4_server_lst_lock); 3396 np->s_refcnt++; 3397 return (np); 3398 } 3399 mutex_exit(&np->s_lock); 3400 } 3401 mutex_exit(&nfs4_server_lst_lock); 3402 3403 return (NULL); 3404 } 3405 3406 /* 3407 * Release the reference to sp and destroy it if that's the last one. 
 */

void
nfs4_server_rele(nfs4_server_t *sp)
{
	mutex_enter(&sp->s_lock);
	ASSERT(sp->s_refcnt > 0);
	sp->s_refcnt--;
	if (sp->s_refcnt > 0) {
		/* Others still hold references; nothing more to do. */
		mutex_exit(&sp->s_lock);
		return;
	}
	if (!(sp->s_flags & N4S_INSERTED)) {
		/*
		 * Not on the global list, so no list lock is needed;
		 * destroy_nfs4_server() drops s_lock for us.
		 */
		destroy_nfs4_server(sp);
		return;
	}
	/*
	 * Lock ordering requires nfs4_server_lst_lock before s_lock, so
	 * drop s_lock, take both in order, then re-check s_refcnt in case
	 * another thread grabbed a reference in the window.
	 */
	mutex_exit(&sp->s_lock);
	mutex_enter(&nfs4_server_lst_lock);
	mutex_enter(&sp->s_lock);
	if (sp->s_refcnt > 0) {
		/* Lost the race: someone re-referenced sp; bail out. */
		mutex_exit(&sp->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return;
	}
	if (sp->s_flags & N4S_INSERTED) {
		/* Unlink from the global nfs4_server_lst. */
		remque(sp);
		sp->forw = sp->back = NULL;
		sp->s_flags &= ~N4S_INSERTED;
	}
	mutex_exit(&nfs4_server_lst_lock);
	destroy_nfs4_server(sp);
}

/*
 * Tear down an nfs4_server_t whose last reference has been dropped.
 * Entered with sp->s_lock held and exits having released (and then
 * destroyed) it; sp must already be off the global list.
 */
static void
destroy_nfs4_server(nfs4_server_t *sp)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));
	ASSERT(!(sp->s_flags & N4S_INSERTED));
	ASSERT(sp->s_refcnt == 0);
	ASSERT(sp->s_otw_call_count == 0);

	/* Detach any mntinfo4's still linked to this server. */
	remove_all_mi(sp);

	crfree(sp->s_cred);
	kmem_free(sp->saddr.buf, sp->saddr.maxlen);
	kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len);
	mutex_exit(&sp->s_lock);

	/* destroy the nfs4_server */
	nfs4callback_destroy(sp);
	list_destroy(&sp->s_deleg_list);
	mutex_destroy(&sp->s_lock);
	cv_destroy(&sp->cv_thread_exit);
	cv_destroy(&sp->s_cv_otw_count);
	cv_destroy(&sp->wait_cb_null);
	nfs_rw_destroy(&sp->s_recovlock);
	kmem_free(sp, sizeof (*sp));
}

/*
 * Lock sp, but only if it's still active (in the list and hasn't been
 * flagged as exiting) or 'all' is non-zero.
 * Returns TRUE if sp got locked and adds a reference to sp.
 */
bool_t
nfs4_server_vlock(nfs4_server_t *sp, int all)
{
	nfs4_server_t *np;

	mutex_enter(&nfs4_server_lst_lock);
	/* Walk the list to validate sp is still a live list member. */
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT ||
		    all != 0)) {
			/* Returned with np->s_lock HELD and a ref added. */
			mutex_enter(&np->s_lock);
			np->s_refcnt++;
			mutex_exit(&nfs4_server_lst_lock);
			return (TRUE);
		}
	}
	mutex_exit(&nfs4_server_lst_lock);
	return (FALSE);
}

/*
 * Fork off a thread to free the data structures for a mount.
 */

static void
async_free_mount(vfs_t *vfsp, cred_t *cr)
{
	freemountargs_t *args;

	args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
	args->fm_vfsp = vfsp;
	/* Hold the vfs and cred for the worker; released in the thread. */
	VFS_HOLD(vfsp);
	args->fm_cr = cr;
	crhold(cr);

	(void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
	    minclsyspri);
}

/*
 * Thread entry point for async_free_mount(): do the teardown, then drop
 * the holds taken above and free the argument block.
 */
static void
nfs4_free_mount_thread(freemountargs_t *args)
{
	nfs4_free_mount(args->fm_vfsp, args->fm_cr);
	VFS_RELE(args->fm_vfsp);
	crfree(args->fm_cr);
	kmem_free(args, sizeof (freemountargs_t));
	zthread_exit();
	/* NOTREACHED */
}

/*
 * Thread to free the data structures for a given filesystem.
 */
static void
nfs4_free_mount(vfs_t *vfsp, cred_t *cr)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);
	nfs4_server_t *sp;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	boolean_t async_thread;

	/*
	 * We need to participate in the CPR framework if this is a kernel
	 * thread.  (True when running as a zthread under zone_zsched.)
	 */
	async_thread = (curproc == curproc->p_zone->zone_zsched);
	if (async_thread) {
		mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
		CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
		    "nfsv4AsyncUnmount");
	}

	/*
	 * We need to wait for all outstanding OTW calls
	 * and recovery to finish before we remove the mi
	 * from the nfs4_server_t, as current pending
	 * calls might still need this linkage (in order
	 * to find a nfs4_server_t from a mntinfo4_t).
	 */
	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
	sp = find_nfs4_server(mi);	/* returns with sp->s_lock held */
	nfs_rw_exit(&mi->mi_recovlock);

	if (sp) {
		/*
		 * Drain outstanding OTW calls; cv_wait drops and reacquires
		 * sp->s_lock.  Bracket each wait with CPR-safe begin/end so
		 * a suspend isn't blocked on us.
		 */
		while (sp->s_otw_call_count != 0) {
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_BEGIN(&cpr_info);
				mutex_exit(&cpr_lock);
			}
			cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
				mutex_exit(&cpr_lock);
			}
		}
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		sp = NULL;
	}

	/* Likewise wait for any in-progress recovery on this mount. */
	mutex_enter(&mi->mi_lock);
	while (mi->mi_in_recovery != 0) {
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			mutex_exit(&cpr_lock);
		}
		cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
			mutex_exit(&cpr_lock);
		}
	}
	mutex_exit(&mi->mi_lock);

	/*
	 * The original purge of the dnlc via 'dounmount'
	 * doesn't guarantee that another dnlc entry was not
	 * added while we waited for all outstanding OTW
	 * and recovery calls to finish.  So re-purge the
	 * dnlc now.
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/*
	 * We need to explicitly stop the manager thread; the async worker
	 * threads can timeout and exit on their own.
	 */
	nfs4_async_manager_stop(vfsp);

	destroy_fileid_map(vfsp);
	destroy_rtable4(vfsp, cr);

	nfs4_remove_mi_from_server(mi, NULL);

	/* Tear down this mount's kstats, if they were created. */
	if (mi->mi_io_kstats) {
		kstat_delete(mi->mi_io_kstats);
		mi->mi_io_kstats = NULL;
	}
	if (mi->mi_ro_kstats) {
		kstat_delete(mi->mi_ro_kstats);
		mi->mi_ro_kstats = NULL;
	}
	if (mi->mi_recov_ksp) {
		kstat_delete(mi->mi_recov_ksp);
		mi->mi_recov_ksp = NULL;
	}

	if (async_thread) {
		mutex_enter(&cpr_lock);
		CALLB_CPR_EXIT(&cpr_info);	/* drops cpr_lock */
		mutex_destroy(&cpr_lock);
	}
}