1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
 *		All Rights Reserved
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/time.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/dnlc.h>
#include <sys/list.h>
#include <sys/mntent.h>
#include <sys/tsol/label.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

/*
 * Arguments passed to the thread that frees the data structures left
 * behind by a forced unmount.  The two members mirror the (vfs_t *,
 * cred_t *) pair taken by nfs4_free_mount() and async_free_mount().
 */

typedef struct {
	vfs_t *fm_vfsp;		/* vfs handed to the free-mount thread */
	cred_t *fm_cr;		/* credentials handed to the same thread */
} freemountargs_t;

/* Forced-unmount teardown helpers and the servinfo4 duplicate check. */
static void async_free_mount(vfs_t *, cred_t *);
static void nfs4_free_mount(vfs_t *, cred_t *);
static void nfs4_free_mount_thread(freemountargs_t *);
static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);

/*
 * From rpcsec module (common/rpcsec).
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v4_tmpl in nfs_stats.c
 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access", "close", "commit", "create",
	"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
	"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
	"read", "readdir", "readlink", "remove", "rename", "renew",
	"restorefh", "savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};

/*
 * nfs4_max_mount_retry is the number of times the client will redrive
 * a mount compound before giving up and returning failure.  The intent
 * is to redrive mount compounds which fail NFS4ERR_STALE so that
 * if a component of the server path being mounted goes stale, it can
 * "recover" by redriving the mount compound (LOOKUP ops).  This recovery
 * code is needed outside of the recovery framework because mount is a
 * special case.  The client doesn't create vnodes/rnodes for components
 * of the server path being mounted.  The recovery code recovers real
 * client objects, not STALE FHs which map to components of the server
 * path being mounted.
 *
 * We could just fail the mount on the first time, but that would
 * instantly trigger failover (from nfs4_mount), and the client should
 * try to re-lookup the STALE FH before doing failover.  The easiest
 * way to "re-lookup" is to simply redrive the mount compound.
 */
static int nfs4_max_mount_retry = 2;

/*
 * nfs4 vfs operations.
 */
static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int nfs4_unmount(vfs_t *, int, cred_t *);
static int nfs4_root(vfs_t *, vnode_t **);
static int nfs4_statvfs(vfs_t *, struct statvfs64 *);
static int nfs4_sync(vfs_t *, short, cred_t *);
static int nfs4_vget(vfs_t *, vnode_t **, fid_t *);
static int nfs4_mountroot(vfs_t *, whymountroot_t);
static void nfs4_freevfs(vfs_t *);

/* Builds the root vnode for a mount; used by nfs4_mount(). */
static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
    int, cred_t *, zone_t *);

/* vfs operations vector; filled in by vfs_setfsops() from nfs4init(). */
vfsops_t *nfs4_vfsops;

int nfs4_vfsinit(void);
void nfs4_vfsfini(void);
static void nfs4setclientid_init(void);
static void nfs4setclientid_fini(void);
static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
    struct nfs4_server *, nfs4_error_t *, int *);
static void destroy_nfs4_server(nfs4_server_t *);
static void remove_mi(nfs4_server_t *, mntinfo4_t *);

/*
 * Initialize the vfs structure
 */

/* File system type index recorded by nfs4init(). */
static int nfs4fstyp;


/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
169 */ 170 extern int rdma_debug; 171 172 int 173 nfs4init(int fstyp, char *name) 174 { 175 static const fs_operation_def_t nfs4_vfsops_template[] = { 176 VFSNAME_MOUNT, nfs4_mount, 177 VFSNAME_UNMOUNT, nfs4_unmount, 178 VFSNAME_ROOT, nfs4_root, 179 VFSNAME_STATVFS, nfs4_statvfs, 180 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 181 VFSNAME_VGET, nfs4_vget, 182 VFSNAME_MOUNTROOT, nfs4_mountroot, 183 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 184 NULL, NULL 185 }; 186 int error; 187 188 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 189 if (error != 0) { 190 zcmn_err(GLOBAL_ZONEID, CE_WARN, 191 "nfs4init: bad vfs ops template"); 192 return (error); 193 } 194 195 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 196 if (error != 0) { 197 (void) vfs_freevfsops_by_type(fstyp); 198 zcmn_err(GLOBAL_ZONEID, CE_WARN, 199 "nfs4init: bad vnode ops template"); 200 return (error); 201 } 202 203 nfs4fstyp = fstyp; 204 205 (void) nfs4_vfsinit(); 206 207 (void) nfs4_init_dot_entries(); 208 209 return (0); 210 } 211 212 void 213 nfs4fini(void) 214 { 215 (void) nfs4_destroy_dot_entries(); 216 nfs4_vfsfini(); 217 } 218 219 /* 220 * Create a new sec_data structure to store AUTH_DH related data: 221 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 222 * flag set for NFS V4 since we are avoiding to contact the rpcbind 223 * daemon and is using the IP time service (IPPORT_TIMESERVER). 224 * 225 * sec_data can be freed by sec_clnt_freeinfo(). 
226 */ 227 struct sec_data * 228 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 229 struct knetconfig *knconf) { 230 struct sec_data *secdata; 231 dh_k4_clntdata_t *data; 232 char *pf, *p; 233 234 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 235 return (NULL); 236 237 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 238 secdata->flags = 0; 239 240 data = kmem_alloc(sizeof (*data), KM_SLEEP); 241 242 data->syncaddr.maxlen = syncaddr->maxlen; 243 data->syncaddr.len = syncaddr->len; 244 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 245 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 246 247 /* 248 * duplicate the knconf information for the 249 * new opaque data. 250 */ 251 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 252 *data->knconf = *knconf; 253 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 254 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 255 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 256 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 257 data->knconf->knc_protofmly = pf; 258 data->knconf->knc_proto = p; 259 260 /* move server netname to the sec_data structure */ 261 data->netname = kmem_alloc(nlen, KM_SLEEP); 262 bcopy(netname, data->netname, nlen); 263 data->netnamelen = (int)nlen; 264 265 secdata->secmod = AUTH_DH; 266 secdata->rpcflavor = AUTH_DH; 267 secdata->data = (caddr_t)data; 268 269 return (secdata); 270 } 271 272 static int 273 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 274 { 275 servinfo4_t *si; 276 277 /* 278 * Iterate over the servinfo4 list to make sure 279 * we do not have a duplicate. 
Skip any servinfo4 280 * that has been marked "NOT IN USE" 281 */ 282 for (si = svp_head; si; si = si->sv_next) { 283 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 284 if (si->sv_flags & SV4_NOTINUSE) { 285 nfs_rw_exit(&si->sv_lock); 286 continue; 287 } 288 nfs_rw_exit(&si->sv_lock); 289 if (si == svp) 290 continue; 291 if (si->sv_addr.len == svp->sv_addr.len && 292 strcmp(si->sv_knconf->knc_protofmly, 293 svp->sv_knconf->knc_protofmly) == 0 && 294 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 295 si->sv_addr.len) == 0) { 296 /* it's a duplicate */ 297 return (1); 298 } 299 } 300 /* it's not a duplicate */ 301 return (0); 302 } 303 304 /* 305 * nfs mount vfsop 306 * Set up mount info record and attach it to vfs struct. 307 */ 308 static int 309 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 310 { 311 char *data = uap->dataptr; 312 int error; 313 vnode_t *rtvp; /* the server's root */ 314 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 315 size_t hlen; /* length of hostname */ 316 size_t nlen; /* length of netname */ 317 char netname[MAXNETNAMELEN+1]; /* server's netname */ 318 struct netbuf addr; /* server's address */ 319 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 320 struct knetconfig *knconf; /* transport knetconfig structure */ 321 struct knetconfig *rdma_knconf; /* rdma transport structure */ 322 rnode4_t *rp; 323 struct servinfo4 *svp; /* nfs server info */ 324 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 325 struct servinfo4 *svp_head; /* first nfs server info */ 326 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 327 struct sec_data *secdata; /* security data */ 328 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 329 STRUCT_DECL(knetconfig, knconf_tmp); 330 STRUCT_DECL(netbuf, addr_tmp); 331 int flags, addr_type, removed; 332 char *p, *pf; 333 struct pathname pn; 334 char *userbufptr; 335 zone_t *zone = nfs_zone(); 336 nfs4_error_t n4e; 337 zone_t *mntzone = NULL; 338 339 
if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 340 return (EPERM); 341 if (mvp->v_type != VDIR) 342 return (ENOTDIR); 343 /* 344 * get arguments 345 * 346 * nfs_args is now versioned and is extensible, so 347 * uap->datalen might be different from sizeof (args) 348 * in a compatible situation. 349 */ 350 more: 351 STRUCT_INIT(args, get_udatamodel()); 352 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 353 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 354 STRUCT_SIZE(args)))) 355 return (EFAULT); 356 357 flags = STRUCT_FGET(args, flags); 358 359 /* 360 * If the request changes the locking type, disallow the remount, 361 * because it's questionable whether we can transfer the 362 * locking state correctly. 363 */ 364 if (uap->flags & MS_REMOUNT) { 365 if ((mi = VFTOMI4(vfsp)) != NULL) { 366 uint_t new_mi_llock; 367 uint_t old_mi_llock; 368 369 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 370 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 371 if (old_mi_llock != new_mi_llock) 372 return (EBUSY); 373 } 374 return (0); 375 } 376 377 mutex_enter(&mvp->v_lock); 378 if (!(uap->flags & MS_OVERLAY) && 379 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 380 mutex_exit(&mvp->v_lock); 381 return (EBUSY); 382 } 383 mutex_exit(&mvp->v_lock); 384 385 /* make sure things are zeroed for errout: */ 386 rtvp = NULL; 387 mi = NULL; 388 addr.buf = NULL; 389 syncaddr.buf = NULL; 390 secdata = NULL; 391 392 /* 393 * A valid knetconfig structure is required. 394 */ 395 if (!(flags & NFSMNT_KNCONF)) 396 return (EINVAL); 397 398 /* 399 * Allocate a servinfo4 struct. 400 */ 401 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 402 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 403 if (svp_tail) { 404 svp_2ndlast = svp_tail; 405 svp_tail->sv_next = svp; 406 } else { 407 svp_head = svp; 408 svp_2ndlast = svp; 409 } 410 411 svp_tail = svp; 412 413 /* 414 * Allocate space for a knetconfig structure and 415 * its strings and copy in from user-land. 
416 */ 417 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 418 svp->sv_knconf = knconf; 419 STRUCT_INIT(knconf_tmp, get_udatamodel()); 420 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 421 STRUCT_SIZE(knconf_tmp))) { 422 sv4_free(svp_head); 423 return (EFAULT); 424 } 425 426 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 427 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 428 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 429 if (get_udatamodel() != DATAMODEL_LP64) { 430 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 431 } else { 432 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 433 } 434 435 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 436 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 437 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 438 if (error) { 439 kmem_free(pf, KNC_STRSIZE); 440 kmem_free(p, KNC_STRSIZE); 441 sv4_free(svp_head); 442 return (error); 443 } 444 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 445 if (error) { 446 kmem_free(pf, KNC_STRSIZE); 447 kmem_free(p, KNC_STRSIZE); 448 sv4_free(svp_head); 449 return (error); 450 } 451 if (strcmp(p, NC_UDP) == 0) { 452 kmem_free(pf, KNC_STRSIZE); 453 kmem_free(p, KNC_STRSIZE); 454 sv4_free(svp_head); 455 return (ENOTSUP); 456 } 457 knconf->knc_protofmly = pf; 458 knconf->knc_proto = p; 459 460 /* 461 * Get server address 462 */ 463 STRUCT_INIT(addr_tmp, get_udatamodel()); 464 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 465 STRUCT_SIZE(addr_tmp))) { 466 error = EFAULT; 467 goto errout; 468 } 469 470 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 471 addr.len = STRUCT_FGET(addr_tmp, len); 472 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 473 addr.maxlen = addr.len; 474 if (copyin(userbufptr, addr.buf, addr.len)) { 475 kmem_free(addr.buf, addr.len); 476 error = EFAULT; 477 goto errout; 478 } 479 480 svp->sv_addr = addr; 481 482 /* 483 * Get the root fhandle 484 */ 485 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 486 487 if (error) 488 goto errout; 489 490 /* Volatile fh: keep server paths, so use actual-size strings */ 491 svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 492 bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen); 493 svp->sv_path[pn.pn_pathlen] = '\0'; 494 svp->sv_pathlen = pn.pn_pathlen + 1; 495 pn_free(&pn); 496 497 /* 498 * Get server's hostname 499 */ 500 if (flags & NFSMNT_HOSTNAME) { 501 error = copyinstr(STRUCT_FGETP(args, hostname), 502 netname, sizeof (netname), &hlen); 503 if (error) 504 goto errout; 505 } else { 506 char *p = "unknown-host"; 507 hlen = strlen(p) + 1; 508 (void) strcpy(netname, p); 509 } 510 svp->sv_hostnamelen = hlen; 511 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 512 (void) strcpy(svp->sv_hostname, netname); 513 514 /* 515 * RDMA MOUNT SUPPORT FOR NFS v4. 516 * Establish, is it possible to use RDMA, if so overload the 517 * knconf with rdma specific knconf and free the orignal knconf. 518 */ 519 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 520 /* 521 * Determine the addr type for RDMA, IPv4 or v6. 522 */ 523 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 524 addr_type = AF_INET; 525 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 526 addr_type = AF_INET6; 527 528 if (rdma_reachable(addr_type, &svp->sv_addr, 529 &rdma_knconf) == 0) { 530 /* 531 * If successful, hijack the orignal knconf and 532 * replace with the new one, depending on the flags. 533 */ 534 svp->sv_origknconf = svp->sv_knconf; 535 svp->sv_knconf = rdma_knconf; 536 knconf = rdma_knconf; 537 } else { 538 if (flags & NFSMNT_TRYRDMA) { 539 #ifdef DEBUG 540 if (rdma_debug) 541 zcmn_err(getzoneid(), CE_WARN, 542 "no RDMA onboard, revert\n"); 543 #endif 544 } 545 546 if (flags & NFSMNT_DORDMA) { 547 /* 548 * If proto=rdma is specified and no RDMA 549 * path to this server is avialable then 550 * ditch this server. 
551 * This is not included in the mountable 552 * server list or the replica list. 553 * Check if more servers are specified; 554 * Failover case, otherwise bail out of mount. 555 */ 556 if (STRUCT_FGET(args, nfs_args_ext) == 557 NFS_ARGS_EXTB && STRUCT_FGETP(args, 558 nfs_ext_u.nfs_extB.next) != NULL) { 559 if (uap->flags & MS_RDONLY && 560 !(flags & NFSMNT_SOFT)) { 561 data = (char *) 562 STRUCT_FGETP(args, 563 nfs_ext_u.nfs_extB.next); 564 if (svp_head->sv_next == NULL) { 565 svp_tail = NULL; 566 svp_2ndlast = NULL; 567 sv4_free(svp_head); 568 goto more; 569 } else { 570 svp_tail = svp_2ndlast; 571 svp_2ndlast->sv_next = 572 NULL; 573 sv4_free(svp); 574 goto more; 575 } 576 } 577 } else { 578 /* 579 * This is the last server specified 580 * in the nfs_args list passed down 581 * and its not rdma capable. 582 */ 583 if (svp_head->sv_next == NULL) { 584 /* 585 * Is this the only one 586 */ 587 error = EINVAL; 588 #ifdef DEBUG 589 if (rdma_debug) 590 zcmn_err(getzoneid(), 591 CE_WARN, 592 "No RDMA srv"); 593 #endif 594 goto errout; 595 } else { 596 /* 597 * There is list, since some 598 * servers specified before 599 * this passed all requirements 600 */ 601 svp_tail = svp_2ndlast; 602 svp_2ndlast->sv_next = NULL; 603 sv4_free(svp); 604 goto proceed; 605 } 606 } 607 } 608 } 609 } 610 611 /* 612 * If there are syncaddr and netname data, load them in. This is 613 * to support data needed for NFSV4 when AUTH_DH is the negotiated 614 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 
615 */ 616 netname[0] = '\0'; 617 if (flags & NFSMNT_SECURE) { 618 619 /* get syncaddr */ 620 STRUCT_INIT(addr_tmp, get_udatamodel()); 621 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 622 STRUCT_SIZE(addr_tmp))) { 623 error = EINVAL; 624 goto errout; 625 } 626 userbufptr = STRUCT_FGETP(addr_tmp, buf); 627 syncaddr.len = STRUCT_FGET(addr_tmp, len); 628 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 629 syncaddr.maxlen = syncaddr.len; 630 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 631 kmem_free(syncaddr.buf, syncaddr.len); 632 error = EFAULT; 633 goto errout; 634 } 635 636 /* get server's netname */ 637 if (copyinstr(STRUCT_FGETP(args, netname), netname, 638 sizeof (netname), &nlen)) { 639 kmem_free(syncaddr.buf, syncaddr.len); 640 error = EFAULT; 641 goto errout; 642 } 643 netname[nlen] = '\0'; 644 645 svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr, 646 knconf); 647 } 648 649 /* 650 * Get the extention data which has the security data structure. 651 * This includes data for AUTH_SYS as well. 652 */ 653 if (flags & NFSMNT_NEWARGS) { 654 switch (STRUCT_FGET(args, nfs_args_ext)) { 655 case NFS_ARGS_EXTA: 656 case NFS_ARGS_EXTB: 657 /* 658 * Indicating the application is using the new 659 * sec_data structure to pass in the security 660 * data. 661 */ 662 if (STRUCT_FGETP(args, 663 nfs_ext_u.nfs_extA.secdata) == NULL) { 664 error = EINVAL; 665 } else { 666 error = sec_clnt_loadinfo( 667 (struct sec_data *)STRUCT_FGETP(args, 668 nfs_ext_u.nfs_extA.secdata), 669 &secdata, get_udatamodel()); 670 } 671 break; 672 673 default: 674 error = EINVAL; 675 break; 676 } 677 678 } else if (flags & NFSMNT_SECURE) { 679 /* 680 * NFSMNT_SECURE is deprecated but we keep it 681 * to support the rouge user generated application 682 * that may use this undocumented interface to do 683 * AUTH_DH security. 
684 */ 685 secdata = create_authdh_data(netname, nlen, &syncaddr, knconf); 686 687 } else { 688 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 689 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 690 secdata->data = NULL; 691 } 692 693 svp->sv_secdata = secdata; 694 695 /* syncaddr is no longer needed. */ 696 if (syncaddr.buf != NULL) 697 kmem_free(syncaddr.buf, syncaddr.len); 698 699 /* 700 * User does not explictly specify a flavor, and a user 701 * defined default flavor is passed down. 702 */ 703 if (flags & NFSMNT_SECDEFAULT) { 704 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 705 svp->sv_flags |= SV4_TRYSECDEFAULT; 706 nfs_rw_exit(&svp->sv_lock); 707 } 708 709 /* 710 * Failover support: 711 * 712 * We may have a linked list of nfs_args structures, 713 * which means the user is looking for failover. If 714 * the mount is either not "read-only" or "soft", 715 * we want to bail out with EINVAL. 716 */ 717 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 718 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 719 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 720 data = (char *)STRUCT_FGETP(args, 721 nfs_ext_u.nfs_extB.next); 722 goto more; 723 } 724 error = EINVAL; 725 goto errout; 726 } 727 728 /* 729 * Determine the zone we're being mounted into. 730 */ 731 zone_hold(mntzone = zone); /* start with this assumption */ 732 if (getzoneid() == GLOBAL_ZONEID) { 733 zone_rele(mntzone); 734 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 735 ASSERT(mntzone != NULL); 736 if (mntzone != zone) { 737 error = EBUSY; 738 goto errout; 739 } 740 } 741 742 if (is_system_labeled()) { 743 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 744 svp->sv_knconf, cr); 745 746 if (error > 0) 747 goto errout; 748 749 if (error == -1) { 750 /* change mount to read-only to prevent write-down */ 751 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 752 } 753 } 754 755 /* 756 * Stop the mount from going any further if the zone is going away. 
757 */ 758 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 759 error = EBUSY; 760 goto errout; 761 } 762 763 /* 764 * Get root vnode. 765 */ 766 proceed: 767 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 768 769 if (error) { 770 /* if nfs4rootvp failed, it will free svp_head */ 771 svp_head = NULL; 772 goto errout; 773 } 774 775 mi = VTOMI4(rtvp); 776 777 /* 778 * Send client id to the server, if necessary 779 */ 780 nfs4_error_zinit(&n4e); 781 nfs4setclientid(mi, cr, FALSE, &n4e); 782 error = n4e.error; 783 784 if (error) 785 goto errout; 786 787 /* 788 * Set option fields in the mount info record 789 */ 790 791 if (svp_head->sv_next) { 792 mutex_enter(&mi->mi_lock); 793 mi->mi_flags |= MI4_LLOCK; 794 mutex_exit(&mi->mi_lock); 795 } 796 797 error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 798 799 errout: 800 if (error) { 801 if (rtvp != NULL) { 802 rp = VTOR4(rtvp); 803 if (rp->r_flags & R4HASHED) 804 rp4_rmhash(rp); 805 } 806 if (mi != NULL) { 807 nfs4_async_stop(vfsp); 808 nfs4_async_manager_stop(vfsp); 809 nfs4_remove_mi_from_server(mi, NULL); 810 if (rtvp != NULL) 811 VN_RELE(rtvp); 812 if (mntzone != NULL) 813 zone_rele(mntzone); 814 /* need to remove it from the zone */ 815 removed = nfs4_mi_zonelist_remove(mi); 816 if (removed) 817 zone_rele(mi->mi_zone); 818 MI4_RELE(mi); 819 return (error); 820 } 821 if (svp_head) 822 sv4_free(svp_head); 823 } 824 825 if (rtvp != NULL) 826 VN_RELE(rtvp); 827 828 if (mntzone != NULL) 829 zone_rele(mntzone); 830 831 return (error); 832 } 833 834 #ifdef DEBUG 835 #define VERS_MSG "NFS4 server " 836 #else 837 #define VERS_MSG "NFS server " 838 #endif 839 840 #define READ_MSG \ 841 VERS_MSG "%s returned 0 for read transfer size" 842 #define WRITE_MSG \ 843 VERS_MSG "%s returned 0 for write transfer size" 844 #define SIZE_MSG \ 845 VERS_MSG "%s returned 0 for maximum file size" 846 847 /* 848 * Get the symbolic link text from the server for a given filehandle 849 * of that symlink. 
850 * 851 * (get symlink text) PUTFH READLINK 852 */ 853 static int 854 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 855 int flags) 856 { 857 COMPOUND4args_clnt args; 858 COMPOUND4res_clnt res; 859 int doqueue; 860 nfs_argop4 argop[2]; 861 nfs_resop4 *resop; 862 READLINK4res *lr_res; 863 uint_t len; 864 bool_t needrecov = FALSE; 865 nfs4_recov_state_t recov_state; 866 nfs4_sharedfh_t *sfh; 867 nfs4_error_t e; 868 int num_retry = nfs4_max_mount_retry; 869 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 870 871 sfh = sfh4_get(fh, mi); 872 recov_state.rs_flags = 0; 873 recov_state.rs_num_retry_despite_err = 0; 874 875 recov_retry: 876 nfs4_error_zinit(&e); 877 878 args.array_len = 2; 879 args.array = argop; 880 args.ctag = TAG_GET_SYMLINK; 881 882 if (! recovery) { 883 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 884 if (e.error) { 885 sfh4_rele(&sfh); 886 return (e.error); 887 } 888 } 889 890 /* 0. putfh symlink fh */ 891 argop[0].argop = OP_CPUTFH; 892 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 893 894 /* 1. readlink */ 895 argop[1].argop = OP_READLINK; 896 897 doqueue = 1; 898 899 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 900 901 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 902 903 if (needrecov && !recovery && num_retry-- > 0) { 904 905 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 906 "getlinktext_otw: initiating recovery\n")); 907 908 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 909 OP_READLINK, NULL) == FALSE) { 910 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 911 if (!e.error) 912 (void) xdr_free(xdr_COMPOUND4res_clnt, 913 (caddr_t)&res); 914 goto recov_retry; 915 } 916 } 917 918 /* 919 * If non-NFS4 pcol error and/or we weren't able to recover. 920 */ 921 if (e.error != 0) { 922 if (! 
recovery) 923 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 924 sfh4_rele(&sfh); 925 return (e.error); 926 } 927 928 if (res.status) { 929 e.error = geterrno4(res.status); 930 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 931 if (! recovery) 932 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 933 sfh4_rele(&sfh); 934 return (e.error); 935 } 936 937 /* res.status == NFS4_OK */ 938 ASSERT(res.status == NFS4_OK); 939 940 resop = &res.array[1]; /* readlink res */ 941 lr_res = &resop->nfs_resop4_u.opreadlink; 942 943 /* treat symlink name as data */ 944 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 945 946 if (! recovery) 947 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 948 sfh4_rele(&sfh); 949 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 950 951 return (0); 952 } 953 954 /* 955 * Skip over consecutive slashes and "/./" in a pathname. 956 */ 957 void 958 pathname_skipslashdot(struct pathname *pnp) 959 { 960 char *c1, *c2; 961 962 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 963 964 c1 = pnp->pn_path + 1; 965 c2 = pnp->pn_path + 2; 966 967 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 968 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 969 pnp->pn_pathlen = pnp->pn_pathlen - 2; 970 } else { 971 pnp->pn_path++; 972 pnp->pn_pathlen--; 973 } 974 } 975 } 976 977 /* 978 * Resolve a symbolic link path. The symlink is in the nth component of 979 * svp->sv_path and has an nfs4 file handle "fh". 980 * Upon return, the sv_path will point to the new path that has the nth 981 * component resolved to its symlink text. 982 */ 983 int 984 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 985 cred_t *cr, int flags) 986 { 987 char *oldpath; 988 char *symlink, *newpath; 989 struct pathname oldpn, newpn; 990 char component[MAXNAMELEN]; 991 int i, addlen, error = 0; 992 int oldpathlen; 993 994 /* Get the symbolic link text over the wire. 
*/ 995 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 996 997 if (error || symlink == NULL || strlen(symlink) == 0) 998 return (error); 999 1000 /* 1001 * Compose the new pathname. 1002 * Note: 1003 * - only the nth component is resolved for the pathname. 1004 * - pathname.pn_pathlen does not count the ending null byte. 1005 */ 1006 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1007 oldpath = svp->sv_path; 1008 oldpathlen = svp->sv_pathlen; 1009 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1010 nfs_rw_exit(&svp->sv_lock); 1011 kmem_free(symlink, strlen(symlink) + 1); 1012 return (error); 1013 } 1014 nfs_rw_exit(&svp->sv_lock); 1015 pn_alloc(&newpn); 1016 1017 /* 1018 * Skip over previous components from the oldpath so that the 1019 * oldpn.pn_path will point to the symlink component. Skip 1020 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1021 * pn_getcompnent can get the component. 1022 */ 1023 for (i = 1; i < nth; i++) { 1024 pathname_skipslashdot(&oldpn); 1025 error = pn_getcomponent(&oldpn, component); 1026 if (error) 1027 goto out; 1028 } 1029 1030 /* 1031 * Copy the old path upto the component right before the symlink 1032 * if the symlink is not an absolute path. 1033 */ 1034 if (symlink[0] != '/') { 1035 addlen = oldpn.pn_path - oldpn.pn_buf; 1036 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1037 newpn.pn_pathlen += addlen; 1038 newpn.pn_path += addlen; 1039 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1040 newpn.pn_pathlen++; 1041 newpn.pn_path++; 1042 } 1043 1044 /* copy the resolved symbolic link text */ 1045 addlen = strlen(symlink); 1046 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1047 error = ENAMETOOLONG; 1048 goto out; 1049 } 1050 bcopy(symlink, newpn.pn_path, addlen); 1051 newpn.pn_pathlen += addlen; 1052 newpn.pn_path += addlen; 1053 1054 /* 1055 * Check if there is any remaining path after the symlink component. 1056 * First, skip the symlink component. 
1057 */ 1058 pathname_skipslashdot(&oldpn); 1059 if (error = pn_getcomponent(&oldpn, component)) 1060 goto out; 1061 1062 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1063 1064 /* 1065 * Copy the remaining path to the new pathname if there is any. 1066 */ 1067 if (addlen > 0) { 1068 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1069 error = ENAMETOOLONG; 1070 goto out; 1071 } 1072 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1073 newpn.pn_pathlen += addlen; 1074 } 1075 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1076 1077 /* get the newpath and store it in the servinfo4_t */ 1078 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1079 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1080 newpath[newpn.pn_pathlen] = '\0'; 1081 1082 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1083 svp->sv_path = newpath; 1084 svp->sv_pathlen = strlen(newpath) + 1; 1085 nfs_rw_exit(&svp->sv_lock); 1086 1087 kmem_free(oldpath, oldpathlen); 1088 out: 1089 kmem_free(symlink, strlen(symlink) + 1); 1090 pn_free(&newpn); 1091 pn_free(&oldpn); 1092 1093 return (error); 1094 } 1095 1096 /* 1097 * Get the root filehandle for the given filesystem and server, and update 1098 * svp. 1099 * 1100 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1101 * to coordinate with recovery. Otherwise, the caller is assumed to be 1102 * the recovery thread or have already done a start_fop. 1103 * 1104 * Errors are returned by the nfs4_error_t parameter. 
1105 */ 1106 1107 static void 1108 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1109 int flags, cred_t *cr, nfs4_error_t *ep) 1110 { 1111 COMPOUND4args_clnt args; 1112 COMPOUND4res_clnt res; 1113 int doqueue = 1; 1114 nfs_argop4 *argop; 1115 nfs_resop4 *resop; 1116 nfs4_ga_res_t *garp; 1117 int num_argops; 1118 lookup4_param_t lookuparg; 1119 nfs_fh4 *tmpfhp; 1120 nfs_fh4 *resfhp; 1121 bool_t needrecov = FALSE; 1122 nfs4_recov_state_t recov_state; 1123 int llndx; 1124 int nthcomp; 1125 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1126 1127 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1128 ASSERT(svp->sv_path != NULL); 1129 if (svp->sv_path[0] == '\0') { 1130 nfs_rw_exit(&svp->sv_lock); 1131 nfs4_error_init(ep, EINVAL); 1132 return; 1133 } 1134 nfs_rw_exit(&svp->sv_lock); 1135 1136 recov_state.rs_flags = 0; 1137 recov_state.rs_num_retry_despite_err = 0; 1138 recov_retry: 1139 nfs4_error_zinit(ep); 1140 1141 if (!recovery) { 1142 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1143 &recov_state, NULL); 1144 1145 /* 1146 * If recovery has been started and this request as 1147 * initiated by a mount, then we must wait for recovery 1148 * to finish before proceeding, otherwise, the error 1149 * cleanup would remove data structures needed by the 1150 * recovery thread. 1151 */ 1152 if (ep->error) { 1153 mutex_enter(&mi->mi_lock); 1154 if (mi->mi_flags & MI4_MOUNTING) { 1155 mi->mi_flags |= MI4_RECOV_FAIL; 1156 mi->mi_error = EIO; 1157 1158 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1159 "nfs4getfh_otw: waiting 4 recovery\n")); 1160 1161 while (mi->mi_flags & MI4_RECOV_ACTIV) 1162 cv_wait(&mi->mi_failover_cv, 1163 &mi->mi_lock); 1164 } 1165 mutex_exit(&mi->mi_lock); 1166 return; 1167 } 1168 1169 /* 1170 * If the client does not specify a specific flavor to use 1171 * and has not gotten a secinfo list from the server yet, 1172 * retrieve the secinfo list from the server and use a 1173 * flavor from the list to mount. 
1174 * 1175 * If fail to get the secinfo list from the server, then 1176 * try the default flavor. 1177 */ 1178 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1179 svp->sv_secinfo == NULL) { 1180 (void) nfs4_secinfo_path(mi, cr, FALSE); 1181 } 1182 } 1183 1184 if (recovery) 1185 args.ctag = TAG_REMAP_MOUNT; 1186 else 1187 args.ctag = TAG_MOUNT; 1188 1189 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1190 lookuparg.argsp = &args; 1191 lookuparg.resp = &res; 1192 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1193 lookuparg.trailer_len = 0; 1194 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1195 lookuparg.mi = mi; 1196 1197 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1198 ASSERT(svp->sv_path != NULL); 1199 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1200 nfs_rw_exit(&svp->sv_lock); 1201 1202 argop = args.array; 1203 num_argops = args.array_len; 1204 1205 /* choose public or root filehandle */ 1206 if (flags & NFS4_GETFH_PUBLIC) 1207 argop[0].argop = OP_PUTPUBFH; 1208 else 1209 argop[0].argop = OP_PUTROOTFH; 1210 1211 /* get fh */ 1212 argop[1].argop = OP_GETFH; 1213 1214 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1215 "nfs4getfh_otw: %s call, mi 0x%p", 1216 needrecov ? 
"recov" : "first", (void *)mi)); 1217 1218 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1219 1220 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1221 1222 if (needrecov) { 1223 bool_t abort; 1224 1225 if (recovery) { 1226 nfs4args_lookup_free(argop, num_argops); 1227 kmem_free(argop, 1228 lookuparg.arglen * sizeof (nfs_argop4)); 1229 if (!ep->error) 1230 (void) xdr_free(xdr_COMPOUND4res_clnt, 1231 (caddr_t)&res); 1232 return; 1233 } 1234 1235 NFS4_DEBUG(nfs4_client_recov_debug, 1236 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1237 1238 abort = nfs4_start_recovery(ep, mi, NULL, 1239 NULL, NULL, NULL, OP_GETFH, NULL); 1240 if (!ep->error) { 1241 ep->error = geterrno4(res.status); 1242 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1243 } 1244 nfs4args_lookup_free(argop, num_argops); 1245 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1246 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1247 /* have another go? */ 1248 if (abort == FALSE) 1249 goto recov_retry; 1250 return; 1251 } 1252 1253 /* 1254 * No recovery, but check if error is set. 1255 */ 1256 if (ep->error) { 1257 nfs4args_lookup_free(argop, num_argops); 1258 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1259 if (!recovery) 1260 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1261 needrecov); 1262 return; 1263 } 1264 1265 is_link_err: 1266 1267 /* for non-recovery errors */ 1268 if (res.status && res.status != NFS4ERR_SYMLINK) { 1269 if (!recovery) { 1270 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1271 needrecov); 1272 } 1273 nfs4args_lookup_free(argop, num_argops); 1274 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1275 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1276 return; 1277 } 1278 1279 /* 1280 * If any intermediate component in the path is a symbolic link, 1281 * resolve the symlink, then try mount again using the new path. 
1282 */ 1283 if (res.status == NFS4ERR_SYMLINK) { 1284 int where; 1285 1286 /* 1287 * This must be from OP_LOOKUP failure. The (cfh) for this 1288 * OP_LOOKUP is a symlink node. Found out where the 1289 * OP_GETFH is for the (cfh) that is a symlink node. 1290 * 1291 * Example: 1292 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1293 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1294 * 1295 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1296 * In this case, where = 7, nthcomp = 2. 1297 */ 1298 where = res.array_len - 2; 1299 ASSERT(where > 0); 1300 1301 resop = &res.array[where - 1]; 1302 ASSERT(resop->resop == OP_GETFH); 1303 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1304 nthcomp = res.array_len/3 - 1; 1305 1306 /* 1307 * Need to call nfs4_end_op before resolve_sympath to avoid 1308 * potential nfs4_start_op deadlock. 1309 */ 1310 if (!recovery) 1311 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1312 needrecov); 1313 1314 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1315 flags); 1316 1317 nfs4args_lookup_free(argop, num_argops); 1318 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1319 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1320 1321 if (ep->error) 1322 return; 1323 1324 goto recov_retry; 1325 } 1326 1327 /* getfh */ 1328 resop = &res.array[res.array_len - 2]; 1329 ASSERT(resop->resop == OP_GETFH); 1330 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1331 1332 /* getattr fsinfo res */ 1333 resop++; 1334 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1335 1336 *vtp = garp->n4g_va.va_type; 1337 1338 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1339 1340 mutex_enter(&mi->mi_lock); 1341 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1342 mi->mi_flags |= MI4_LINK; 1343 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1344 mi->mi_flags |= MI4_SYMLINK; 1345 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1346 mi->mi_flags |= MI4_ACL; 1347 mutex_exit(&mi->mi_lock); 
1348 1349 if (garp->n4g_ext_res->n4g_maxread == 0) 1350 mi->mi_tsize = 1351 MIN(MAXBSIZE, mi->mi_tsize); 1352 else 1353 mi->mi_tsize = 1354 MIN(garp->n4g_ext_res->n4g_maxread, 1355 mi->mi_tsize); 1356 1357 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1358 mi->mi_stsize = 1359 MIN(MAXBSIZE, mi->mi_stsize); 1360 else 1361 mi->mi_stsize = 1362 MIN(garp->n4g_ext_res->n4g_maxwrite, 1363 mi->mi_stsize); 1364 1365 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1366 mi->mi_maxfilesize = 1367 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1368 mi->mi_maxfilesize); 1369 1370 /* 1371 * If the final component is a a symbolic link, resolve the symlink, 1372 * then try mount again using the new path. 1373 * 1374 * Assume no symbolic link for root filesysm "/". 1375 */ 1376 if (*vtp == VLNK) { 1377 /* 1378 * nthcomp is the total result length minus 1379 * the 1st 2 OPs (PUTROOTFH, GETFH), 1380 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1381 * 1382 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1383 * LOOKUP 2nd-comp GETFH GETATTR 1384 * 1385 * (8 - 2)/3 = 2 1386 */ 1387 nthcomp = (res.array_len - 2)/3; 1388 1389 /* 1390 * Need to call nfs4_end_op before resolve_sympath to avoid 1391 * potential nfs4_start_op deadlock. See RFE 4777612. 1392 */ 1393 if (!recovery) 1394 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1395 needrecov); 1396 1397 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1398 flags); 1399 1400 nfs4args_lookup_free(argop, num_argops); 1401 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1402 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1403 1404 if (ep->error) 1405 return; 1406 1407 goto recov_retry; 1408 } 1409 1410 /* 1411 * We need to figure out where in the compound the getfh 1412 * for the parent directory is. If the object to be mounted is 1413 * the root, then there is no lookup at all: 1414 * PUTROOTFH, GETFH. 
1415 * If the object to be mounted is in the root, then the compound is: 1416 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1417 * In either of these cases, the index of the GETFH is 1. 1418 * If it is not at the root, then it's something like: 1419 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1420 * LOOKUP, GETFH, GETATTR 1421 * In this case, the index is llndx (last lookup index) - 2. 1422 */ 1423 if (llndx == -1 || llndx == 2) 1424 resop = &res.array[1]; 1425 else { 1426 ASSERT(llndx > 2); 1427 resop = &res.array[llndx-2]; 1428 } 1429 1430 ASSERT(resop->resop == OP_GETFH); 1431 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1432 1433 /* save the filehandles for the replica */ 1434 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1435 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1436 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1437 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1438 tmpfhp->nfs_fh4_len); 1439 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1440 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1441 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1442 1443 /* initialize fsid and supp_attrs for server fs */ 1444 svp->sv_fsid = garp->n4g_fsid; 1445 svp->sv_supp_attrs = 1446 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1447 1448 nfs_rw_exit(&svp->sv_lock); 1449 1450 nfs4args_lookup_free(argop, num_argops); 1451 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1452 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1453 if (!recovery) 1454 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1455 } 1456 1457 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1458 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1459 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1460 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1461 1462 /* 1463 * Remap the root filehandle for the given filesystem. 
1464 * 1465 * results returned via the nfs4_error_t parameter. 1466 */ 1467 void 1468 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1469 { 1470 struct servinfo4 *svp; 1471 vtype_t vtype; 1472 nfs_fh4 rootfh; 1473 int getfh_flags; 1474 char *orig_sv_path; 1475 int orig_sv_pathlen, num_retry; 1476 1477 mutex_enter(&mi->mi_lock); 1478 1479 remap_retry: 1480 svp = mi->mi_curr_serv; 1481 getfh_flags = 1482 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1483 getfh_flags |= 1484 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1485 mutex_exit(&mi->mi_lock); 1486 1487 /* 1488 * Just in case server path being mounted contains 1489 * symlinks and fails w/STALE, save the initial sv_path 1490 * so we can redrive the initial mount compound with the 1491 * initial sv_path -- not a symlink-expanded version. 1492 * 1493 * This could only happen if a symlink was expanded 1494 * and the expanded mount compound failed stale. Because 1495 * it could be the case that the symlink was removed at 1496 * the server (and replaced with another symlink/dir, 1497 * we need to use the initial sv_path when attempting 1498 * to re-lookup everything and recover. 1499 */ 1500 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1501 orig_sv_pathlen = svp->sv_pathlen; 1502 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1503 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1504 nfs_rw_exit(&svp->sv_lock); 1505 1506 num_retry = nfs4_max_mount_retry; 1507 1508 do { 1509 /* 1510 * Get the root fh from the server. Retry nfs4_max_mount_retry 1511 * (2) times if it fails with STALE since the recovery 1512 * infrastructure doesn't do STALE recovery for components 1513 * of the server path to the object being mounted. 1514 */ 1515 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1516 1517 if (ep->error == 0 && ep->stat == NFS4_OK) 1518 break; 1519 1520 /* 1521 * For some reason, the mount compound failed. 
Before 1522 * retrying, we need to restore the original sv_path 1523 * because it might have contained symlinks that were 1524 * expanded by nfsgetfh_otw before the failure occurred. 1525 * replace current sv_path with orig sv_path -- just in case 1526 * it changed due to embedded symlinks. 1527 */ 1528 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1529 if (orig_sv_pathlen != svp->sv_pathlen) { 1530 kmem_free(svp->sv_path, svp->sv_pathlen); 1531 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1532 svp->sv_pathlen = orig_sv_pathlen; 1533 } 1534 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1535 nfs_rw_exit(&svp->sv_lock); 1536 1537 } while (num_retry-- > 0); 1538 1539 kmem_free(orig_sv_path, orig_sv_pathlen); 1540 1541 if (ep->error != 0 || ep->stat != 0) { 1542 return; 1543 } 1544 1545 if (vtype != VNON && vtype != mi->mi_type) { 1546 /* shouldn't happen */ 1547 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1548 "nfs4_remap_root: server root vnode type (%d) doesn't " 1549 "match mount info (%d)", vtype, mi->mi_type); 1550 } 1551 1552 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1553 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1554 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1555 nfs_rw_exit(&svp->sv_lock); 1556 sfh4_update(mi->mi_rootfh, &rootfh); 1557 1558 /* 1559 * It's possible that recovery took place on the filesystem 1560 * and the server has been updated between the time we did 1561 * the nfs4getfh_otw and now. Re-drive the otw operation 1562 * to make sure we have a good fh. 
1563 */ 1564 mutex_enter(&mi->mi_lock); 1565 if (mi->mi_curr_serv != svp) 1566 goto remap_retry; 1567 1568 mutex_exit(&mi->mi_lock); 1569 } 1570 1571 static int 1572 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1573 int flags, cred_t *cr, zone_t *zone) 1574 { 1575 vnode_t *rtvp = NULL; 1576 mntinfo4_t *mi; 1577 dev_t nfs_dev; 1578 int error = 0; 1579 rnode4_t *rp; 1580 int i; 1581 struct vattr va; 1582 vtype_t vtype = VNON; 1583 vtype_t tmp_vtype = VNON; 1584 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1585 nfs4_oo_hash_bucket_t *bucketp; 1586 nfs_fh4 fh; 1587 char *droptext = ""; 1588 struct nfs_stats *nfsstatsp; 1589 nfs4_fname_t *mfname; 1590 nfs4_error_t e; 1591 char *orig_sv_path; 1592 int orig_sv_pathlen, num_retry, removed; 1593 cred_t *lcr = NULL, *tcr = cr; 1594 1595 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1596 ASSERT(nfsstatsp != NULL); 1597 1598 ASSERT(nfs_zone() == zone); 1599 ASSERT(crgetref(cr)); 1600 1601 /* 1602 * Create a mount record and link it to the vfs struct. 
1603 */ 1604 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1605 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1606 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1607 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1608 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1609 1610 if (!(flags & NFSMNT_SOFT)) 1611 mi->mi_flags |= MI4_HARD; 1612 if ((flags & NFSMNT_NOPRINT)) 1613 mi->mi_flags |= MI4_NOPRINT; 1614 if (flags & NFSMNT_INT) 1615 mi->mi_flags |= MI4_INT; 1616 if (flags & NFSMNT_PUBLIC) 1617 mi->mi_flags |= MI4_PUBLIC; 1618 mi->mi_retrans = NFS_RETRIES; 1619 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1620 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1621 mi->mi_timeo = nfs4_cots_timeo; 1622 else 1623 mi->mi_timeo = NFS_TIMEO; 1624 mi->mi_prog = NFS_PROGRAM; 1625 mi->mi_vers = NFS_V4; 1626 mi->mi_rfsnames = rfsnames_v4; 1627 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1628 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1629 mi->mi_servers = svp; 1630 mi->mi_curr_serv = svp; 1631 mi->mi_acregmin = SEC2HR(ACREGMIN); 1632 mi->mi_acregmax = SEC2HR(ACREGMAX); 1633 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1634 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1635 mi->mi_fh_expire_type = FH4_PERSISTENT; 1636 mi->mi_clientid_next = NULL; 1637 mi->mi_clientid_prev = NULL; 1638 mi->mi_grace_wait = 0; 1639 mi->mi_error = 0; 1640 mi->mi_srvsettime = 0; 1641 1642 mi->mi_count = 1; 1643 1644 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1645 mi->mi_stsize = mi->mi_tsize; 1646 1647 if (flags & NFSMNT_DIRECTIO) 1648 mi->mi_flags |= MI4_DIRECTIO; 1649 1650 mi->mi_flags |= MI4_MOUNTING; 1651 1652 /* 1653 * Make a vfs struct for nfs. We do this here instead of below 1654 * because rtvp needs a vfs before we can do a getattr on it. 
1655 * 1656 * Assign a unique device id to the mount 1657 */ 1658 mutex_enter(&nfs_minor_lock); 1659 do { 1660 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1661 nfs_dev = makedevice(nfs_major, nfs_minor); 1662 } while (vfs_devismounted(nfs_dev)); 1663 mutex_exit(&nfs_minor_lock); 1664 1665 vfsp->vfs_dev = nfs_dev; 1666 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1667 vfsp->vfs_data = (caddr_t)mi; 1668 vfsp->vfs_fstype = nfsfstyp; 1669 vfsp->vfs_bsize = nfs4_bsize; 1670 1671 /* 1672 * Initialize fields used to support async putpage operations. 1673 */ 1674 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1675 mi->mi_async_clusters[i] = nfs4_async_clusters; 1676 mi->mi_async_init_clusters = nfs4_async_clusters; 1677 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1678 mi->mi_max_threads = nfs4_max_threads; 1679 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1680 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1681 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1682 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1683 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1684 1685 mi->mi_vfsp = vfsp; 1686 zone_hold(mi->mi_zone = zone); 1687 nfs4_mi_zonelist_add(mi); 1688 1689 /* 1690 * Initialize the <open owner/cred> hash table. 1691 */ 1692 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1693 bucketp = &(mi->mi_oo_list[i]); 1694 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1695 list_create(&bucketp->b_oo_hash_list, 1696 sizeof (nfs4_open_owner_t), 1697 offsetof(nfs4_open_owner_t, oo_hash_node)); 1698 } 1699 1700 /* 1701 * Initialize the freed open owner list. 
1702 */ 1703 mi->mi_foo_num = 0; 1704 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1705 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1706 offsetof(nfs4_open_owner_t, oo_foo_node)); 1707 1708 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1709 offsetof(nfs4_lost_rqst_t, lr_node)); 1710 1711 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1712 offsetof(nfs4_bseqid_entry_t, bs_node)); 1713 1714 /* 1715 * Initialize the msg buffer. 1716 */ 1717 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1718 offsetof(nfs4_debug_msg_t, msg_node)); 1719 mi->mi_msg_count = 0; 1720 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1721 1722 /* 1723 * Initialize kstats 1724 */ 1725 nfs4_mnt_kstat_init(vfsp); 1726 1727 /* 1728 * Initialize the shared filehandle pool, and get the fname for 1729 * the filesystem root. 1730 */ 1731 sfh4_createtab(&mi->mi_filehandles); 1732 mi->mi_fname = fn_get(NULL, "."); 1733 1734 /* 1735 * Save server path we're attempting to mount. 1736 */ 1737 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1738 orig_sv_pathlen = svp_head->sv_pathlen; 1739 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1740 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1741 nfs_rw_exit(&svp->sv_lock); 1742 1743 /* 1744 * Make the GETFH call to get root fh for each replica. 1745 */ 1746 if (svp_head->sv_next) 1747 droptext = ", dropping replica"; 1748 1749 /* 1750 * If the uid is set then set the creds for secure mounts 1751 * by proxy processes such as automountd. 
1752 */ 1753 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1754 if (svp->sv_secdata->uid != 0) { 1755 lcr = crdup(cr); 1756 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1757 tcr = lcr; 1758 } 1759 nfs_rw_exit(&svp->sv_lock); 1760 for (svp = svp_head; svp; svp = svp->sv_next) { 1761 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1762 nfs_cmn_err(error, CE_WARN, 1763 VERS_MSG "Host %s is a duplicate%s", 1764 svp->sv_hostname, droptext); 1765 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1766 svp->sv_flags |= SV4_NOTINUSE; 1767 nfs_rw_exit(&svp->sv_lock); 1768 continue; 1769 } 1770 mi->mi_curr_serv = svp; 1771 1772 /* 1773 * Just in case server path being mounted contains 1774 * symlinks and fails w/STALE, save the initial sv_path 1775 * so we can redrive the initial mount compound with the 1776 * initial sv_path -- not a symlink-expanded version. 1777 * 1778 * This could only happen if a symlink was expanded 1779 * and the expanded mount compound failed stale. Because 1780 * it could be the case that the symlink was removed at 1781 * the server (and replaced with another symlink/dir, 1782 * we need to use the initial sv_path when attempting 1783 * to re-lookup everything and recover. 1784 * 1785 * Other mount errors should evenutally be handled here also 1786 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1787 * failures will result in mount being redriven a few times. 1788 */ 1789 num_retry = nfs4_max_mount_retry; 1790 do { 1791 nfs4getfh_otw(mi, svp, &tmp_vtype, 1792 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1793 NFS4_GETFH_NEEDSOP, tcr, &e); 1794 1795 if (e.error == 0 && e.stat == NFS4_OK) 1796 break; 1797 1798 /* 1799 * replace current sv_path with orig sv_path -- just in 1800 * case it changed due to embedded symlinks. 
1801 */ 1802 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1803 if (orig_sv_pathlen != svp->sv_pathlen) { 1804 kmem_free(svp->sv_path, svp->sv_pathlen); 1805 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1806 KM_SLEEP); 1807 svp->sv_pathlen = orig_sv_pathlen; 1808 } 1809 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1810 nfs_rw_exit(&svp->sv_lock); 1811 1812 } while (num_retry-- > 0); 1813 1814 error = e.error ? e.error : geterrno4(e.stat); 1815 if (error) { 1816 nfs_cmn_err(error, CE_WARN, 1817 VERS_MSG "initial call to %s failed%s: %m", 1818 svp->sv_hostname, droptext); 1819 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1820 svp->sv_flags |= SV4_NOTINUSE; 1821 nfs_rw_exit(&svp->sv_lock); 1822 mi->mi_flags &= ~MI4_RECOV_FAIL; 1823 mi->mi_error = 0; 1824 continue; 1825 } 1826 1827 if (tmp_vtype == VBAD) { 1828 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1829 VERS_MSG "%s returned a bad file type for " 1830 "root%s", svp->sv_hostname, droptext); 1831 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1832 svp->sv_flags |= SV4_NOTINUSE; 1833 nfs_rw_exit(&svp->sv_lock); 1834 continue; 1835 } 1836 1837 if (vtype == VNON) { 1838 vtype = tmp_vtype; 1839 } else if (vtype != tmp_vtype) { 1840 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1841 VERS_MSG "%s returned a different file type " 1842 "for root%s", svp->sv_hostname, droptext); 1843 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1844 svp->sv_flags |= SV4_NOTINUSE; 1845 nfs_rw_exit(&svp->sv_lock); 1846 continue; 1847 } 1848 if (firstsvp == NULL) 1849 firstsvp = svp; 1850 } 1851 1852 kmem_free(orig_sv_path, orig_sv_pathlen); 1853 1854 if (firstsvp == NULL) { 1855 if (error == 0) 1856 error = ENOENT; 1857 goto bad; 1858 } 1859 1860 mi->mi_curr_serv = svp = firstsvp; 1861 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1862 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1863 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1864 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1865 mi->mi_rootfh = 
sfh4_get(&fh, mi); 1866 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1867 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1868 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1869 nfs_rw_exit(&svp->sv_lock); 1870 1871 /* 1872 * Make the root vnode without attributes. 1873 */ 1874 mfname = mi->mi_fname; 1875 fn_hold(mfname); 1876 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1877 &mfname, NULL, mi, cr, gethrtime()); 1878 rtvp->v_type = vtype; 1879 1880 mi->mi_curread = mi->mi_tsize; 1881 mi->mi_curwrite = mi->mi_stsize; 1882 1883 /* 1884 * Start the manager thread responsible for handling async worker 1885 * threads. 1886 */ 1887 MI4_HOLD(mi); 1888 VFS_HOLD(vfsp); /* add reference for thread */ 1889 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1890 vfsp, 0, minclsyspri); 1891 ASSERT(mi->mi_manager_thread != NULL); 1892 1893 /* 1894 * Create the thread that handles over-the-wire calls for 1895 * VOP_INACTIVE. 1896 * This needs to happen after the manager thread is created. 1897 */ 1898 MI4_HOLD(mi); 1899 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1900 mi, 0, minclsyspri); 1901 ASSERT(mi->mi_inactive_thread != NULL); 1902 1903 /* If we didn't get a type, get one now */ 1904 if (rtvp->v_type == VNON) { 1905 va.va_mask = AT_TYPE; 1906 error = nfs4getattr(rtvp, &va, tcr); 1907 if (error) 1908 goto bad; 1909 rtvp->v_type = va.va_type; 1910 } 1911 1912 mi->mi_type = rtvp->v_type; 1913 1914 mutex_enter(&mi->mi_lock); 1915 mi->mi_flags &= ~MI4_MOUNTING; 1916 mutex_exit(&mi->mi_lock); 1917 1918 *rtvpp = rtvp; 1919 if (lcr != NULL) 1920 crfree(lcr); 1921 1922 return (0); 1923 bad: 1924 /* 1925 * An error occurred somewhere, need to clean up... 1926 */ 1927 if (lcr != NULL) 1928 crfree(lcr); 1929 if (rtvp != NULL) { 1930 /* 1931 * We need to release our reference to the root vnode and 1932 * destroy the mntinfo4 struct that we just created. 
1933 */ 1934 rp = VTOR4(rtvp); 1935 if (rp->r_flags & R4HASHED) 1936 rp4_rmhash(rp); 1937 VN_RELE(rtvp); 1938 } 1939 nfs4_async_stop(vfsp); 1940 nfs4_async_manager_stop(vfsp); 1941 removed = nfs4_mi_zonelist_remove(mi); 1942 if (removed) 1943 zone_rele(mi->mi_zone); 1944 1945 /* 1946 * This releases the initial "hold" of the mi since it will never 1947 * be referenced by the vfsp. Also, when mount returns to vfs.c 1948 * with an error, the vfsp will be destroyed, not rele'd. 1949 */ 1950 MI4_RELE(mi); 1951 1952 *rtvpp = NULL; 1953 return (error); 1954 } 1955 1956 /* 1957 * vfs operations 1958 */ 1959 static int 1960 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1961 { 1962 mntinfo4_t *mi; 1963 ushort_t omax; 1964 int removed; 1965 1966 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1967 return (EPERM); 1968 1969 mi = VFTOMI4(vfsp); 1970 1971 if (flag & MS_FORCE) { 1972 vfsp->vfs_flag |= VFS_UNMOUNTED; 1973 if (nfs_zone() != mi->mi_zone) { 1974 /* 1975 * If the request is coming from the wrong zone, 1976 * we don't want to create any new threads, and 1977 * performance is not a concern. Do everything 1978 * inline. 1979 */ 1980 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1981 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1982 (void *)vfsp)); 1983 nfs4_free_mount(vfsp, cr); 1984 } else { 1985 /* 1986 * Free data structures asynchronously, to avoid 1987 * blocking the current thread (for performance 1988 * reasons only). 1989 */ 1990 async_free_mount(vfsp, cr); 1991 } 1992 return (0); 1993 } 1994 /* 1995 * Wait until all asynchronous putpage operations on 1996 * this file system are complete before flushing rnodes 1997 * from the cache. 1998 */ 1999 omax = mi->mi_max_threads; 2000 if (nfs4_async_stop_sig(vfsp)) { 2001 return (EINTR); 2002 } 2003 r4flush(vfsp, cr); 2004 /* 2005 * If there are any active vnodes on this file system, 2006 * then the file system is busy and can't be umounted. 
2007 */ 2008 if (check_rtable4(vfsp)) { 2009 mutex_enter(&mi->mi_async_lock); 2010 mi->mi_max_threads = omax; 2011 mutex_exit(&mi->mi_async_lock); 2012 return (EBUSY); 2013 } 2014 /* 2015 * The unmount can't fail from now on, and there are no active 2016 * files that could require over-the-wire calls to the server, 2017 * so stop the async manager and the inactive thread. 2018 */ 2019 nfs4_async_manager_stop(vfsp); 2020 /* 2021 * Destroy all rnodes belonging to this file system from the 2022 * rnode hash queues and purge any resources allocated to 2023 * them. 2024 */ 2025 destroy_rtable4(vfsp, cr); 2026 vfsp->vfs_flag |= VFS_UNMOUNTED; 2027 2028 nfs4_remove_mi_from_server(mi, NULL); 2029 removed = nfs4_mi_zonelist_remove(mi); 2030 if (removed) 2031 zone_rele(mi->mi_zone); 2032 2033 return (0); 2034 } 2035 2036 /* 2037 * find root of nfs 2038 */ 2039 static int 2040 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2041 { 2042 mntinfo4_t *mi; 2043 vnode_t *vp; 2044 nfs4_fname_t *mfname; 2045 servinfo4_t *svp; 2046 2047 mi = VFTOMI4(vfsp); 2048 2049 if (nfs_zone() != mi->mi_zone) 2050 return (EPERM); 2051 2052 svp = mi->mi_curr_serv; 2053 if (svp) { 2054 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2055 if (svp->sv_flags & SV4_ROOT_STALE) { 2056 nfs_rw_exit(&svp->sv_lock); 2057 2058 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2059 if (svp->sv_flags & SV4_ROOT_STALE) { 2060 svp->sv_flags &= ~SV4_ROOT_STALE; 2061 nfs_rw_exit(&svp->sv_lock); 2062 return (ENOENT); 2063 } 2064 nfs_rw_exit(&svp->sv_lock); 2065 } else 2066 nfs_rw_exit(&svp->sv_lock); 2067 } 2068 2069 mfname = mi->mi_fname; 2070 fn_hold(mfname); 2071 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2072 VFTOMI4(vfsp), CRED(), gethrtime()); 2073 2074 if (VTOR4(vp)->r_flags & R4STALE) { 2075 VN_RELE(vp); 2076 return (ENOENT); 2077 } 2078 2079 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2080 2081 vp->v_type = mi->mi_type; 2082 2083 *vpp = vp; 2084 2085 return (0); 2086 } 2087 
2088 static int 2089 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2090 { 2091 int error; 2092 nfs4_ga_res_t gar; 2093 nfs4_ga_ext_res_t ger; 2094 2095 gar.n4g_ext_res = &ger; 2096 2097 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2098 NFS4_STATFS_ATTR_MASK, cr)) 2099 return (error); 2100 2101 *sbp = gar.n4g_ext_res->n4g_sb; 2102 2103 return (0); 2104 } 2105 2106 /* 2107 * Get file system statistics. 2108 */ 2109 static int 2110 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2111 { 2112 int error; 2113 vnode_t *vp; 2114 cred_t *cr; 2115 2116 error = nfs4_root(vfsp, &vp); 2117 if (error) 2118 return (error); 2119 2120 cr = CRED(); 2121 2122 error = nfs4_statfs_otw(vp, sbp, cr); 2123 if (!error) { 2124 (void) strncpy(sbp->f_basetype, 2125 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2126 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2127 } else { 2128 nfs4_purge_stale_fh(error, vp, cr); 2129 } 2130 2131 VN_RELE(vp); 2132 2133 return (error); 2134 } 2135 2136 static kmutex_t nfs4_syncbusy; 2137 2138 /* 2139 * Flush dirty nfs files for file system vfsp. 2140 * If vfsp == NULL, all nfs files are flushed. 2141 * 2142 * SYNC_CLOSE in flag is passed to us to 2143 * indicate that we are shutting down and or 2144 * rebooting. 2145 */ 2146 static int 2147 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2148 { 2149 /* 2150 * Cross-zone calls are OK here, since this translates to a 2151 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2152 */ 2153 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2154 r4flush(vfsp, cr); 2155 mutex_exit(&nfs4_syncbusy); 2156 } 2157 2158 /* 2159 * if SYNC_CLOSE is set then we know that 2160 * the system is rebooting, mark the mntinfo 2161 * for later examination. 
2162 */ 2163 if (vfsp && (flag & SYNC_CLOSE)) { 2164 mntinfo4_t *mi; 2165 2166 mi = VFTOMI4(vfsp); 2167 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2168 mutex_enter(&mi->mi_lock); 2169 mi->mi_flags |= MI4_SHUTDOWN; 2170 mutex_exit(&mi->mi_lock); 2171 } 2172 } 2173 return (0); 2174 } 2175 2176 /* 2177 * vget is difficult, if not impossible, to support in v4 because we don't 2178 * know the parent directory or name, which makes it impossible to create a 2179 * useful shadow vnode. And we need the shadow vnode for things like 2180 * OPEN. 2181 */ 2182 2183 /* ARGSUSED */ 2184 /* 2185 * XXX Check nfs4_vget_pseudo() for dependency. 2186 */ 2187 static int 2188 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2189 { 2190 return (EREMOTE); 2191 } 2192 2193 /* 2194 * nfs4_mountroot get called in the case where we are diskless booting. All 2195 * we need from here is the ability to get the server info and from there we 2196 * can simply call nfs4_rootvp. 2197 */ 2198 /* ARGSUSED */ 2199 static int 2200 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2201 { 2202 vnode_t *rtvp; 2203 char root_hostname[SYS_NMLN+1]; 2204 struct servinfo4 *svp; 2205 int error; 2206 int vfsflags; 2207 size_t size; 2208 char *root_path; 2209 struct pathname pn; 2210 char *name; 2211 cred_t *cr; 2212 mntinfo4_t *mi; 2213 struct nfs_args args; /* nfs mount arguments */ 2214 static char token[10]; 2215 nfs4_error_t n4e; 2216 2217 bzero(&args, sizeof (args)); 2218 2219 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2220 clkset(-1L); /* hack for now - until we get time svc? */ 2221 2222 if (why == ROOT_REMOUNT) { 2223 /* 2224 * Shouldn't happen. 2225 */ 2226 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2227 } 2228 2229 if (why == ROOT_UNMOUNT) { 2230 /* 2231 * Nothing to do for NFS. 
2232 */ 2233 return (0); 2234 } 2235 2236 /* 2237 * why == ROOT_INIT 2238 */ 2239 2240 name = token; 2241 *name = 0; 2242 (void) getfsname("root", name, sizeof (token)); 2243 2244 pn_alloc(&pn); 2245 root_path = pn.pn_path; 2246 2247 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2248 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2249 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2250 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2251 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2252 2253 /* 2254 * Get server address 2255 * Get the root path 2256 * Get server's transport 2257 * Get server's hostname 2258 * Get options 2259 */ 2260 args.addr = &svp->sv_addr; 2261 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2262 args.fh = (char *)&svp->sv_fhandle; 2263 args.knconf = svp->sv_knconf; 2264 args.hostname = root_hostname; 2265 vfsflags = 0; 2266 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2267 &args, &vfsflags)) { 2268 if (error == EPROTONOSUPPORT) 2269 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2270 "mount_root failed: server doesn't support NFS V4"); 2271 else 2272 nfs_cmn_err(error, CE_WARN, 2273 "nfs4_mountroot: mount_root failed: %m"); 2274 nfs_rw_exit(&svp->sv_lock); 2275 sv4_free(svp); 2276 pn_free(&pn); 2277 return (error); 2278 } 2279 nfs_rw_exit(&svp->sv_lock); 2280 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2281 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2282 (void) strcpy(svp->sv_hostname, root_hostname); 2283 2284 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2285 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2286 (void) strcpy(svp->sv_path, root_path); 2287 2288 /* 2289 * Force root partition to always be mounted with AUTH_UNIX for now 2290 */ 2291 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2292 svp->sv_secdata->secmod = AUTH_UNIX; 2293 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2294 
svp->sv_secdata->data = NULL; 2295 2296 cr = crgetcred(); 2297 rtvp = NULL; 2298 2299 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2300 2301 if (error) { 2302 crfree(cr); 2303 pn_free(&pn); 2304 goto errout; 2305 } 2306 2307 mi = VTOMI4(rtvp); 2308 2309 /* 2310 * Send client id to the server, if necessary 2311 */ 2312 nfs4_error_zinit(&n4e); 2313 nfs4setclientid(mi, cr, FALSE, &n4e); 2314 error = n4e.error; 2315 2316 crfree(cr); 2317 2318 if (error) { 2319 pn_free(&pn); 2320 goto errout; 2321 } 2322 2323 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2324 if (error) { 2325 nfs_cmn_err(error, CE_WARN, 2326 "nfs4_mountroot: invalid root mount options"); 2327 pn_free(&pn); 2328 goto errout; 2329 } 2330 2331 (void) vfs_lock_wait(vfsp); 2332 vfs_add(NULL, vfsp, vfsflags); 2333 vfs_unlock(vfsp); 2334 2335 size = strlen(svp->sv_hostname); 2336 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2337 rootfs.bo_name[size] = ':'; 2338 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2339 2340 pn_free(&pn); 2341 2342 errout: 2343 if (error) { 2344 sv4_free(svp); 2345 nfs4_async_stop(vfsp); 2346 nfs4_async_manager_stop(vfsp); 2347 } 2348 2349 if (rtvp != NULL) 2350 VN_RELE(rtvp); 2351 2352 return (error); 2353 } 2354 2355 /* 2356 * Initialization routine for VFS routines. 
Should only be called once 2357 */ 2358 int 2359 nfs4_vfsinit(void) 2360 { 2361 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2362 nfs4setclientid_init(); 2363 return (0); 2364 } 2365 2366 void 2367 nfs4_vfsfini(void) 2368 { 2369 nfs4setclientid_fini(); 2370 mutex_destroy(&nfs4_syncbusy); 2371 } 2372 2373 void 2374 nfs4_freevfs(vfs_t *vfsp) 2375 { 2376 mntinfo4_t *mi; 2377 2378 /* need to release the initial hold */ 2379 mi = VFTOMI4(vfsp); 2380 MI4_RELE(mi); 2381 } 2382 2383 /* 2384 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2385 */ 2386 struct nfs4_server nfs4_server_lst = 2387 { &nfs4_server_lst, &nfs4_server_lst }; 2388 2389 kmutex_t nfs4_server_lst_lock; 2390 2391 static void 2392 nfs4setclientid_init(void) 2393 { 2394 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2395 } 2396 2397 static void 2398 nfs4setclientid_fini(void) 2399 { 2400 mutex_destroy(&nfs4_server_lst_lock); 2401 } 2402 2403 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2404 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2405 2406 /* 2407 * Set the clientid for the server for "mi". No-op if the clientid is 2408 * already set. 2409 * 2410 * The recovery boolean should be set to TRUE if this function was called 2411 * by the recovery code, and FALSE otherwise. This is used to determine 2412 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2413 * for adding a mntinfo4_t to a nfs4_server_t. 2414 * 2415 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2416 * 'n4ep->error' is set to geterrno4(n4ep->stat). 
2417 */ 2418 void 2419 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2420 { 2421 struct nfs4_server *np; 2422 struct servinfo4 *svp = mi->mi_curr_serv; 2423 nfs4_recov_state_t recov_state; 2424 int num_retries = 0; 2425 bool_t retry; 2426 cred_t *lcr = NULL; 2427 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2428 time_t lease_time = 0; 2429 2430 recov_state.rs_flags = 0; 2431 recov_state.rs_num_retry_despite_err = 0; 2432 ASSERT(n4ep != NULL); 2433 2434 recov_retry: 2435 retry = FALSE; 2436 nfs4_error_zinit(n4ep); 2437 if (!recovery) 2438 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2439 2440 mutex_enter(&nfs4_server_lst_lock); 2441 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2442 mutex_exit(&nfs4_server_lst_lock); 2443 if (!np) { 2444 struct nfs4_server *tnp; 2445 np = new_nfs4_server(svp, cr); 2446 mutex_enter(&np->s_lock); 2447 2448 mutex_enter(&nfs4_server_lst_lock); 2449 tnp = servinfo4_to_nfs4_server(svp); 2450 if (tnp) { 2451 /* 2452 * another thread snuck in and put server on list. 2453 * since we aren't adding it to the nfs4_server_list 2454 * we need to set the ref count to 0 and destroy it. 
2455 */ 2456 np->s_refcnt = 0; 2457 destroy_nfs4_server(np); 2458 np = tnp; 2459 } else { 2460 /* 2461 * do not give list a reference until everything 2462 * succeeds 2463 */ 2464 insque(np, &nfs4_server_lst); 2465 } 2466 mutex_exit(&nfs4_server_lst_lock); 2467 } 2468 ASSERT(MUTEX_HELD(&np->s_lock)); 2469 /* 2470 * If we find the server already has N4S_CLIENTID_SET, then 2471 * just return, we've already done SETCLIENTID to that server 2472 */ 2473 if (np->s_flags & N4S_CLIENTID_SET) { 2474 /* add mi to np's mntinfo4_list */ 2475 nfs4_add_mi_to_server(np, mi); 2476 if (!recovery) 2477 nfs_rw_exit(&mi->mi_recovlock); 2478 mutex_exit(&np->s_lock); 2479 nfs4_server_rele(np); 2480 return; 2481 } 2482 mutex_exit(&np->s_lock); 2483 2484 2485 /* 2486 * Drop the mi_recovlock since nfs4_start_op will 2487 * acquire it again for us. 2488 */ 2489 if (!recovery) { 2490 nfs_rw_exit(&mi->mi_recovlock); 2491 2492 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2493 if (n4ep->error) { 2494 nfs4_server_rele(np); 2495 return; 2496 } 2497 } 2498 2499 mutex_enter(&np->s_lock); 2500 while (np->s_flags & N4S_CLIENTID_PEND) { 2501 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2502 mutex_exit(&np->s_lock); 2503 nfs4_server_rele(np); 2504 if (!recovery) 2505 nfs4_end_op(mi, NULL, NULL, &recov_state, 2506 recovery); 2507 n4ep->error = EINTR; 2508 return; 2509 } 2510 } 2511 2512 if (np->s_flags & N4S_CLIENTID_SET) { 2513 /* XXX copied/pasted from above */ 2514 /* add mi to np's mntinfo4_list */ 2515 nfs4_add_mi_to_server(np, mi); 2516 mutex_exit(&np->s_lock); 2517 nfs4_server_rele(np); 2518 if (!recovery) 2519 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2520 return; 2521 } 2522 2523 /* 2524 * Reset the N4S_CB_PINGED flag. This is used to 2525 * indicate if we have received a CB_NULL from the 2526 * server. Also we reset the waiter flag. 
2527 */ 2528 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2529 /* any failure must now clear this flag */ 2530 np->s_flags |= N4S_CLIENTID_PEND; 2531 mutex_exit(&np->s_lock); 2532 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2533 2534 if (n4ep->error == EACCES) { 2535 /* 2536 * If the uid is set then set the creds for secure mounts 2537 * by proxy processes such as automountd. 2538 */ 2539 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2540 if (svp->sv_secdata->uid != 0) { 2541 lcr = crdup(cr); 2542 (void) crsetugid(lcr, svp->sv_secdata->uid, 2543 crgetgid(cr)); 2544 } 2545 nfs_rw_exit(&svp->sv_lock); 2546 2547 if (lcr != NULL) { 2548 mutex_enter(&np->s_lock); 2549 crfree(np->s_cred); 2550 np->s_cred = lcr; 2551 mutex_exit(&np->s_lock); 2552 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2553 &retry_inuse); 2554 } 2555 } 2556 mutex_enter(&np->s_lock); 2557 lease_time = np->s_lease_time; 2558 np->s_flags &= ~N4S_CLIENTID_PEND; 2559 mutex_exit(&np->s_lock); 2560 2561 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2562 /* 2563 * Start recovery if failover is a possibility. If 2564 * invoked by the recovery thread itself, then just 2565 * return and let it handle the failover first. NB: 2566 * recovery is not allowed if the mount is in progress 2567 * since the infrastructure is not sufficiently setup 2568 * to allow it. Just return the error (after suitable 2569 * retries). 2570 */ 2571 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2572 (void) nfs4_start_recovery(n4ep, mi, NULL, 2573 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2574 /* 2575 * Don't retry here, just return and let 2576 * recovery take over. 2577 */ 2578 if (recovery) 2579 retry = FALSE; 2580 } else if (nfs4_rpc_retry_error(n4ep->error) || 2581 n4ep->stat == NFS4ERR_RESOURCE || 2582 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2583 2584 retry = TRUE; 2585 /* 2586 * Always retry if in recovery or once had 2587 * contact with the server (but now it's 2588 * overloaded). 
2589 */ 2590 if (recovery == TRUE || 2591 n4ep->error == ETIMEDOUT || 2592 n4ep->error == ECONNRESET) 2593 num_retries = 0; 2594 } else if (retry_inuse && n4ep->error == 0 && 2595 n4ep->stat == NFS4ERR_CLID_INUSE) { 2596 retry = TRUE; 2597 num_retries = 0; 2598 } 2599 } else { 2600 /* 2601 * Since everything succeeded give the list a reference count if 2602 * it hasn't been given one by add_new_nfs4_server() or if this 2603 * is not a recovery situation in which case it is already on 2604 * the list. 2605 */ 2606 mutex_enter(&np->s_lock); 2607 if ((np->s_flags & N4S_INSERTED) == 0) { 2608 np->s_refcnt++; 2609 np->s_flags |= N4S_INSERTED; 2610 } 2611 mutex_exit(&np->s_lock); 2612 } 2613 2614 if (!recovery) 2615 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2616 2617 2618 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2619 if (retry_inuse) { 2620 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2621 retry_inuse = 0; 2622 } else 2623 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2624 2625 nfs4_server_rele(np); 2626 goto recov_retry; 2627 } 2628 2629 2630 if (n4ep->error == 0) 2631 n4ep->error = geterrno4(n4ep->stat); 2632 2633 /* broadcast before release in case no other threads are waiting */ 2634 cv_broadcast(&np->s_clientid_pend); 2635 nfs4_server_rele(np); 2636 } 2637 2638 int nfs4setclientid_otw_debug = 0; 2639 2640 /* 2641 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2642 * but nothing else; the calling function must be designed to handle those 2643 * other errors. 
2644 */ 2645 static void 2646 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2647 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2648 { 2649 COMPOUND4args_clnt args; 2650 COMPOUND4res_clnt res; 2651 nfs_argop4 argop[3]; 2652 SETCLIENTID4args *s_args; 2653 SETCLIENTID4resok *s_resok; 2654 int doqueue = 1; 2655 nfs4_ga_res_t *garp = NULL; 2656 timespec_t prop_time, after_time; 2657 verifier4 verf; 2658 clientid4 tmp_clientid; 2659 2660 ASSERT(!MUTEX_HELD(&np->s_lock)); 2661 2662 args.ctag = TAG_SETCLIENTID; 2663 2664 args.array = argop; 2665 args.array_len = 3; 2666 2667 /* PUTROOTFH */ 2668 argop[0].argop = OP_PUTROOTFH; 2669 2670 /* GETATTR */ 2671 argop[1].argop = OP_GETATTR; 2672 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2673 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2674 2675 /* SETCLIENTID */ 2676 argop[2].argop = OP_SETCLIENTID; 2677 2678 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2679 2680 mutex_enter(&np->s_lock); 2681 2682 s_args->client.verifier = np->clidtosend.verifier; 2683 s_args->client.id_len = np->clidtosend.id_len; 2684 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2685 s_args->client.id_val = np->clidtosend.id_val; 2686 2687 /* 2688 * Callback needs to happen on non-RDMA transport 2689 * Check if we have saved the original knetconfig 2690 * if so, use that instead. 2691 */ 2692 if (svp->sv_origknconf != NULL) 2693 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2694 else 2695 nfs4_cb_args(np, svp->sv_knconf, s_args); 2696 2697 mutex_exit(&np->s_lock); 2698 2699 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2700 2701 if (ep->error) 2702 return; 2703 2704 /* getattr lease_time res */ 2705 if (res.array_len >= 2) { 2706 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2707 2708 #ifndef _LP64 2709 /* 2710 * The 32 bit client cannot handle a lease time greater than 2711 * (INT32_MAX/1000000). 
This is due to the use of the 2712 * lease_time in calls to drv_usectohz() in 2713 * nfs4_renew_lease_thread(). The problem is that 2714 * drv_usectohz() takes a time_t (which is just a long = 4 2715 * bytes) as its parameter. The lease_time is multiplied by 2716 * 1000000 to convert seconds to usecs for the parameter. If 2717 * a number bigger than (INT32_MAX/1000000) is used then we 2718 * overflow on the 32bit client. 2719 */ 2720 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2721 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2722 } 2723 #endif 2724 2725 mutex_enter(&np->s_lock); 2726 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2727 2728 /* 2729 * Keep track of the lease period for the mi's 2730 * mi_msg_list. We need an appropiate time 2731 * bound to associate past facts with a current 2732 * event. The lease period is perfect for this. 2733 */ 2734 mutex_enter(&mi->mi_msg_list_lock); 2735 mi->mi_lease_period = np->s_lease_time; 2736 mutex_exit(&mi->mi_msg_list_lock); 2737 mutex_exit(&np->s_lock); 2738 } 2739 2740 2741 if (res.status == NFS4ERR_CLID_INUSE) { 2742 clientaddr4 *clid_inuse; 2743 2744 if (!(*retry_inusep)) { 2745 clid_inuse = &res.array->nfs_resop4_u. 2746 opsetclientid.SETCLIENTID4res_u.client_using; 2747 2748 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2749 "NFS4 mount (SETCLIENTID failed)." 2750 " nfs4_client_id.id is in" 2751 "use already by: r_netid<%s> r_addr<%s>", 2752 clid_inuse->r_netid, clid_inuse->r_addr); 2753 } 2754 2755 /* 2756 * XXX - The client should be more robust in its 2757 * handling of clientid in use errors (regen another 2758 * clientid and try again?) 2759 */ 2760 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2761 return; 2762 } 2763 2764 if (res.status) { 2765 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2766 return; 2767 } 2768 2769 s_resok = &res.array[2].nfs_resop4_u. 
2770 opsetclientid.SETCLIENTID4res_u.resok4; 2771 2772 tmp_clientid = s_resok->clientid; 2773 2774 verf = s_resok->setclientid_confirm; 2775 2776 #ifdef DEBUG 2777 if (nfs4setclientid_otw_debug) { 2778 union { 2779 clientid4 clientid; 2780 int foo[2]; 2781 } cid; 2782 2783 cid.clientid = s_resok->clientid; 2784 2785 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2786 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2787 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2788 } 2789 #endif 2790 2791 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2792 2793 /* Confirm the client id and get the lease_time attribute */ 2794 2795 args.ctag = TAG_SETCLIENTID_CF; 2796 2797 args.array = argop; 2798 args.array_len = 1; 2799 2800 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2801 2802 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2803 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2804 2805 /* used to figure out RTT for np */ 2806 gethrestime(&prop_time); 2807 2808 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2809 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2810 prop_time.tv_nsec)); 2811 2812 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2813 2814 gethrestime(&after_time); 2815 mutex_enter(&np->s_lock); 2816 np->propagation_delay.tv_sec = 2817 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2818 mutex_exit(&np->s_lock); 2819 2820 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2821 "finish time: %ld sec ", after_time.tv_sec)); 2822 2823 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2824 "propagation delay set to %ld sec", 2825 np->propagation_delay.tv_sec)); 2826 2827 if (ep->error) 2828 return; 2829 2830 if (res.status == NFS4ERR_CLID_INUSE) { 2831 clientaddr4 *clid_inuse; 2832 2833 if (!(*retry_inusep)) { 2834 clid_inuse = &res.array->nfs_resop4_u. 
2835 opsetclientid.SETCLIENTID4res_u.client_using; 2836 2837 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2838 "SETCLIENTID_CONFIRM failed. " 2839 "nfs4_client_id.id is in use already by: " 2840 "r_netid<%s> r_addr<%s>", 2841 clid_inuse->r_netid, clid_inuse->r_addr); 2842 } 2843 2844 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2845 return; 2846 } 2847 2848 if (res.status) { 2849 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2850 return; 2851 } 2852 2853 mutex_enter(&np->s_lock); 2854 np->clientid = tmp_clientid; 2855 np->s_flags |= N4S_CLIENTID_SET; 2856 2857 /* Add mi to np's mntinfo4 list */ 2858 nfs4_add_mi_to_server(np, mi); 2859 2860 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2861 /* 2862 * Start lease management thread. 2863 * Keep trying until we succeed. 2864 */ 2865 2866 np->s_refcnt++; /* pass reference to thread */ 2867 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2868 minclsyspri); 2869 } 2870 mutex_exit(&np->s_lock); 2871 2872 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2873 } 2874 2875 /* 2876 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2877 * mi's clientid the same as sp's. 2878 * Assumes sp is locked down. 
2879 */ 2880 void 2881 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 2882 { 2883 mntinfo4_t *tmi; 2884 int in_list = 0; 2885 2886 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 2887 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2888 ASSERT(sp != &nfs4_server_lst); 2889 ASSERT(MUTEX_HELD(&sp->s_lock)); 2890 2891 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2892 "nfs4_add_mi_to_server: add mi %p to sp %p", 2893 (void*)mi, (void*)sp)); 2894 2895 for (tmi = sp->mntinfo4_list; 2896 tmi != NULL; 2897 tmi = tmi->mi_clientid_next) { 2898 if (tmi == mi) { 2899 NFS4_DEBUG(nfs4_client_lease_debug, 2900 (CE_NOTE, 2901 "nfs4_add_mi_to_server: mi in list")); 2902 in_list = 1; 2903 } 2904 } 2905 2906 /* 2907 * First put a hold on the mntinfo4's vfsp so that references via 2908 * mntinfo4_list will be valid. 2909 */ 2910 if (!in_list) 2911 VFS_HOLD(mi->mi_vfsp); 2912 2913 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 2914 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 2915 2916 if (!in_list) { 2917 if (sp->mntinfo4_list) 2918 sp->mntinfo4_list->mi_clientid_prev = mi; 2919 mi->mi_clientid_next = sp->mntinfo4_list; 2920 sp->mntinfo4_list = mi; 2921 mi->mi_srvsettime = gethrestime_sec(); 2922 } 2923 2924 /* set mi's clientid to that of sp's for later matching */ 2925 mi->mi_clientid = sp->clientid; 2926 2927 /* 2928 * Update the clientid for any other mi's belonging to sp. This 2929 * must be done here while we hold sp->s_lock, so that 2930 * find_nfs4_server() continues to work. 2931 */ 2932 2933 for (tmi = sp->mntinfo4_list; 2934 tmi != NULL; 2935 tmi = tmi->mi_clientid_next) { 2936 if (tmi != mi) { 2937 tmi->mi_clientid = sp->clientid; 2938 } 2939 } 2940 } 2941 2942 /* 2943 * Remove the mi from sp's mntinfo4_list and release its reference. 2944 * Exception: if mi still has open files, flag it for later removal (when 2945 * all the files are closed). 
2946 * 2947 * If this is the last mntinfo4 in sp's list then tell the lease renewal 2948 * thread to exit. 2949 */ 2950 static void 2951 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 2952 { 2953 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2954 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 2955 (void*)mi, (void*)sp)); 2956 2957 ASSERT(sp != NULL); 2958 ASSERT(MUTEX_HELD(&sp->s_lock)); 2959 ASSERT(mi->mi_open_files >= 0); 2960 2961 /* 2962 * First make sure this mntinfo4 can be taken off of the list, 2963 * ie: it doesn't have any open files remaining. 2964 */ 2965 if (mi->mi_open_files > 0) { 2966 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2967 "nfs4_remove_mi_from_server_nolock: don't " 2968 "remove mi since it still has files open")); 2969 2970 mutex_enter(&mi->mi_lock); 2971 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 2972 mutex_exit(&mi->mi_lock); 2973 return; 2974 } 2975 2976 VFS_HOLD(mi->mi_vfsp); 2977 remove_mi(sp, mi); 2978 VFS_RELE(mi->mi_vfsp); 2979 2980 if (sp->mntinfo4_list == NULL) { 2981 /* last fs unmounted, kill the thread */ 2982 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2983 "remove_mi_from_nfs4_server_nolock: kill the thread")); 2984 nfs4_mark_srv_dead(sp); 2985 } 2986 } 2987 2988 /* 2989 * Remove mi from sp's mntinfo4_list and release the vfs reference. 2990 */ 2991 static void 2992 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 2993 { 2994 ASSERT(MUTEX_HELD(&sp->s_lock)); 2995 2996 /* 2997 * We release a reference, and the caller must still have a 2998 * reference. 2999 */ 3000 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3001 3002 if (mi->mi_clientid_prev) { 3003 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3004 } else { 3005 /* This is the first mi in sp's mntinfo4_list */ 3006 /* 3007 * Make sure the first mntinfo4 in the list is the actual 3008 * mntinfo4 passed in. 
3009 */ 3010 ASSERT(sp->mntinfo4_list == mi); 3011 3012 sp->mntinfo4_list = mi->mi_clientid_next; 3013 } 3014 if (mi->mi_clientid_next) 3015 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3016 3017 /* Now mark the mntinfo4's links as being removed */ 3018 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3019 3020 VFS_RELE(mi->mi_vfsp); 3021 } 3022 3023 /* 3024 * Free all the entries in sp's mntinfo4_list. 3025 */ 3026 static void 3027 remove_all_mi(nfs4_server_t *sp) 3028 { 3029 mntinfo4_t *mi; 3030 3031 ASSERT(MUTEX_HELD(&sp->s_lock)); 3032 3033 while (sp->mntinfo4_list != NULL) { 3034 mi = sp->mntinfo4_list; 3035 /* 3036 * Grab a reference in case there is only one left (which 3037 * remove_mi() frees). 3038 */ 3039 VFS_HOLD(mi->mi_vfsp); 3040 remove_mi(sp, mi); 3041 VFS_RELE(mi->mi_vfsp); 3042 } 3043 } 3044 3045 /* 3046 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3047 * 3048 * This version can be called with a null nfs4_server_t arg, 3049 * and will either find the right one and handle locking, or 3050 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3051 */ 3052 void 3053 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3054 { 3055 nfs4_server_t *sp; 3056 3057 if (esp == NULL) { 3058 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3059 sp = find_nfs4_server_all(mi, 1); 3060 } else 3061 sp = esp; 3062 3063 if (sp != NULL) 3064 nfs4_remove_mi_from_server_nolock(mi, sp); 3065 3066 /* 3067 * If we had a valid esp as input, the calling function will be 3068 * responsible for unlocking the esp nfs4_server. 3069 */ 3070 if (esp == NULL) { 3071 if (sp != NULL) 3072 mutex_exit(&sp->s_lock); 3073 nfs_rw_exit(&mi->mi_recovlock); 3074 if (sp != NULL) 3075 nfs4_server_rele(sp); 3076 } 3077 } 3078 3079 /* 3080 * Return TRUE if the given server has any non-unmounted filesystems. 
3081 */ 3082 3083 bool_t 3084 nfs4_fs_active(nfs4_server_t *sp) 3085 { 3086 mntinfo4_t *mi; 3087 3088 ASSERT(MUTEX_HELD(&sp->s_lock)); 3089 3090 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3091 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3092 return (TRUE); 3093 } 3094 3095 return (FALSE); 3096 } 3097 3098 /* 3099 * Mark sp as finished and notify any waiters. 3100 */ 3101 3102 void 3103 nfs4_mark_srv_dead(nfs4_server_t *sp) 3104 { 3105 ASSERT(MUTEX_HELD(&sp->s_lock)); 3106 3107 sp->s_thread_exit = NFS4_THREAD_EXIT; 3108 cv_broadcast(&sp->cv_thread_exit); 3109 } 3110 3111 /* 3112 * Create a new nfs4_server_t structure. 3113 * Returns new node unlocked and not in list, but with a reference count of 3114 * 1. 3115 */ 3116 struct nfs4_server * 3117 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3118 { 3119 struct nfs4_server *np; 3120 timespec_t tt; 3121 union { 3122 struct { 3123 uint32_t sec; 3124 uint32_t subsec; 3125 } un_curtime; 3126 verifier4 un_verifier; 3127 } nfs4clientid_verifier; 3128 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3129 int len; 3130 3131 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3132 np->saddr.len = svp->sv_addr.len; 3133 np->saddr.maxlen = svp->sv_addr.maxlen; 3134 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3135 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3136 np->s_refcnt = 1; 3137 3138 /* 3139 * Build the nfs_client_id4 for this server mount. Ensure 3140 * the verifier is useful and that the identification is 3141 * somehow based on the server's address for the case of 3142 * multi-homed servers. 3143 */ 3144 nfs4clientid_verifier.un_verifier = 0; 3145 gethrestime(&tt); 3146 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3147 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3148 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3149 3150 /* 3151 * calculate the length of the opaque identifier. 
Subtract 2 3152 * for the "%s" and add the traditional +1 for null 3153 * termination. 3154 */ 3155 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3156 np->clidtosend.id_len = len + np->saddr.maxlen; 3157 3158 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3159 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3160 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3161 3162 np->s_flags = 0; 3163 np->mntinfo4_list = NULL; 3164 /* save cred for issuing rfs4calls inside the renew thread */ 3165 crhold(cr); 3166 np->s_cred = cr; 3167 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3168 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3169 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3170 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3171 offsetof(rnode4_t, r_deleg_link)); 3172 np->s_thread_exit = 0; 3173 np->state_ref_count = 0; 3174 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3175 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3176 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3177 np->s_otw_call_count = 0; 3178 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3179 np->zoneid = getzoneid(); 3180 np->zone_globals = nfs4_get_callback_globals(); 3181 ASSERT(np->zone_globals != NULL); 3182 return (np); 3183 } 3184 3185 /* 3186 * Create a new nfs4_server_t structure and add it to the list. 3187 * Returns new node locked; reference must eventually be freed. 
3188 */ 3189 static struct nfs4_server * 3190 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3191 { 3192 nfs4_server_t *sp; 3193 3194 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3195 sp = new_nfs4_server(svp, cr); 3196 mutex_enter(&sp->s_lock); 3197 insque(sp, &nfs4_server_lst); 3198 sp->s_refcnt++; /* list gets a reference */ 3199 sp->s_flags |= N4S_INSERTED; 3200 sp->clientid = 0; 3201 return (sp); 3202 } 3203 3204 int nfs4_server_t_debug = 0; 3205 3206 #ifdef lint 3207 extern void 3208 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3209 #endif 3210 3211 #ifndef lint 3212 #ifdef DEBUG 3213 void 3214 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3215 { 3216 int hash16(void *p, int len); 3217 nfs4_server_t *np; 3218 3219 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3220 "dumping nfs4_server_t list in %s", txt)); 3221 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3222 "mi 0x%p, want clientid %llx, addr %d/%04X", 3223 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3224 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3225 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3226 np = np->forw) { 3227 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3228 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3229 np, (longlong_t)np->clientid, np->saddr.len, 3230 hash16((void *)np->saddr.buf, np->saddr.len), 3231 np->state_ref_count)); 3232 if (np->saddr.len == srv_p->sv_addr.len && 3233 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3234 np->saddr.len) == 0) 3235 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3236 " - address matches")); 3237 if (np->clientid == clientid || np->clientid == 0) 3238 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3239 " - clientid matches")); 3240 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3241 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3242 " - thread not exiting")); 3243 } 3244 delay(hz); 3245 } 3246 #endif 3247 #endif 3248 3249 3250 /* 3251 * Move a mntinfo4_t from one server list to another. 
3252 * Locking of the two nfs4_server_t nodes will be done in list order. 3253 * 3254 * Returns NULL if the current nfs4_server_t for the filesystem could not 3255 * be found (e.g., due to forced unmount). Otherwise returns a reference 3256 * to the new nfs4_server_t, which must eventually be freed. 3257 */ 3258 nfs4_server_t * 3259 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3260 { 3261 nfs4_server_t *p, *op = NULL, *np = NULL; 3262 int num_open; 3263 zoneid_t zoneid = nfs_zoneid(); 3264 3265 ASSERT(nfs_zone() == mi->mi_zone); 3266 3267 mutex_enter(&nfs4_server_lst_lock); 3268 #ifdef DEBUG 3269 if (nfs4_server_t_debug) 3270 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3271 #endif 3272 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3273 if (p->zoneid != zoneid) 3274 continue; 3275 if (p->saddr.len == old->sv_addr.len && 3276 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3277 p->s_thread_exit != NFS4_THREAD_EXIT) { 3278 op = p; 3279 mutex_enter(&op->s_lock); 3280 op->s_refcnt++; 3281 } 3282 if (p->saddr.len == new->sv_addr.len && 3283 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3284 p->s_thread_exit != NFS4_THREAD_EXIT) { 3285 np = p; 3286 mutex_enter(&np->s_lock); 3287 } 3288 if (op != NULL && np != NULL) 3289 break; 3290 } 3291 if (op == NULL) { 3292 /* 3293 * Filesystem has been forcibly unmounted. Bail out. 
3294 */ 3295 if (np != NULL) 3296 mutex_exit(&np->s_lock); 3297 mutex_exit(&nfs4_server_lst_lock); 3298 return (NULL); 3299 } 3300 if (np != NULL) { 3301 np->s_refcnt++; 3302 } else { 3303 #ifdef DEBUG 3304 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3305 "nfs4_move_mi: no target nfs4_server, will create.")); 3306 #endif 3307 np = add_new_nfs4_server(new, kcred); 3308 } 3309 mutex_exit(&nfs4_server_lst_lock); 3310 3311 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3312 "nfs4_move_mi: for mi 0x%p, " 3313 "old servinfo4 0x%p, new servinfo4 0x%p, " 3314 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3315 (void*)mi, (void*)old, (void*)new, 3316 (void*)op, (void*)np)); 3317 ASSERT(op != NULL && np != NULL); 3318 3319 /* discard any delegations */ 3320 nfs4_deleg_discard(mi, op); 3321 3322 num_open = mi->mi_open_files; 3323 mi->mi_open_files = 0; 3324 op->state_ref_count -= num_open; 3325 ASSERT(op->state_ref_count >= 0); 3326 np->state_ref_count += num_open; 3327 nfs4_remove_mi_from_server_nolock(mi, op); 3328 mi->mi_open_files = num_open; 3329 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3330 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3331 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3332 3333 nfs4_add_mi_to_server(np, mi); 3334 3335 mutex_exit(&op->s_lock); 3336 nfs4_server_rele(op); 3337 mutex_exit(&np->s_lock); 3338 3339 return (np); 3340 } 3341 3342 /* 3343 * Need to have the nfs4_server_lst_lock. 3344 * Search the nfs4_server list to find a match on this servinfo4 3345 * based on its address. 3346 * 3347 * Returns NULL if no match is found. Otherwise returns a reference (which 3348 * must eventually be freed) to a locked nfs4_server. 
3349 */ 3350 nfs4_server_t * 3351 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3352 { 3353 nfs4_server_t *np; 3354 zoneid_t zoneid = nfs_zoneid(); 3355 3356 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3357 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3358 if (np->zoneid == zoneid && 3359 np->saddr.len == srv_p->sv_addr.len && 3360 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3361 np->saddr.len) == 0 && 3362 np->s_thread_exit != NFS4_THREAD_EXIT) { 3363 mutex_enter(&np->s_lock); 3364 np->s_refcnt++; 3365 return (np); 3366 } 3367 } 3368 return (NULL); 3369 } 3370 3371 /* 3372 * Search the nfs4_server_lst to find a match based on clientid and 3373 * addr. 3374 * Locks the nfs4_server down if it is found and returns a reference that 3375 * must eventually be freed. 3376 * 3377 * Returns NULL it no match is found. This means one of two things: either 3378 * mi is in the process of being mounted, or mi has been unmounted. 3379 * 3380 * The caller should be holding mi->mi_recovlock, and it should continue to 3381 * hold the lock until done with the returned nfs4_server_t. Once 3382 * mi->mi_recovlock is released, there is no guarantee that the returned 3383 * mi->nfs4_server_t will continue to correspond to mi. 3384 */ 3385 nfs4_server_t * 3386 find_nfs4_server(mntinfo4_t *mi) 3387 { 3388 return (find_nfs4_server_all(mi, 0)); 3389 } 3390 3391 /* 3392 * Same as above, but takes an "all" parameter which can be 3393 * set to 1 if the caller wishes to find nfs4_server_t's which 3394 * have been marked for termination by the exit of the renew 3395 * thread. This should only be used by operations which are 3396 * cleaning up and will not cause an OTW op. 
3397 */ 3398 nfs4_server_t * 3399 find_nfs4_server_all(mntinfo4_t *mi, int all) 3400 { 3401 nfs4_server_t *np; 3402 servinfo4_t *svp; 3403 zoneid_t zoneid = mi->mi_zone->zone_id; 3404 3405 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3406 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3407 /* 3408 * This can be called from nfs4_unmount() which can be called from the 3409 * global zone, hence it's legal for the global zone to muck with 3410 * another zone's server list, as long as it doesn't try to contact 3411 * them. 3412 */ 3413 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3414 nfs_global_client_only != 0); 3415 3416 /* 3417 * The nfs4_server_lst_lock global lock is held when we get a new 3418 * clientid (via SETCLIENTID OTW). Holding this global lock and 3419 * mi_recovlock (READER is fine) ensures that the nfs4_server 3420 * and this mntinfo4 can't get out of sync, so the following search is 3421 * always valid. 3422 */ 3423 mutex_enter(&nfs4_server_lst_lock); 3424 #ifdef DEBUG 3425 if (nfs4_server_t_debug) { 3426 /* mi->mi_clientid is unprotected, ok for debug output */ 3427 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3428 mi->mi_curr_serv); 3429 } 3430 #endif 3431 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3432 mutex_enter(&np->s_lock); 3433 svp = mi->mi_curr_serv; 3434 3435 if (np->zoneid == zoneid && 3436 np->clientid == mi->mi_clientid && 3437 np->saddr.len == svp->sv_addr.len && 3438 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3439 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3440 mutex_exit(&nfs4_server_lst_lock); 3441 np->s_refcnt++; 3442 return (np); 3443 } 3444 mutex_exit(&np->s_lock); 3445 } 3446 mutex_exit(&nfs4_server_lst_lock); 3447 3448 return (NULL); 3449 } 3450 3451 /* 3452 * Release the reference to sp and destroy it if that's the last one. 
3453 */ 3454 3455 void 3456 nfs4_server_rele(nfs4_server_t *sp) 3457 { 3458 mutex_enter(&sp->s_lock); 3459 ASSERT(sp->s_refcnt > 0); 3460 sp->s_refcnt--; 3461 if (sp->s_refcnt > 0) { 3462 mutex_exit(&sp->s_lock); 3463 return; 3464 } 3465 mutex_exit(&sp->s_lock); 3466 3467 mutex_enter(&nfs4_server_lst_lock); 3468 mutex_enter(&sp->s_lock); 3469 if (sp->s_refcnt > 0) { 3470 mutex_exit(&sp->s_lock); 3471 mutex_exit(&nfs4_server_lst_lock); 3472 return; 3473 } 3474 remque(sp); 3475 sp->forw = sp->back = NULL; 3476 mutex_exit(&nfs4_server_lst_lock); 3477 destroy_nfs4_server(sp); 3478 } 3479 3480 static void 3481 destroy_nfs4_server(nfs4_server_t *sp) 3482 { 3483 ASSERT(MUTEX_HELD(&sp->s_lock)); 3484 ASSERT(sp->s_refcnt == 0); 3485 ASSERT(sp->s_otw_call_count == 0); 3486 3487 remove_all_mi(sp); 3488 3489 crfree(sp->s_cred); 3490 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3491 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3492 mutex_exit(&sp->s_lock); 3493 3494 /* destroy the nfs4_server */ 3495 nfs4callback_destroy(sp); 3496 list_destroy(&sp->s_deleg_list); 3497 mutex_destroy(&sp->s_lock); 3498 cv_destroy(&sp->cv_thread_exit); 3499 cv_destroy(&sp->s_cv_otw_count); 3500 cv_destroy(&sp->s_clientid_pend); 3501 cv_destroy(&sp->wait_cb_null); 3502 nfs_rw_destroy(&sp->s_recovlock); 3503 kmem_free(sp, sizeof (*sp)); 3504 } 3505 3506 /* 3507 * Lock sp, but only if it's still active (in the list and hasn't been 3508 * flagged as exiting) or 'all' is non-zero. 3509 * Returns TRUE if sp got locked and adds a reference to sp. 
3510 */ 3511 bool_t 3512 nfs4_server_vlock(nfs4_server_t *sp, int all) 3513 { 3514 nfs4_server_t *np; 3515 3516 mutex_enter(&nfs4_server_lst_lock); 3517 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3518 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3519 all != 0)) { 3520 mutex_enter(&np->s_lock); 3521 np->s_refcnt++; 3522 mutex_exit(&nfs4_server_lst_lock); 3523 return (TRUE); 3524 } 3525 } 3526 mutex_exit(&nfs4_server_lst_lock); 3527 return (FALSE); 3528 } 3529 3530 /* 3531 * Fork off a thread to free the data structures for a mount. 3532 */ 3533 3534 static void 3535 async_free_mount(vfs_t *vfsp, cred_t *cr) 3536 { 3537 freemountargs_t *args; 3538 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3539 args->fm_vfsp = vfsp; 3540 VFS_HOLD(vfsp); 3541 MI4_HOLD(VFTOMI4(vfsp)); 3542 args->fm_cr = cr; 3543 crhold(cr); 3544 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3545 minclsyspri); 3546 } 3547 3548 static void 3549 nfs4_free_mount_thread(freemountargs_t *args) 3550 { 3551 mntinfo4_t *mi; 3552 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3553 mi = VFTOMI4(args->fm_vfsp); 3554 crfree(args->fm_cr); 3555 VFS_RELE(args->fm_vfsp); 3556 MI4_RELE(mi); 3557 kmem_free(args, sizeof (freemountargs_t)); 3558 zthread_exit(); 3559 /* NOTREACHED */ 3560 } 3561 3562 /* 3563 * Thread to free the data structures for a given filesystem. 3564 */ 3565 static void 3566 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3567 { 3568 mntinfo4_t *mi = VFTOMI4(vfsp); 3569 nfs4_server_t *sp; 3570 callb_cpr_t cpr_info; 3571 kmutex_t cpr_lock; 3572 boolean_t async_thread; 3573 int removed; 3574 3575 /* 3576 * We need to participate in the CPR framework if this is a kernel 3577 * thread. 
3578 */ 3579 async_thread = (curproc == nfs_zone()->zone_zsched); 3580 if (async_thread) { 3581 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3582 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3583 "nfsv4AsyncUnmount"); 3584 } 3585 3586 /* 3587 * We need to wait for all outstanding OTW calls 3588 * and recovery to finish before we remove the mi 3589 * from the nfs4_server_t, as current pending 3590 * calls might still need this linkage (in order 3591 * to find a nfs4_server_t from a mntinfo4_t). 3592 */ 3593 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3594 sp = find_nfs4_server(mi); 3595 nfs_rw_exit(&mi->mi_recovlock); 3596 3597 if (sp) { 3598 while (sp->s_otw_call_count != 0) { 3599 if (async_thread) { 3600 mutex_enter(&cpr_lock); 3601 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3602 mutex_exit(&cpr_lock); 3603 } 3604 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3605 if (async_thread) { 3606 mutex_enter(&cpr_lock); 3607 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3608 mutex_exit(&cpr_lock); 3609 } 3610 } 3611 mutex_exit(&sp->s_lock); 3612 nfs4_server_rele(sp); 3613 sp = NULL; 3614 } 3615 3616 3617 mutex_enter(&mi->mi_lock); 3618 while (mi->mi_in_recovery != 0) { 3619 if (async_thread) { 3620 mutex_enter(&cpr_lock); 3621 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3622 mutex_exit(&cpr_lock); 3623 } 3624 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3625 if (async_thread) { 3626 mutex_enter(&cpr_lock); 3627 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3628 mutex_exit(&cpr_lock); 3629 } 3630 } 3631 mutex_exit(&mi->mi_lock); 3632 3633 /* 3634 * The original purge of the dnlc via 'dounmount' 3635 * doesn't guarantee that another dnlc entry was not 3636 * added while we waitied for all outstanding OTW 3637 * and recovery calls to finish. So re-purge the 3638 * dnlc now. 3639 */ 3640 (void) dnlc_purge_vfsp(vfsp, 0); 3641 3642 /* 3643 * We need to explicitly stop the manager thread; the asyc worker 3644 * threads can timeout and exit on their own. 
3645 */ 3646 mutex_enter(&mi->mi_async_lock); 3647 mi->mi_max_threads = 0; 3648 cv_broadcast(&mi->mi_async_work_cv); 3649 mutex_exit(&mi->mi_async_lock); 3650 if (mi->mi_manager_thread) 3651 nfs4_async_manager_stop(vfsp); 3652 3653 destroy_rtable4(vfsp, cr); 3654 3655 nfs4_remove_mi_from_server(mi, NULL); 3656 3657 if (async_thread) { 3658 mutex_enter(&cpr_lock); 3659 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3660 mutex_destroy(&cpr_lock); 3661 } 3662 3663 removed = nfs4_mi_zonelist_remove(mi); 3664 if (removed) 3665 zone_rele(mi->mi_zone); 3666 } 3667