1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/time.h> 54 #include <sys/disp.h> 55 #include <sys/policy.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/dnlc.h> 59 #include <sys/list.h> 60 #include <sys/mntent.h> 61 #include <sys/tsol/label.h> 62 63 #include <rpc/types.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpcsec_gss.h> 66 #include <rpc/clnt.h> 67 68 #include <nfs/nfs.h> 69 #include <nfs/nfs_clnt.h> 70 #include <nfs/mount.h> 71 #include <nfs/nfs_acl.h> 72 73 #include <fs/fs_subr.h> 74 75 #include <nfs/nfs4.h> 76 #include <nfs/rnode4.h> 77 #include <nfs/nfs4_clnt.h> 78 79 /* 80 * Arguments passed to thread to free data structures from forced unmount. 81 */ 82 83 typedef struct { 84 vfs_t *fm_vfsp; 85 cred_t *fm_cr; 86 } freemountargs_t; 87 88 static void async_free_mount(vfs_t *, cred_t *); 89 static void nfs4_free_mount(vfs_t *, cred_t *); 90 static void nfs4_free_mount_thread(freemountargs_t *); 91 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 92 93 /* 94 * From rpcsec module (common/rpcsec). 
95 */ 96 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 97 extern void sec_clnt_freeinfo(struct sec_data *); 98 99 /* 100 * The order and contents of this structure must be kept in sync with that of 101 * rfsreqcnt_v4_tmpl in nfs_stats.c 102 */ 103 static char *rfsnames_v4[] = { 104 "null", "compound", "reserved", "access", "close", "commit", "create", 105 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 106 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 107 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 108 "read", "readdir", "readlink", "remove", "rename", "renew", 109 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 110 "setclientid_confirm", "verify", "write" 111 }; 112 113 /* 114 * nfs4_max_mount_retry is the number of times the client will redrive 115 * a mount compound before giving up and returning failure. The intent 116 * is to redrive mount compounds which fail NFS4ERR_STALE so that 117 * if a component of the server path being mounted goes stale, it can 118 * "recover" by redriving the mount compound (LOOKUP ops). This recovery 119 * code is needed outside of the recovery framework because mount is a 120 * special case. The client doesn't create vnodes/rnodes for components 121 * of the server path being mounted. The recovery code recovers real 122 * client objects, not STALE FHs which map to components of the server 123 * path being mounted. 124 * 125 * We could just fail the mount on the first time, but that would 126 * instantly trigger failover (from nfs4_mount), and the client should 127 * try to re-lookup the STALE FH before doing failover. The easiest 128 * way to "re-lookup" is to simply redrive the mount compound. 129 */ 130 static int nfs4_max_mount_retry = 2; 131 132 /* 133 * nfs4 vfs operations. 
134 */ 135 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 136 static int nfs4_unmount(vfs_t *, int, cred_t *); 137 static int nfs4_root(vfs_t *, vnode_t **); 138 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 139 static int nfs4_sync(vfs_t *, short, cred_t *); 140 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 141 static int nfs4_mountroot(vfs_t *, whymountroot_t); 142 static void nfs4_freevfs(vfs_t *); 143 144 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 145 int, cred_t *, zone_t *); 146 147 vfsops_t *nfs4_vfsops; 148 149 int nfs4_vfsinit(void); 150 void nfs4_vfsfini(void); 151 static void nfs4setclientid_init(void); 152 static void nfs4setclientid_fini(void); 153 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 154 struct nfs4_server *, nfs4_error_t *, int *); 155 static void destroy_nfs4_server(nfs4_server_t *); 156 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 157 158 /* 159 * Initialize the vfs structure 160 */ 161 162 static int nfs4fstyp; 163 164 165 /* 166 * Debug variable to check for rdma based 167 * transport startup and cleanup. Controlled 168 * through /etc/system. Off by default. 
169 */ 170 extern int rdma_debug; 171 172 int 173 nfs4init(int fstyp, char *name) 174 { 175 static const fs_operation_def_t nfs4_vfsops_template[] = { 176 VFSNAME_MOUNT, nfs4_mount, 177 VFSNAME_UNMOUNT, nfs4_unmount, 178 VFSNAME_ROOT, nfs4_root, 179 VFSNAME_STATVFS, nfs4_statvfs, 180 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 181 VFSNAME_VGET, nfs4_vget, 182 VFSNAME_MOUNTROOT, nfs4_mountroot, 183 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 184 NULL, NULL 185 }; 186 int error; 187 188 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 189 if (error != 0) { 190 zcmn_err(GLOBAL_ZONEID, CE_WARN, 191 "nfs4init: bad vfs ops template"); 192 return (error); 193 } 194 195 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 196 if (error != 0) { 197 (void) vfs_freevfsops_by_type(fstyp); 198 zcmn_err(GLOBAL_ZONEID, CE_WARN, 199 "nfs4init: bad vnode ops template"); 200 return (error); 201 } 202 203 nfs4fstyp = fstyp; 204 205 (void) nfs4_vfsinit(); 206 207 (void) nfs4_init_dot_entries(); 208 209 return (0); 210 } 211 212 void 213 nfs4fini(void) 214 { 215 (void) nfs4_destroy_dot_entries(); 216 nfs4_vfsfini(); 217 } 218 219 /* 220 * Create a new sec_data structure to store AUTH_DH related data: 221 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 222 * flag set for NFS V4 since we are avoiding to contact the rpcbind 223 * daemon and is using the IP time service (IPPORT_TIMESERVER). 224 * 225 * sec_data can be freed by sec_clnt_freeinfo(). 
226 */ 227 struct sec_data * 228 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 229 struct knetconfig *knconf) { 230 struct sec_data *secdata; 231 dh_k4_clntdata_t *data; 232 char *pf, *p; 233 234 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 235 return (NULL); 236 237 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 238 secdata->flags = 0; 239 240 data = kmem_alloc(sizeof (*data), KM_SLEEP); 241 242 data->syncaddr.maxlen = syncaddr->maxlen; 243 data->syncaddr.len = syncaddr->len; 244 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 245 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 246 247 /* 248 * duplicate the knconf information for the 249 * new opaque data. 250 */ 251 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 252 *data->knconf = *knconf; 253 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 254 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 255 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 256 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 257 data->knconf->knc_protofmly = pf; 258 data->knconf->knc_proto = p; 259 260 /* move server netname to the sec_data structure */ 261 data->netname = kmem_alloc(nlen, KM_SLEEP); 262 bcopy(netname, data->netname, nlen); 263 data->netnamelen = (int)nlen; 264 265 secdata->secmod = AUTH_DH; 266 secdata->rpcflavor = AUTH_DH; 267 secdata->data = (caddr_t)data; 268 269 return (secdata); 270 } 271 272 static int 273 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 274 { 275 servinfo4_t *si; 276 277 /* 278 * Iterate over the servinfo4 list to make sure 279 * we do not have a duplicate. 
Skip any servinfo4 280 * that has been marked "NOT IN USE" 281 */ 282 for (si = svp_head; si; si = si->sv_next) { 283 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 284 if (si->sv_flags & SV4_NOTINUSE) { 285 nfs_rw_exit(&si->sv_lock); 286 continue; 287 } 288 nfs_rw_exit(&si->sv_lock); 289 if (si == svp) 290 continue; 291 if (si->sv_addr.len == svp->sv_addr.len && 292 strcmp(si->sv_knconf->knc_protofmly, 293 svp->sv_knconf->knc_protofmly) == 0 && 294 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 295 si->sv_addr.len) == 0) { 296 /* it's a duplicate */ 297 return (1); 298 } 299 } 300 /* it's not a duplicate */ 301 return (0); 302 } 303 304 /* 305 * nfs mount vfsop 306 * Set up mount info record and attach it to vfs struct. 307 */ 308 static int 309 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 310 { 311 char *data = uap->dataptr; 312 int error; 313 vnode_t *rtvp; /* the server's root */ 314 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 315 size_t hlen; /* length of hostname */ 316 size_t nlen; /* length of netname */ 317 char netname[MAXNETNAMELEN+1]; /* server's netname */ 318 struct netbuf addr; /* server's address */ 319 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 320 struct knetconfig *knconf; /* transport knetconfig structure */ 321 struct knetconfig *rdma_knconf; /* rdma transport structure */ 322 rnode4_t *rp; 323 struct servinfo4 *svp; /* nfs server info */ 324 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 325 struct servinfo4 *svp_head; /* first nfs server info */ 326 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 327 struct sec_data *secdata; /* security data */ 328 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 329 STRUCT_DECL(knetconfig, knconf_tmp); 330 STRUCT_DECL(netbuf, addr_tmp); 331 int flags, addr_type; 332 char *p, *pf; 333 struct pathname pn; 334 char *userbufptr; 335 zone_t *zone = nfs_zone(); 336 nfs4_error_t n4e; 337 zone_t *mntzone = NULL; 338 339 if 
(secpolicy_fs_mount(cr, mvp, vfsp) != 0) 340 return (EPERM); 341 if (mvp->v_type != VDIR) 342 return (ENOTDIR); 343 /* 344 * get arguments 345 * 346 * nfs_args is now versioned and is extensible, so 347 * uap->datalen might be different from sizeof (args) 348 * in a compatible situation. 349 */ 350 more: 351 STRUCT_INIT(args, get_udatamodel()); 352 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 353 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 354 STRUCT_SIZE(args)))) 355 return (EFAULT); 356 357 flags = STRUCT_FGET(args, flags); 358 359 /* 360 * If the request changes the locking type, disallow the remount, 361 * because it's questionable whether we can transfer the 362 * locking state correctly. 363 */ 364 if (uap->flags & MS_REMOUNT) { 365 if ((mi = VFTOMI4(vfsp)) != NULL) { 366 uint_t new_mi_llock; 367 uint_t old_mi_llock; 368 369 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 370 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 371 if (old_mi_llock != new_mi_llock) 372 return (EBUSY); 373 } 374 return (0); 375 } 376 377 mutex_enter(&mvp->v_lock); 378 if (!(uap->flags & MS_OVERLAY) && 379 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 380 mutex_exit(&mvp->v_lock); 381 return (EBUSY); 382 } 383 mutex_exit(&mvp->v_lock); 384 385 /* make sure things are zeroed for errout: */ 386 rtvp = NULL; 387 mi = NULL; 388 addr.buf = NULL; 389 syncaddr.buf = NULL; 390 secdata = NULL; 391 392 /* 393 * A valid knetconfig structure is required. 394 */ 395 if (!(flags & NFSMNT_KNCONF)) 396 return (EINVAL); 397 398 /* 399 * Allocate a servinfo4 struct. 400 */ 401 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 402 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 403 if (svp_tail) { 404 svp_2ndlast = svp_tail; 405 svp_tail->sv_next = svp; 406 } else { 407 svp_head = svp; 408 svp_2ndlast = svp; 409 } 410 411 svp_tail = svp; 412 413 /* 414 * Allocate space for a knetconfig structure and 415 * its strings and copy in from user-land. 
416 */ 417 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 418 svp->sv_knconf = knconf; 419 STRUCT_INIT(knconf_tmp, get_udatamodel()); 420 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 421 STRUCT_SIZE(knconf_tmp))) { 422 sv4_free(svp_head); 423 return (EFAULT); 424 } 425 426 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 427 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 428 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 429 if (get_udatamodel() != DATAMODEL_LP64) { 430 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 431 } else { 432 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 433 } 434 435 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 436 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 437 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 438 if (error) { 439 kmem_free(pf, KNC_STRSIZE); 440 kmem_free(p, KNC_STRSIZE); 441 sv4_free(svp_head); 442 return (error); 443 } 444 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 445 if (error) { 446 kmem_free(pf, KNC_STRSIZE); 447 kmem_free(p, KNC_STRSIZE); 448 sv4_free(svp_head); 449 return (error); 450 } 451 if (strcmp(p, NC_UDP) == 0) { 452 kmem_free(pf, KNC_STRSIZE); 453 kmem_free(p, KNC_STRSIZE); 454 sv4_free(svp_head); 455 return (ENOTSUP); 456 } 457 knconf->knc_protofmly = pf; 458 knconf->knc_proto = p; 459 460 /* 461 * Get server address 462 */ 463 STRUCT_INIT(addr_tmp, get_udatamodel()); 464 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 465 STRUCT_SIZE(addr_tmp))) { 466 error = EFAULT; 467 goto errout; 468 } 469 470 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 471 addr.len = STRUCT_FGET(addr_tmp, len); 472 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 473 addr.maxlen = addr.len; 474 if (copyin(userbufptr, addr.buf, addr.len)) { 475 kmem_free(addr.buf, addr.len); 476 error = EFAULT; 477 goto errout; 478 } 479 480 svp->sv_addr = addr; 481 482 /* 483 * Get the root fhandle 484 */ 485 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 486 487 if (error) 488 goto errout; 489 490 /* Volatile fh: keep server paths, so use actual-size strings */ 491 svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 492 bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen); 493 svp->sv_path[pn.pn_pathlen] = '\0'; 494 svp->sv_pathlen = pn.pn_pathlen + 1; 495 pn_free(&pn); 496 497 /* 498 * Get server's hostname 499 */ 500 if (flags & NFSMNT_HOSTNAME) { 501 error = copyinstr(STRUCT_FGETP(args, hostname), 502 netname, sizeof (netname), &hlen); 503 if (error) 504 goto errout; 505 } else { 506 char *p = "unknown-host"; 507 hlen = strlen(p) + 1; 508 (void) strcpy(netname, p); 509 } 510 svp->sv_hostnamelen = hlen; 511 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 512 (void) strcpy(svp->sv_hostname, netname); 513 514 /* 515 * RDMA MOUNT SUPPORT FOR NFS v4. 516 * Establish, is it possible to use RDMA, if so overload the 517 * knconf with rdma specific knconf and free the orignal knconf. 518 */ 519 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 520 /* 521 * Determine the addr type for RDMA, IPv4 or v6. 522 */ 523 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 524 addr_type = AF_INET; 525 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 526 addr_type = AF_INET6; 527 528 if (rdma_reachable(addr_type, &svp->sv_addr, 529 &rdma_knconf) == 0) { 530 /* 531 * If successful, hijack the orignal knconf and 532 * replace with the new one, depending on the flags. 533 */ 534 svp->sv_origknconf = svp->sv_knconf; 535 svp->sv_knconf = rdma_knconf; 536 knconf = rdma_knconf; 537 } else { 538 if (flags & NFSMNT_TRYRDMA) { 539 #ifdef DEBUG 540 if (rdma_debug) 541 zcmn_err(getzoneid(), CE_WARN, 542 "no RDMA onboard, revert\n"); 543 #endif 544 } 545 546 if (flags & NFSMNT_DORDMA) { 547 /* 548 * If proto=rdma is specified and no RDMA 549 * path to this server is avialable then 550 * ditch this server. 
551 * This is not included in the mountable 552 * server list or the replica list. 553 * Check if more servers are specified; 554 * Failover case, otherwise bail out of mount. 555 */ 556 if (STRUCT_FGET(args, nfs_args_ext) == 557 NFS_ARGS_EXTB && STRUCT_FGETP(args, 558 nfs_ext_u.nfs_extB.next) != NULL) { 559 if (uap->flags & MS_RDONLY && 560 !(flags & NFSMNT_SOFT)) { 561 data = (char *) 562 STRUCT_FGETP(args, 563 nfs_ext_u.nfs_extB.next); 564 if (svp_head->sv_next == NULL) { 565 svp_tail = NULL; 566 svp_2ndlast = NULL; 567 sv4_free(svp_head); 568 goto more; 569 } else { 570 svp_tail = svp_2ndlast; 571 svp_2ndlast->sv_next = 572 NULL; 573 sv4_free(svp); 574 goto more; 575 } 576 } 577 } else { 578 /* 579 * This is the last server specified 580 * in the nfs_args list passed down 581 * and its not rdma capable. 582 */ 583 if (svp_head->sv_next == NULL) { 584 /* 585 * Is this the only one 586 */ 587 error = EINVAL; 588 #ifdef DEBUG 589 if (rdma_debug) 590 zcmn_err(getzoneid(), 591 CE_WARN, 592 "No RDMA srv"); 593 #endif 594 goto errout; 595 } else { 596 /* 597 * There is list, since some 598 * servers specified before 599 * this passed all requirements 600 */ 601 svp_tail = svp_2ndlast; 602 svp_2ndlast->sv_next = NULL; 603 sv4_free(svp); 604 goto proceed; 605 } 606 } 607 } 608 } 609 } 610 611 /* 612 * If there are syncaddr and netname data, load them in. This is 613 * to support data needed for NFSV4 when AUTH_DH is the negotiated 614 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 
615 */ 616 netname[0] = '\0'; 617 if (flags & NFSMNT_SECURE) { 618 619 /* get syncaddr */ 620 STRUCT_INIT(addr_tmp, get_udatamodel()); 621 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 622 STRUCT_SIZE(addr_tmp))) { 623 error = EINVAL; 624 goto errout; 625 } 626 userbufptr = STRUCT_FGETP(addr_tmp, buf); 627 syncaddr.len = STRUCT_FGET(addr_tmp, len); 628 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 629 syncaddr.maxlen = syncaddr.len; 630 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 631 kmem_free(syncaddr.buf, syncaddr.len); 632 error = EFAULT; 633 goto errout; 634 } 635 636 /* get server's netname */ 637 if (copyinstr(STRUCT_FGETP(args, netname), netname, 638 sizeof (netname), &nlen)) { 639 kmem_free(syncaddr.buf, syncaddr.len); 640 error = EFAULT; 641 goto errout; 642 } 643 netname[nlen] = '\0'; 644 645 svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr, 646 knconf); 647 } 648 649 /* 650 * Get the extention data which has the security data structure. 651 * This includes data for AUTH_SYS as well. 652 */ 653 if (flags & NFSMNT_NEWARGS) { 654 switch (STRUCT_FGET(args, nfs_args_ext)) { 655 case NFS_ARGS_EXTA: 656 case NFS_ARGS_EXTB: 657 /* 658 * Indicating the application is using the new 659 * sec_data structure to pass in the security 660 * data. 661 */ 662 if (STRUCT_FGETP(args, 663 nfs_ext_u.nfs_extA.secdata) == NULL) { 664 error = EINVAL; 665 } else { 666 error = sec_clnt_loadinfo( 667 (struct sec_data *)STRUCT_FGETP(args, 668 nfs_ext_u.nfs_extA.secdata), 669 &secdata, get_udatamodel()); 670 } 671 break; 672 673 default: 674 error = EINVAL; 675 break; 676 } 677 678 } else if (flags & NFSMNT_SECURE) { 679 /* 680 * NFSMNT_SECURE is deprecated but we keep it 681 * to support the rouge user generated application 682 * that may use this undocumented interface to do 683 * AUTH_DH security. 
684 */ 685 secdata = create_authdh_data(netname, nlen, &syncaddr, knconf); 686 687 } else { 688 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 689 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 690 secdata->data = NULL; 691 } 692 693 svp->sv_secdata = secdata; 694 695 /* syncaddr is no longer needed. */ 696 if (syncaddr.buf != NULL) 697 kmem_free(syncaddr.buf, syncaddr.len); 698 699 /* 700 * User does not explictly specify a flavor, and a user 701 * defined default flavor is passed down. 702 */ 703 if (flags & NFSMNT_SECDEFAULT) { 704 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 705 svp->sv_flags |= SV4_TRYSECDEFAULT; 706 nfs_rw_exit(&svp->sv_lock); 707 } 708 709 /* 710 * Failover support: 711 * 712 * We may have a linked list of nfs_args structures, 713 * which means the user is looking for failover. If 714 * the mount is either not "read-only" or "soft", 715 * we want to bail out with EINVAL. 716 */ 717 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 718 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 719 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 720 data = (char *)STRUCT_FGETP(args, 721 nfs_ext_u.nfs_extB.next); 722 goto more; 723 } 724 error = EINVAL; 725 goto errout; 726 } 727 728 /* 729 * Determine the zone we're being mounted into. 730 */ 731 zone_hold(mntzone = zone); /* start with this assumption */ 732 if (getzoneid() == GLOBAL_ZONEID) { 733 zone_rele(mntzone); 734 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 735 ASSERT(mntzone != NULL); 736 if (mntzone != zone) { 737 error = EBUSY; 738 goto errout; 739 } 740 } 741 742 if (is_system_labeled()) { 743 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 744 svp->sv_knconf, cr); 745 746 if (error > 0) 747 goto errout; 748 749 if (error == -1) { 750 /* change mount to read-only to prevent write-down */ 751 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 752 } 753 } 754 755 /* 756 * Stop the mount from going any further if the zone is going away. 
757 */ 758 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 759 error = EBUSY; 760 goto errout; 761 } 762 763 /* 764 * Get root vnode. 765 */ 766 proceed: 767 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 768 769 if (error) 770 goto errout; 771 772 mi = VTOMI4(rtvp); 773 774 /* 775 * Send client id to the server, if necessary 776 */ 777 nfs4_error_zinit(&n4e); 778 nfs4setclientid(mi, cr, FALSE, &n4e); 779 error = n4e.error; 780 781 if (error) 782 goto errout; 783 784 /* 785 * Set option fields in the mount info record 786 */ 787 788 if (svp_head->sv_next) { 789 mutex_enter(&mi->mi_lock); 790 mi->mi_flags |= MI4_LLOCK; 791 mutex_exit(&mi->mi_lock); 792 } 793 794 error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 795 796 errout: 797 if (error) { 798 if (rtvp != NULL) { 799 rp = VTOR4(rtvp); 800 if (rp->r_flags & R4HASHED) 801 rp4_rmhash(rp); 802 } 803 if (mi != NULL) { 804 nfs4_async_stop(vfsp); 805 nfs4_async_manager_stop(vfsp); 806 nfs4_remove_mi_from_server(mi, NULL); 807 /* 808 * In this error path we need to sfh4_rele() before 809 * we free the mntinfo4_t as sfh4_rele() has a 810 * dependency on mi_fh_lock. 
811 */ 812 if (rtvp != NULL) { 813 VN_RELE(rtvp); 814 rtvp = NULL; 815 } 816 if (mi->mi_io_kstats) { 817 kstat_delete(mi->mi_io_kstats); 818 mi->mi_io_kstats = NULL; 819 } 820 if (mi->mi_ro_kstats) { 821 kstat_delete(mi->mi_ro_kstats); 822 mi->mi_ro_kstats = NULL; 823 } 824 if (mi->mi_recov_ksp) { 825 kstat_delete(mi->mi_recov_ksp); 826 mi->mi_recov_ksp = NULL; 827 } 828 nfs_free_mi4(mi); 829 if (mntzone != NULL) 830 zone_rele(mntzone); 831 return (error); 832 } 833 sv4_free(svp_head); 834 } 835 836 if (rtvp != NULL) 837 VN_RELE(rtvp); 838 839 if (mntzone != NULL) 840 zone_rele(mntzone); 841 842 return (error); 843 } 844 845 #ifdef DEBUG 846 #define VERS_MSG "NFS4 server " 847 #else 848 #define VERS_MSG "NFS server " 849 #endif 850 851 #define READ_MSG \ 852 VERS_MSG "%s returned 0 for read transfer size" 853 #define WRITE_MSG \ 854 VERS_MSG "%s returned 0 for write transfer size" 855 #define SIZE_MSG \ 856 VERS_MSG "%s returned 0 for maximum file size" 857 858 /* 859 * Get the symbolic link text from the server for a given filehandle 860 * of that symlink. 861 * 862 * (get symlink text) PUTFH READLINK 863 */ 864 static int 865 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 866 int flags) 867 { 868 COMPOUND4args_clnt args; 869 COMPOUND4res_clnt res; 870 int doqueue; 871 nfs_argop4 argop[2]; 872 nfs_resop4 *resop; 873 READLINK4res *lr_res; 874 uint_t len; 875 bool_t needrecov = FALSE; 876 nfs4_recov_state_t recov_state; 877 nfs4_sharedfh_t *sfh; 878 nfs4_error_t e; 879 int num_retry = nfs4_max_mount_retry; 880 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 881 882 sfh = sfh4_get(fh, mi); 883 recov_state.rs_flags = 0; 884 recov_state.rs_num_retry_despite_err = 0; 885 886 recov_retry: 887 nfs4_error_zinit(&e); 888 889 args.array_len = 2; 890 args.array = argop; 891 args.ctag = TAG_GET_SYMLINK; 892 893 if (! 
recovery) { 894 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 895 if (e.error) { 896 sfh4_rele(&sfh); 897 return (e.error); 898 } 899 } 900 901 /* 0. putfh symlink fh */ 902 argop[0].argop = OP_CPUTFH; 903 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 904 905 /* 1. readlink */ 906 argop[1].argop = OP_READLINK; 907 908 doqueue = 1; 909 910 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 911 912 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 913 914 if (needrecov && !recovery && num_retry-- > 0) { 915 916 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 917 "getlinktext_otw: initiating recovery\n")); 918 919 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 920 OP_READLINK, NULL) == FALSE) { 921 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 922 if (!e.error) 923 (void) xdr_free(xdr_COMPOUND4res_clnt, 924 (caddr_t)&res); 925 goto recov_retry; 926 } 927 } 928 929 /* 930 * If non-NFS4 pcol error and/or we weren't able to recover. 931 */ 932 if (e.error != 0) { 933 if (! recovery) 934 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 935 sfh4_rele(&sfh); 936 return (e.error); 937 } 938 939 if (res.status) { 940 e.error = geterrno4(res.status); 941 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 942 if (! recovery) 943 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 944 sfh4_rele(&sfh); 945 return (e.error); 946 } 947 948 /* res.status == NFS4_OK */ 949 ASSERT(res.status == NFS4_OK); 950 951 resop = &res.array[1]; /* readlink res */ 952 lr_res = &resop->nfs_resop4_u.opreadlink; 953 954 /* treat symlink name as data */ 955 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 956 957 if (! recovery) 958 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 959 sfh4_rele(&sfh); 960 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 961 962 return (0); 963 } 964 965 /* 966 * Skip over consecutive slashes and "/./" in a pathname. 
967 */ 968 void 969 pathname_skipslashdot(struct pathname *pnp) 970 { 971 char *c1, *c2; 972 973 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 974 975 c1 = pnp->pn_path + 1; 976 c2 = pnp->pn_path + 2; 977 978 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 979 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 980 pnp->pn_pathlen = pnp->pn_pathlen - 2; 981 } else { 982 pnp->pn_path++; 983 pnp->pn_pathlen--; 984 } 985 } 986 } 987 988 /* 989 * Resolve a symbolic link path. The symlink is in the nth component of 990 * svp->sv_path and has an nfs4 file handle "fh". 991 * Upon return, the sv_path will point to the new path that has the nth 992 * component resolved to its symlink text. 993 */ 994 int 995 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 996 cred_t *cr, int flags) 997 { 998 char *oldpath; 999 char *symlink, *newpath; 1000 struct pathname oldpn, newpn; 1001 char component[MAXNAMELEN]; 1002 int i, addlen, error = 0; 1003 int oldpathlen; 1004 1005 /* Get the symbolic link text over the wire. */ 1006 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 1007 1008 if (error || symlink == NULL || strlen(symlink) == 0) 1009 return (error); 1010 1011 /* 1012 * Compose the new pathname. 1013 * Note: 1014 * - only the nth component is resolved for the pathname. 1015 * - pathname.pn_pathlen does not count the ending null byte. 1016 */ 1017 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1018 oldpath = svp->sv_path; 1019 oldpathlen = svp->sv_pathlen; 1020 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1021 nfs_rw_exit(&svp->sv_lock); 1022 kmem_free(symlink, strlen(symlink) + 1); 1023 return (error); 1024 } 1025 nfs_rw_exit(&svp->sv_lock); 1026 pn_alloc(&newpn); 1027 1028 /* 1029 * Skip over previous components from the oldpath so that the 1030 * oldpn.pn_path will point to the symlink component. Skip 1031 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1032 * pn_getcompnent can get the component. 
1033 */ 1034 for (i = 1; i < nth; i++) { 1035 pathname_skipslashdot(&oldpn); 1036 error = pn_getcomponent(&oldpn, component); 1037 if (error) 1038 goto out; 1039 } 1040 1041 /* 1042 * Copy the old path upto the component right before the symlink 1043 * if the symlink is not an absolute path. 1044 */ 1045 if (symlink[0] != '/') { 1046 addlen = oldpn.pn_path - oldpn.pn_buf; 1047 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1048 newpn.pn_pathlen += addlen; 1049 newpn.pn_path += addlen; 1050 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1051 newpn.pn_pathlen++; 1052 newpn.pn_path++; 1053 } 1054 1055 /* copy the resolved symbolic link text */ 1056 addlen = strlen(symlink); 1057 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1058 error = ENAMETOOLONG; 1059 goto out; 1060 } 1061 bcopy(symlink, newpn.pn_path, addlen); 1062 newpn.pn_pathlen += addlen; 1063 newpn.pn_path += addlen; 1064 1065 /* 1066 * Check if there is any remaining path after the symlink component. 1067 * First, skip the symlink component. 1068 */ 1069 pathname_skipslashdot(&oldpn); 1070 if (error = pn_getcomponent(&oldpn, component)) 1071 goto out; 1072 1073 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1074 1075 /* 1076 * Copy the remaining path to the new pathname if there is any. 
1077 */ 1078 if (addlen > 0) { 1079 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1080 error = ENAMETOOLONG; 1081 goto out; 1082 } 1083 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1084 newpn.pn_pathlen += addlen; 1085 } 1086 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1087 1088 /* get the newpath and store it in the servinfo4_t */ 1089 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1090 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1091 newpath[newpn.pn_pathlen] = '\0'; 1092 1093 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1094 svp->sv_path = newpath; 1095 svp->sv_pathlen = strlen(newpath) + 1; 1096 nfs_rw_exit(&svp->sv_lock); 1097 1098 kmem_free(oldpath, oldpathlen); 1099 out: 1100 kmem_free(symlink, strlen(symlink) + 1); 1101 pn_free(&newpn); 1102 pn_free(&oldpn); 1103 1104 return (error); 1105 } 1106 1107 /* 1108 * Get the root filehandle for the given filesystem and server, and update 1109 * svp. 1110 * 1111 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1112 * to coordinate with recovery. Otherwise, the caller is assumed to be 1113 * the recovery thread or have already done a start_fop. 1114 * 1115 * Errors are returned by the nfs4_error_t parameter. 
1116 */ 1117 1118 static void 1119 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1120 int flags, cred_t *cr, nfs4_error_t *ep) 1121 { 1122 COMPOUND4args_clnt args; 1123 COMPOUND4res_clnt res; 1124 int doqueue = 1; 1125 nfs_argop4 *argop; 1126 nfs_resop4 *resop; 1127 nfs4_ga_res_t *garp; 1128 int num_argops; 1129 lookup4_param_t lookuparg; 1130 nfs_fh4 *tmpfhp; 1131 nfs_fh4 *resfhp; 1132 bool_t needrecov = FALSE; 1133 nfs4_recov_state_t recov_state; 1134 int llndx; 1135 int nthcomp; 1136 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1137 1138 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1139 ASSERT(svp->sv_path != NULL); 1140 if (svp->sv_path[0] == '\0') { 1141 nfs_rw_exit(&svp->sv_lock); 1142 nfs4_error_init(ep, EINVAL); 1143 return; 1144 } 1145 nfs_rw_exit(&svp->sv_lock); 1146 1147 recov_state.rs_flags = 0; 1148 recov_state.rs_num_retry_despite_err = 0; 1149 recov_retry: 1150 nfs4_error_zinit(ep); 1151 1152 if (!recovery) { 1153 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1154 &recov_state, NULL); 1155 1156 /* 1157 * If recovery has been started and this request as 1158 * initiated by a mount, then we must wait for recovery 1159 * to finish before proceeding, otherwise, the error 1160 * cleanup would remove data structures needed by the 1161 * recovery thread. 1162 */ 1163 if (ep->error) { 1164 mutex_enter(&mi->mi_lock); 1165 if (mi->mi_flags & MI4_MOUNTING) { 1166 mi->mi_flags |= MI4_RECOV_FAIL; 1167 mi->mi_error = EIO; 1168 1169 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1170 "nfs4getfh_otw: waiting 4 recovery\n")); 1171 1172 while (mi->mi_flags & MI4_RECOV_ACTIV) 1173 cv_wait(&mi->mi_failover_cv, 1174 &mi->mi_lock); 1175 } 1176 mutex_exit(&mi->mi_lock); 1177 return; 1178 } 1179 1180 /* 1181 * If the client does not specify a specific flavor to use 1182 * and has not gotten a secinfo list from the server yet, 1183 * retrieve the secinfo list from the server and use a 1184 * flavor from the list to mount. 
1185 * 1186 * If fail to get the secinfo list from the server, then 1187 * try the default flavor. 1188 */ 1189 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1190 svp->sv_secinfo == NULL) { 1191 (void) nfs4_secinfo_path(mi, cr, FALSE); 1192 } 1193 } 1194 1195 if (recovery) 1196 args.ctag = TAG_REMAP_MOUNT; 1197 else 1198 args.ctag = TAG_MOUNT; 1199 1200 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1201 lookuparg.argsp = &args; 1202 lookuparg.resp = &res; 1203 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1204 lookuparg.trailer_len = 0; 1205 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1206 lookuparg.mi = mi; 1207 1208 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1209 ASSERT(svp->sv_path != NULL); 1210 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1211 nfs_rw_exit(&svp->sv_lock); 1212 1213 argop = args.array; 1214 num_argops = args.array_len; 1215 1216 /* choose public or root filehandle */ 1217 if (flags & NFS4_GETFH_PUBLIC) 1218 argop[0].argop = OP_PUTPUBFH; 1219 else 1220 argop[0].argop = OP_PUTROOTFH; 1221 1222 /* get fh */ 1223 argop[1].argop = OP_GETFH; 1224 1225 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1226 "nfs4getfh_otw: %s call, mi 0x%p", 1227 needrecov ? 
"recov" : "first", (void *)mi)); 1228 1229 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1230 1231 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1232 1233 if (needrecov) { 1234 bool_t abort; 1235 1236 if (recovery) { 1237 nfs4args_lookup_free(argop, num_argops); 1238 kmem_free(argop, 1239 lookuparg.arglen * sizeof (nfs_argop4)); 1240 if (!ep->error) 1241 (void) xdr_free(xdr_COMPOUND4res_clnt, 1242 (caddr_t)&res); 1243 return; 1244 } 1245 1246 NFS4_DEBUG(nfs4_client_recov_debug, 1247 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1248 1249 abort = nfs4_start_recovery(ep, mi, NULL, 1250 NULL, NULL, NULL, OP_GETFH, NULL); 1251 if (!ep->error) { 1252 ep->error = geterrno4(res.status); 1253 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1254 } 1255 nfs4args_lookup_free(argop, num_argops); 1256 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1257 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1258 /* have another go? */ 1259 if (abort == FALSE) 1260 goto recov_retry; 1261 return; 1262 } 1263 1264 /* 1265 * No recovery, but check if error is set. 1266 */ 1267 if (ep->error) { 1268 nfs4args_lookup_free(argop, num_argops); 1269 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1270 if (!recovery) 1271 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1272 needrecov); 1273 return; 1274 } 1275 1276 is_link_err: 1277 1278 /* for non-recovery errors */ 1279 if (res.status && res.status != NFS4ERR_SYMLINK) { 1280 if (!recovery) { 1281 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1282 needrecov); 1283 } 1284 nfs4args_lookup_free(argop, num_argops); 1285 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1286 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1287 return; 1288 } 1289 1290 /* 1291 * If any intermediate component in the path is a symbolic link, 1292 * resolve the symlink, then try mount again using the new path. 
1293 */ 1294 if (res.status == NFS4ERR_SYMLINK) { 1295 int where; 1296 1297 /* 1298 * This must be from OP_LOOKUP failure. The (cfh) for this 1299 * OP_LOOKUP is a symlink node. Found out where the 1300 * OP_GETFH is for the (cfh) that is a symlink node. 1301 * 1302 * Example: 1303 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1304 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1305 * 1306 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1307 * In this case, where = 7, nthcomp = 2. 1308 */ 1309 where = res.array_len - 2; 1310 ASSERT(where > 0); 1311 1312 resop = &res.array[where - 1]; 1313 ASSERT(resop->resop == OP_GETFH); 1314 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1315 nthcomp = res.array_len/3 - 1; 1316 1317 /* 1318 * Need to call nfs4_end_op before resolve_sympath to avoid 1319 * potential nfs4_start_op deadlock. 1320 */ 1321 if (!recovery) 1322 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1323 needrecov); 1324 1325 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1326 flags); 1327 1328 nfs4args_lookup_free(argop, num_argops); 1329 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1330 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1331 1332 if (ep->error) 1333 return; 1334 1335 goto recov_retry; 1336 } 1337 1338 /* getfh */ 1339 resop = &res.array[res.array_len - 2]; 1340 ASSERT(resop->resop == OP_GETFH); 1341 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1342 1343 /* getattr fsinfo res */ 1344 resop++; 1345 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1346 1347 *vtp = garp->n4g_va.va_type; 1348 1349 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1350 1351 mutex_enter(&mi->mi_lock); 1352 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1353 mi->mi_flags |= MI4_LINK; 1354 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1355 mi->mi_flags |= MI4_SYMLINK; 1356 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1357 mi->mi_flags |= MI4_ACL; 1358 mutex_exit(&mi->mi_lock); 
1359 1360 if (garp->n4g_ext_res->n4g_maxread == 0) 1361 mi->mi_tsize = 1362 MIN(MAXBSIZE, mi->mi_tsize); 1363 else 1364 mi->mi_tsize = 1365 MIN(garp->n4g_ext_res->n4g_maxread, 1366 mi->mi_tsize); 1367 1368 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1369 mi->mi_stsize = 1370 MIN(MAXBSIZE, mi->mi_stsize); 1371 else 1372 mi->mi_stsize = 1373 MIN(garp->n4g_ext_res->n4g_maxwrite, 1374 mi->mi_stsize); 1375 1376 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1377 mi->mi_maxfilesize = 1378 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1379 mi->mi_maxfilesize); 1380 1381 /* 1382 * If the final component is a a symbolic link, resolve the symlink, 1383 * then try mount again using the new path. 1384 * 1385 * Assume no symbolic link for root filesysm "/". 1386 */ 1387 if (*vtp == VLNK) { 1388 /* 1389 * nthcomp is the total result length minus 1390 * the 1st 2 OPs (PUTROOTFH, GETFH), 1391 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1392 * 1393 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1394 * LOOKUP 2nd-comp GETFH GETATTR 1395 * 1396 * (8 - 2)/3 = 2 1397 */ 1398 nthcomp = (res.array_len - 2)/3; 1399 1400 /* 1401 * Need to call nfs4_end_op before resolve_sympath to avoid 1402 * potential nfs4_start_op deadlock. See RFE 4777612. 1403 */ 1404 if (!recovery) 1405 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1406 needrecov); 1407 1408 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1409 flags); 1410 1411 nfs4args_lookup_free(argop, num_argops); 1412 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1413 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1414 1415 if (ep->error) 1416 return; 1417 1418 goto recov_retry; 1419 } 1420 1421 /* 1422 * We need to figure out where in the compound the getfh 1423 * for the parent directory is. If the object to be mounted is 1424 * the root, then there is no lookup at all: 1425 * PUTROOTFH, GETFH. 
1426 * If the object to be mounted is in the root, then the compound is: 1427 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1428 * In either of these cases, the index of the GETFH is 1. 1429 * If it is not at the root, then it's something like: 1430 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1431 * LOOKUP, GETFH, GETATTR 1432 * In this case, the index is llndx (last lookup index) - 2. 1433 */ 1434 if (llndx == -1 || llndx == 2) 1435 resop = &res.array[1]; 1436 else { 1437 ASSERT(llndx > 2); 1438 resop = &res.array[llndx-2]; 1439 } 1440 1441 ASSERT(resop->resop == OP_GETFH); 1442 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1443 1444 /* save the filehandles for the replica */ 1445 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1446 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1447 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1448 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1449 tmpfhp->nfs_fh4_len); 1450 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1451 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1452 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1453 1454 /* initialize fsid and supp_attrs for server fs */ 1455 svp->sv_fsid = garp->n4g_fsid; 1456 svp->sv_supp_attrs = 1457 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1458 1459 nfs_rw_exit(&svp->sv_lock); 1460 1461 nfs4args_lookup_free(argop, num_argops); 1462 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1463 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1464 if (!recovery) 1465 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1466 } 1467 1468 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1469 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1470 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1471 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1472 1473 /* 1474 * Remap the root filehandle for the given filesystem. 
1475 * 1476 * results returned via the nfs4_error_t parameter. 1477 */ 1478 void 1479 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1480 { 1481 struct servinfo4 *svp; 1482 vtype_t vtype; 1483 nfs_fh4 rootfh; 1484 int getfh_flags; 1485 char *orig_sv_path; 1486 int orig_sv_pathlen, num_retry; 1487 1488 mutex_enter(&mi->mi_lock); 1489 1490 remap_retry: 1491 svp = mi->mi_curr_serv; 1492 getfh_flags = 1493 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1494 getfh_flags |= 1495 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1496 mutex_exit(&mi->mi_lock); 1497 1498 /* 1499 * Just in case server path being mounted contains 1500 * symlinks and fails w/STALE, save the initial sv_path 1501 * so we can redrive the initial mount compound with the 1502 * initial sv_path -- not a symlink-expanded version. 1503 * 1504 * This could only happen if a symlink was expanded 1505 * and the expanded mount compound failed stale. Because 1506 * it could be the case that the symlink was removed at 1507 * the server (and replaced with another symlink/dir, 1508 * we need to use the initial sv_path when attempting 1509 * to re-lookup everything and recover. 1510 */ 1511 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1512 orig_sv_pathlen = svp->sv_pathlen; 1513 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1514 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1515 nfs_rw_exit(&svp->sv_lock); 1516 1517 num_retry = nfs4_max_mount_retry; 1518 1519 do { 1520 /* 1521 * Get the root fh from the server. Retry nfs4_max_mount_retry 1522 * (2) times if it fails with STALE since the recovery 1523 * infrastructure doesn't do STALE recovery for components 1524 * of the server path to the object being mounted. 1525 */ 1526 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1527 1528 if (ep->error == 0 && ep->stat == NFS4_OK) 1529 break; 1530 1531 /* 1532 * For some reason, the mount compound failed. 
Before 1533 * retrying, we need to restore the original sv_path 1534 * because it might have contained symlinks that were 1535 * expanded by nfsgetfh_otw before the failure occurred. 1536 * replace current sv_path with orig sv_path -- just in case 1537 * it changed due to embedded symlinks. 1538 */ 1539 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1540 if (orig_sv_pathlen != svp->sv_pathlen) { 1541 kmem_free(svp->sv_path, svp->sv_pathlen); 1542 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1543 svp->sv_pathlen = orig_sv_pathlen; 1544 } 1545 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1546 nfs_rw_exit(&svp->sv_lock); 1547 1548 } while (num_retry-- > 0); 1549 1550 kmem_free(orig_sv_path, orig_sv_pathlen); 1551 1552 if (ep->error != 0 || ep->stat != 0) { 1553 return; 1554 } 1555 1556 if (vtype != VNON && vtype != mi->mi_type) { 1557 /* shouldn't happen */ 1558 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1559 "nfs4_remap_root: server root vnode type (%d) doesn't " 1560 "match mount info (%d)", vtype, mi->mi_type); 1561 } 1562 1563 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1564 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1565 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1566 nfs_rw_exit(&svp->sv_lock); 1567 sfh4_update(mi->mi_rootfh, &rootfh); 1568 1569 /* 1570 * It's possible that recovery took place on the filesystem 1571 * and the server has been updated between the time we did 1572 * the nfs4getfh_otw and now. Re-drive the otw operation 1573 * to make sure we have a good fh. 
1574 */ 1575 mutex_enter(&mi->mi_lock); 1576 if (mi->mi_curr_serv != svp) 1577 goto remap_retry; 1578 1579 mutex_exit(&mi->mi_lock); 1580 } 1581 1582 static int 1583 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1584 int flags, cred_t *cr, zone_t *zone) 1585 { 1586 vnode_t *rtvp = NULL; 1587 mntinfo4_t *mi; 1588 dev_t nfs_dev; 1589 int error = 0; 1590 rnode4_t *rp; 1591 int i; 1592 struct vattr va; 1593 vtype_t vtype = VNON; 1594 vtype_t tmp_vtype = VNON; 1595 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1596 nfs4_oo_hash_bucket_t *bucketp; 1597 nfs_fh4 fh; 1598 char *droptext = ""; 1599 struct nfs_stats *nfsstatsp; 1600 nfs4_fname_t *mfname; 1601 nfs4_error_t e; 1602 char *orig_sv_path; 1603 int orig_sv_pathlen, num_retry; 1604 cred_t *lcr = NULL, *tcr = cr; 1605 1606 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1607 ASSERT(nfsstatsp != NULL); 1608 1609 ASSERT(nfs_zone() == zone); 1610 ASSERT(crgetref(cr)); 1611 1612 /* 1613 * Create a mount record and link it to the vfs struct. 
1614 */ 1615 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1616 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1617 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1618 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1619 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1620 1621 if (!(flags & NFSMNT_SOFT)) 1622 mi->mi_flags |= MI4_HARD; 1623 if ((flags & NFSMNT_NOPRINT)) 1624 mi->mi_flags |= MI4_NOPRINT; 1625 if (flags & NFSMNT_INT) 1626 mi->mi_flags |= MI4_INT; 1627 if (flags & NFSMNT_PUBLIC) 1628 mi->mi_flags |= MI4_PUBLIC; 1629 mi->mi_retrans = NFS_RETRIES; 1630 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1631 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1632 mi->mi_timeo = nfs4_cots_timeo; 1633 else 1634 mi->mi_timeo = NFS_TIMEO; 1635 mi->mi_prog = NFS_PROGRAM; 1636 mi->mi_vers = NFS_V4; 1637 mi->mi_rfsnames = rfsnames_v4; 1638 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1639 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1640 mi->mi_servers = svp; 1641 mi->mi_curr_serv = svp; 1642 mi->mi_acregmin = SEC2HR(ACREGMIN); 1643 mi->mi_acregmax = SEC2HR(ACREGMAX); 1644 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1645 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1646 mi->mi_fh_expire_type = FH4_PERSISTENT; 1647 mi->mi_clientid_next = NULL; 1648 mi->mi_clientid_prev = NULL; 1649 mi->mi_grace_wait = 0; 1650 mi->mi_error = 0; 1651 mi->mi_srvsettime = 0; 1652 1653 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1654 mi->mi_stsize = mi->mi_tsize; 1655 1656 if (flags & NFSMNT_DIRECTIO) 1657 mi->mi_flags |= MI4_DIRECTIO; 1658 1659 mi->mi_flags |= MI4_MOUNTING; 1660 1661 /* 1662 * Make a vfs struct for nfs. We do this here instead of below 1663 * because rtvp needs a vfs before we can do a getattr on it. 
1664 * 1665 * Assign a unique device id to the mount 1666 */ 1667 mutex_enter(&nfs_minor_lock); 1668 do { 1669 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1670 nfs_dev = makedevice(nfs_major, nfs_minor); 1671 } while (vfs_devismounted(nfs_dev)); 1672 mutex_exit(&nfs_minor_lock); 1673 1674 vfsp->vfs_dev = nfs_dev; 1675 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1676 vfsp->vfs_data = (caddr_t)mi; 1677 vfsp->vfs_fstype = nfsfstyp; 1678 vfsp->vfs_bsize = nfs4_bsize; 1679 1680 /* 1681 * Initialize fields used to support async putpage operations. 1682 */ 1683 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1684 mi->mi_async_clusters[i] = nfs4_async_clusters; 1685 mi->mi_async_init_clusters = nfs4_async_clusters; 1686 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1687 mi->mi_max_threads = nfs4_max_threads; 1688 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1689 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1690 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1691 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1692 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1693 1694 mi->mi_vfsp = vfsp; 1695 zone_hold(mi->mi_zone = zone); 1696 nfs4_mi_zonelist_add(mi); 1697 1698 /* 1699 * Initialize the <open owner/cred> hash table. 1700 */ 1701 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1702 bucketp = &(mi->mi_oo_list[i]); 1703 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1704 list_create(&bucketp->b_oo_hash_list, 1705 sizeof (nfs4_open_owner_t), 1706 offsetof(nfs4_open_owner_t, oo_hash_node)); 1707 } 1708 1709 /* 1710 * Initialize the freed open owner list. 
1711 */ 1712 mi->mi_foo_num = 0; 1713 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1714 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1715 offsetof(nfs4_open_owner_t, oo_foo_node)); 1716 1717 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1718 offsetof(nfs4_lost_rqst_t, lr_node)); 1719 1720 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1721 offsetof(nfs4_bseqid_entry_t, bs_node)); 1722 1723 /* 1724 * Initialize the msg buffer. 1725 */ 1726 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1727 offsetof(nfs4_debug_msg_t, msg_node)); 1728 mi->mi_msg_count = 0; 1729 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1730 1731 /* 1732 * Initialize kstats 1733 */ 1734 nfs4_mnt_kstat_init(vfsp); 1735 1736 /* 1737 * Initialize the shared filehandle pool, and get the fname for 1738 * the filesystem root. 1739 */ 1740 sfh4_createtab(&mi->mi_filehandles); 1741 mi->mi_fname = fn_get(NULL, "."); 1742 1743 /* 1744 * Save server path we're attempting to mount. 1745 */ 1746 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1747 orig_sv_pathlen = svp_head->sv_pathlen; 1748 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1749 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1750 nfs_rw_exit(&svp->sv_lock); 1751 1752 /* 1753 * Make the GETFH call to get root fh for each replica. 1754 */ 1755 if (svp_head->sv_next) 1756 droptext = ", dropping replica"; 1757 1758 /* 1759 * If the uid is set then set the creds for secure mounts 1760 * by proxy processes such as automountd. 
1761 */ 1762 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1763 if (svp->sv_secdata->uid != 0) { 1764 lcr = crdup(cr); 1765 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1766 tcr = lcr; 1767 } 1768 nfs_rw_exit(&svp->sv_lock); 1769 for (svp = svp_head; svp; svp = svp->sv_next) { 1770 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1771 nfs_cmn_err(error, CE_WARN, 1772 VERS_MSG "Host %s is a duplicate%s", 1773 svp->sv_hostname, droptext); 1774 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1775 svp->sv_flags |= SV4_NOTINUSE; 1776 nfs_rw_exit(&svp->sv_lock); 1777 continue; 1778 } 1779 mi->mi_curr_serv = svp; 1780 1781 /* 1782 * Just in case server path being mounted contains 1783 * symlinks and fails w/STALE, save the initial sv_path 1784 * so we can redrive the initial mount compound with the 1785 * initial sv_path -- not a symlink-expanded version. 1786 * 1787 * This could only happen if a symlink was expanded 1788 * and the expanded mount compound failed stale. Because 1789 * it could be the case that the symlink was removed at 1790 * the server (and replaced with another symlink/dir, 1791 * we need to use the initial sv_path when attempting 1792 * to re-lookup everything and recover. 1793 * 1794 * Other mount errors should evenutally be handled here also 1795 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1796 * failures will result in mount being redriven a few times. 1797 */ 1798 num_retry = nfs4_max_mount_retry; 1799 do { 1800 nfs4getfh_otw(mi, svp, &tmp_vtype, 1801 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1802 NFS4_GETFH_NEEDSOP, tcr, &e); 1803 1804 if (e.error == 0 && e.stat == NFS4_OK) 1805 break; 1806 1807 /* 1808 * replace current sv_path with orig sv_path -- just in 1809 * case it changed due to embedded symlinks. 
1810 */ 1811 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1812 if (orig_sv_pathlen != svp->sv_pathlen) { 1813 kmem_free(svp->sv_path, svp->sv_pathlen); 1814 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1815 KM_SLEEP); 1816 svp->sv_pathlen = orig_sv_pathlen; 1817 } 1818 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1819 nfs_rw_exit(&svp->sv_lock); 1820 1821 } while (num_retry-- > 0); 1822 1823 error = e.error ? e.error : geterrno4(e.stat); 1824 if (error) { 1825 nfs_cmn_err(error, CE_WARN, 1826 VERS_MSG "initial call to %s failed%s: %m", 1827 svp->sv_hostname, droptext); 1828 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1829 svp->sv_flags |= SV4_NOTINUSE; 1830 nfs_rw_exit(&svp->sv_lock); 1831 mi->mi_flags &= ~MI4_RECOV_FAIL; 1832 mi->mi_error = 0; 1833 continue; 1834 } 1835 1836 if (tmp_vtype == VBAD) { 1837 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1838 VERS_MSG "%s returned a bad file type for " 1839 "root%s", svp->sv_hostname, droptext); 1840 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1841 svp->sv_flags |= SV4_NOTINUSE; 1842 nfs_rw_exit(&svp->sv_lock); 1843 continue; 1844 } 1845 1846 if (vtype == VNON) { 1847 vtype = tmp_vtype; 1848 } else if (vtype != tmp_vtype) { 1849 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1850 VERS_MSG "%s returned a different file type " 1851 "for root%s", svp->sv_hostname, droptext); 1852 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1853 svp->sv_flags |= SV4_NOTINUSE; 1854 nfs_rw_exit(&svp->sv_lock); 1855 continue; 1856 } 1857 if (firstsvp == NULL) 1858 firstsvp = svp; 1859 } 1860 1861 kmem_free(orig_sv_path, orig_sv_pathlen); 1862 1863 if (firstsvp == NULL) { 1864 if (error == 0) 1865 error = ENOENT; 1866 goto bad; 1867 } 1868 1869 mi->mi_curr_serv = svp = firstsvp; 1870 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1871 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1872 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1873 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1874 mi->mi_rootfh = 
sfh4_get(&fh, mi); 1875 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1876 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1877 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1878 nfs_rw_exit(&svp->sv_lock); 1879 1880 /* 1881 * Make the root vnode without attributes. 1882 */ 1883 mfname = mi->mi_fname; 1884 fn_hold(mfname); 1885 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1886 &mfname, NULL, mi, cr, gethrtime()); 1887 rtvp->v_type = vtype; 1888 1889 mi->mi_curread = mi->mi_tsize; 1890 mi->mi_curwrite = mi->mi_stsize; 1891 1892 /* 1893 * Start the manager thread responsible for handling async worker 1894 * threads. 1895 */ 1896 VFS_HOLD(vfsp); /* add reference for thread */ 1897 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1898 vfsp, 0, minclsyspri); 1899 ASSERT(mi->mi_manager_thread != NULL); 1900 /* 1901 * Create the thread that handles over-the-wire calls for 1902 * VOP_INACTIVE. 1903 * This needs to happen after the manager thread is created. 1904 */ 1905 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1906 mi, 0, minclsyspri); 1907 ASSERT(mi->mi_inactive_thread != NULL); 1908 1909 /* If we didn't get a type, get one now */ 1910 if (rtvp->v_type == VNON) { 1911 va.va_mask = AT_TYPE; 1912 error = nfs4getattr(rtvp, &va, tcr); 1913 if (error) 1914 goto bad; 1915 rtvp->v_type = va.va_type; 1916 } 1917 1918 mi->mi_type = rtvp->v_type; 1919 1920 mutex_enter(&mi->mi_lock); 1921 mi->mi_flags &= ~MI4_MOUNTING; 1922 mutex_exit(&mi->mi_lock); 1923 1924 *rtvpp = rtvp; 1925 if (lcr != NULL) 1926 crfree(lcr); 1927 1928 return (0); 1929 bad: 1930 /* 1931 * An error occurred somewhere, need to clean up... 1932 * 1933 * XXX Should not svp be cleaned too? 1934 */ 1935 if (lcr != NULL) 1936 crfree(lcr); 1937 if (rtvp != NULL) { 1938 /* 1939 * We need to release our reference to the root vnode and 1940 * destroy the mntinfo4 struct that we just created. 
1941 */ 1942 rp = VTOR4(rtvp); 1943 if (rp->r_flags & R4HASHED) 1944 rp4_rmhash(rp); 1945 VN_RELE(rtvp); 1946 } 1947 nfs4_async_stop(vfsp); 1948 nfs4_async_manager_stop(vfsp); 1949 if (mi->mi_io_kstats) { 1950 kstat_delete(mi->mi_io_kstats); 1951 mi->mi_io_kstats = NULL; 1952 } 1953 if (mi->mi_ro_kstats) { 1954 kstat_delete(mi->mi_ro_kstats); 1955 mi->mi_ro_kstats = NULL; 1956 } 1957 if (mi->mi_recov_ksp) { 1958 kstat_delete(mi->mi_recov_ksp); 1959 mi->mi_recov_ksp = NULL; 1960 } 1961 nfs_free_mi4(mi); 1962 *rtvpp = NULL; 1963 return (error); 1964 } 1965 1966 /* 1967 * vfs operations 1968 */ 1969 static int 1970 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1971 { 1972 mntinfo4_t *mi; 1973 ushort_t omax; 1974 1975 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1976 return (EPERM); 1977 1978 mi = VFTOMI4(vfsp); 1979 1980 if (flag & MS_FORCE) { 1981 vfsp->vfs_flag |= VFS_UNMOUNTED; 1982 if (nfs_zone() != mi->mi_zone) { 1983 /* 1984 * If the request is coming from the wrong zone, 1985 * we don't want to create any new threads, and 1986 * performance is not a concern. Do everything 1987 * inline. 1988 */ 1989 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1990 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1991 (void *)vfsp)); 1992 nfs4_free_mount(vfsp, cr); 1993 } else { 1994 /* 1995 * Free data structures asynchronously, to avoid 1996 * blocking the current thread (for performance 1997 * reasons only). 1998 */ 1999 async_free_mount(vfsp, cr); 2000 } 2001 return (0); 2002 } 2003 /* 2004 * Wait until all asynchronous putpage operations on 2005 * this file system are complete before flushing rnodes 2006 * from the cache. 2007 */ 2008 omax = mi->mi_max_threads; 2009 if (nfs4_async_stop_sig(vfsp)) { 2010 return (EINTR); 2011 } 2012 r4flush(vfsp, cr); 2013 /* 2014 * If there are any active vnodes on this file system, 2015 * then the file system is busy and can't be umounted. 
2016 */ 2017 if (check_rtable4(vfsp)) { 2018 mutex_enter(&mi->mi_async_lock); 2019 mi->mi_max_threads = omax; 2020 mutex_exit(&mi->mi_async_lock); 2021 return (EBUSY); 2022 } 2023 /* 2024 * The unmount can't fail from now on, and there are no active 2025 * files that could require over-the-wire calls to the server, 2026 * so stop the async manager and the inactive thread. 2027 */ 2028 nfs4_async_manager_stop(vfsp); 2029 /* 2030 * Destroy all rnodes belonging to this file system from the 2031 * rnode hash queues and purge any resources allocated to 2032 * them. 2033 */ 2034 destroy_rtable4(vfsp, cr); 2035 vfsp->vfs_flag |= VFS_UNMOUNTED; 2036 nfs4_remove_mi_from_server(mi, NULL); 2037 if (mi->mi_io_kstats) { 2038 kstat_delete(mi->mi_io_kstats); 2039 mi->mi_io_kstats = NULL; 2040 } 2041 if (mi->mi_ro_kstats) { 2042 kstat_delete(mi->mi_ro_kstats); 2043 mi->mi_ro_kstats = NULL; 2044 } 2045 if (mi->mi_recov_ksp) { 2046 kstat_delete(mi->mi_recov_ksp); 2047 mi->mi_recov_ksp = NULL; 2048 } 2049 return (0); 2050 } 2051 2052 /* 2053 * find root of nfs 2054 */ 2055 static int 2056 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2057 { 2058 mntinfo4_t *mi; 2059 vnode_t *vp; 2060 nfs4_fname_t *mfname; 2061 servinfo4_t *svp; 2062 2063 mi = VFTOMI4(vfsp); 2064 2065 if (nfs_zone() != mi->mi_zone) 2066 return (EPERM); 2067 2068 svp = mi->mi_curr_serv; 2069 if (svp) { 2070 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2071 if (svp->sv_flags & SV4_ROOT_STALE) { 2072 nfs_rw_exit(&svp->sv_lock); 2073 2074 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2075 if (svp->sv_flags & SV4_ROOT_STALE) { 2076 svp->sv_flags &= ~SV4_ROOT_STALE; 2077 nfs_rw_exit(&svp->sv_lock); 2078 return (ENOENT); 2079 } 2080 nfs_rw_exit(&svp->sv_lock); 2081 } else 2082 nfs_rw_exit(&svp->sv_lock); 2083 } 2084 2085 mfname = mi->mi_fname; 2086 fn_hold(mfname); 2087 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2088 VFTOMI4(vfsp), CRED(), gethrtime()); 2089 2090 if (VTOR4(vp)->r_flags & R4STALE) { 
2091 VN_RELE(vp); 2092 return (ENOENT); 2093 } 2094 2095 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2096 2097 vp->v_type = mi->mi_type; 2098 2099 *vpp = vp; 2100 2101 return (0); 2102 } 2103 2104 static int 2105 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2106 { 2107 int error; 2108 nfs4_ga_res_t gar; 2109 nfs4_ga_ext_res_t ger; 2110 2111 gar.n4g_ext_res = &ger; 2112 2113 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2114 NFS4_STATFS_ATTR_MASK, cr)) 2115 return (error); 2116 2117 *sbp = gar.n4g_ext_res->n4g_sb; 2118 2119 return (0); 2120 } 2121 2122 /* 2123 * Get file system statistics. 2124 */ 2125 static int 2126 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2127 { 2128 int error; 2129 vnode_t *vp; 2130 cred_t *cr; 2131 2132 error = nfs4_root(vfsp, &vp); 2133 if (error) 2134 return (error); 2135 2136 cr = CRED(); 2137 2138 error = nfs4_statfs_otw(vp, sbp, cr); 2139 if (!error) { 2140 (void) strncpy(sbp->f_basetype, 2141 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2142 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2143 } else { 2144 nfs4_purge_stale_fh(error, vp, cr); 2145 } 2146 2147 VN_RELE(vp); 2148 2149 return (error); 2150 } 2151 2152 static kmutex_t nfs4_syncbusy; 2153 2154 /* 2155 * Flush dirty nfs files for file system vfsp. 2156 * If vfsp == NULL, all nfs files are flushed. 2157 * 2158 * SYNC_CLOSE in flag is passed to us to 2159 * indicate that we are shutting down and or 2160 * rebooting. 2161 */ 2162 static int 2163 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2164 { 2165 /* 2166 * Cross-zone calls are OK here, since this translates to a 2167 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2168 */ 2169 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2170 r4flush(vfsp, cr); 2171 mutex_exit(&nfs4_syncbusy); 2172 } 2173 2174 /* 2175 * if SYNC_CLOSE is set then we know that 2176 * the system is rebooting, mark the mntinfo 2177 * for later examination. 
2178 */ 2179 if (vfsp && (flag & SYNC_CLOSE)) { 2180 mntinfo4_t *mi; 2181 2182 mi = VFTOMI4(vfsp); 2183 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2184 mutex_enter(&mi->mi_lock); 2185 mi->mi_flags |= MI4_SHUTDOWN; 2186 mutex_exit(&mi->mi_lock); 2187 } 2188 } 2189 return (0); 2190 } 2191 2192 /* 2193 * vget is difficult, if not impossible, to support in v4 because we don't 2194 * know the parent directory or name, which makes it impossible to create a 2195 * useful shadow vnode. And we need the shadow vnode for things like 2196 * OPEN. 2197 */ 2198 2199 /* ARGSUSED */ 2200 /* 2201 * XXX Check nfs4_vget_pseudo() for dependency. 2202 */ 2203 static int 2204 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2205 { 2206 return (EREMOTE); 2207 } 2208 2209 /* 2210 * nfs4_mountroot get called in the case where we are diskless booting. All 2211 * we need from here is the ability to get the server info and from there we 2212 * can simply call nfs4_rootvp. 2213 */ 2214 /* ARGSUSED */ 2215 static int 2216 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2217 { 2218 vnode_t *rtvp; 2219 char root_hostname[SYS_NMLN+1]; 2220 struct servinfo4 *svp; 2221 int error; 2222 int vfsflags; 2223 size_t size; 2224 char *root_path; 2225 struct pathname pn; 2226 char *name; 2227 cred_t *cr; 2228 mntinfo4_t *mi; 2229 struct nfs_args args; /* nfs mount arguments */ 2230 static char token[10]; 2231 nfs4_error_t n4e; 2232 2233 bzero(&args, sizeof (args)); 2234 2235 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2236 clkset(-1L); /* hack for now - until we get time svc? */ 2237 2238 if (why == ROOT_REMOUNT) { 2239 /* 2240 * Shouldn't happen. 2241 */ 2242 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2243 } 2244 2245 if (why == ROOT_UNMOUNT) { 2246 /* 2247 * Nothing to do for NFS. 
2248 */ 2249 return (0); 2250 } 2251 2252 /* 2253 * why == ROOT_INIT 2254 */ 2255 2256 name = token; 2257 *name = 0; 2258 (void) getfsname("root", name, sizeof (token)); 2259 2260 pn_alloc(&pn); 2261 root_path = pn.pn_path; 2262 2263 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2264 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2265 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2266 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2267 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2268 2269 /* 2270 * Get server address 2271 * Get the root path 2272 * Get server's transport 2273 * Get server's hostname 2274 * Get options 2275 */ 2276 args.addr = &svp->sv_addr; 2277 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2278 args.fh = (char *)&svp->sv_fhandle; 2279 args.knconf = svp->sv_knconf; 2280 args.hostname = root_hostname; 2281 vfsflags = 0; 2282 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2283 &args, &vfsflags)) { 2284 if (error == EPROTONOSUPPORT) 2285 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2286 "mount_root failed: server doesn't support NFS V4"); 2287 else 2288 nfs_cmn_err(error, CE_WARN, 2289 "nfs4_mountroot: mount_root failed: %m"); 2290 nfs_rw_exit(&svp->sv_lock); 2291 sv4_free(svp); 2292 pn_free(&pn); 2293 return (error); 2294 } 2295 nfs_rw_exit(&svp->sv_lock); 2296 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2297 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2298 (void) strcpy(svp->sv_hostname, root_hostname); 2299 2300 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2301 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2302 (void) strcpy(svp->sv_path, root_path); 2303 2304 /* 2305 * Force root partition to always be mounted with AUTH_UNIX for now 2306 */ 2307 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2308 svp->sv_secdata->secmod = AUTH_UNIX; 2309 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2310 
svp->sv_secdata->data = NULL; 2311 2312 cr = crgetcred(); 2313 rtvp = NULL; 2314 2315 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2316 2317 if (error) { 2318 crfree(cr); 2319 pn_free(&pn); 2320 goto errout; 2321 } 2322 2323 mi = VTOMI4(rtvp); 2324 2325 /* 2326 * Send client id to the server, if necessary 2327 */ 2328 nfs4_error_zinit(&n4e); 2329 nfs4setclientid(mi, cr, FALSE, &n4e); 2330 error = n4e.error; 2331 2332 crfree(cr); 2333 2334 if (error) { 2335 pn_free(&pn); 2336 goto errout; 2337 } 2338 2339 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2340 if (error) { 2341 nfs_cmn_err(error, CE_WARN, 2342 "nfs4_mountroot: invalid root mount options"); 2343 pn_free(&pn); 2344 goto errout; 2345 } 2346 2347 (void) vfs_lock_wait(vfsp); 2348 vfs_add(NULL, vfsp, vfsflags); 2349 vfs_unlock(vfsp); 2350 2351 size = strlen(svp->sv_hostname); 2352 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2353 rootfs.bo_name[size] = ':'; 2354 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2355 2356 pn_free(&pn); 2357 2358 errout: 2359 if (error) { 2360 sv4_free(svp); 2361 nfs4_async_stop(vfsp); 2362 nfs4_async_manager_stop(vfsp); 2363 } 2364 2365 if (rtvp != NULL) 2366 VN_RELE(rtvp); 2367 2368 return (error); 2369 } 2370 2371 /* 2372 * Initialization routine for VFS routines. 
Should only be called once 2373 */ 2374 int 2375 nfs4_vfsinit(void) 2376 { 2377 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2378 nfs4setclientid_init(); 2379 return (0); 2380 } 2381 2382 void 2383 nfs4_vfsfini(void) 2384 { 2385 nfs4setclientid_fini(); 2386 mutex_destroy(&nfs4_syncbusy); 2387 } 2388 2389 void 2390 nfs4_freevfs(vfs_t *vfsp) 2391 { 2392 mntinfo4_t *mi; 2393 servinfo4_t *svp; 2394 2395 /* free up the resources */ 2396 mi = VFTOMI4(vfsp); 2397 svp = mi->mi_servers; 2398 mi->mi_servers = mi->mi_curr_serv = NULL; 2399 sv4_free(svp); 2400 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_freevfs: " 2401 "free mi %p", (void *)mi)); 2402 2403 /* 2404 * By this time we should have already deleted the 2405 * mi kstats in the unmount code. If they are still around 2406 * somethings wrong 2407 */ 2408 ASSERT(mi->mi_io_kstats == NULL); 2409 2410 nfs_free_mi4(mi); 2411 } 2412 2413 /* 2414 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2415 */ 2416 struct nfs4_server nfs4_server_lst = 2417 { &nfs4_server_lst, &nfs4_server_lst }; 2418 2419 kmutex_t nfs4_server_lst_lock; 2420 2421 static void 2422 nfs4setclientid_init(void) 2423 { 2424 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2425 } 2426 2427 static void 2428 nfs4setclientid_fini(void) 2429 { 2430 mutex_destroy(&nfs4_server_lst_lock); 2431 } 2432 2433 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2434 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2435 2436 /* 2437 * Set the clientid for the server for "mi". No-op if the clientid is 2438 * already set. 2439 * 2440 * The recovery boolean should be set to TRUE if this function was called 2441 * by the recovery code, and FALSE otherwise. This is used to determine 2442 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2443 * for adding a mntinfo4_t to a nfs4_server_t. 2444 * 2445 * Error is returned via 'n4ep'. 
If there was a 'n4ep->stat' error, then 2446 * 'n4ep->error' is set to geterrno4(n4ep->stat). 2447 */ 2448 void 2449 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2450 { 2451 struct nfs4_server *np; 2452 struct servinfo4 *svp = mi->mi_curr_serv; 2453 nfs4_recov_state_t recov_state; 2454 int num_retries = 0; 2455 bool_t retry = FALSE; 2456 cred_t *lcr = NULL; 2457 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2458 time_t lease_time = 0; 2459 2460 recov_state.rs_flags = 0; 2461 recov_state.rs_num_retry_despite_err = 0; 2462 ASSERT(n4ep != NULL); 2463 2464 recov_retry: 2465 nfs4_error_zinit(n4ep); 2466 if (!recovery) 2467 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2468 2469 mutex_enter(&nfs4_server_lst_lock); 2470 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2471 mutex_exit(&nfs4_server_lst_lock); 2472 if (!np) { 2473 struct nfs4_server *tnp; 2474 np = new_nfs4_server(svp, cr); 2475 2476 mutex_enter(&nfs4_server_lst_lock); 2477 tnp = servinfo4_to_nfs4_server(svp); 2478 if (tnp) { 2479 /* 2480 * another thread snuck in and put server on list. 2481 * since we aren't adding it to the nfs4_server_list 2482 * we need to set the ref count to 0 and destroy it. 
2483 */ 2484 np->s_refcnt = 0; 2485 destroy_nfs4_server(np); 2486 np = tnp; 2487 } else { 2488 /* 2489 * do not give list a reference until everything 2490 * succeeds 2491 */ 2492 mutex_enter(&np->s_lock); 2493 insque(np, &nfs4_server_lst); 2494 } 2495 mutex_exit(&nfs4_server_lst_lock); 2496 } 2497 ASSERT(MUTEX_HELD(&np->s_lock)); 2498 /* 2499 * If we find the server already has N4S_CLIENTID_SET, then 2500 * just return, we've already done SETCLIENTID to that server 2501 */ 2502 if (np->s_flags & N4S_CLIENTID_SET) { 2503 /* add mi to np's mntinfo4_list */ 2504 nfs4_add_mi_to_server(np, mi); 2505 if (!recovery) 2506 nfs_rw_exit(&mi->mi_recovlock); 2507 mutex_exit(&np->s_lock); 2508 nfs4_server_rele(np); 2509 return; 2510 } 2511 mutex_exit(&np->s_lock); 2512 2513 2514 /* 2515 * Drop the mi_recovlock since nfs4_start_op will 2516 * acquire it again for us. 2517 */ 2518 if (!recovery) { 2519 nfs_rw_exit(&mi->mi_recovlock); 2520 2521 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2522 if (n4ep->error) { 2523 nfs4_server_rele(np); 2524 return; 2525 } 2526 } 2527 2528 mutex_enter(&np->s_lock); 2529 while (np->s_flags & N4S_CLIENTID_PEND) { 2530 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2531 mutex_exit(&np->s_lock); 2532 nfs4_server_rele(np); 2533 if (!recovery) 2534 nfs4_end_op(mi, NULL, NULL, &recov_state, 2535 recovery); 2536 n4ep->error = EINTR; 2537 return; 2538 } 2539 } 2540 2541 if (np->s_flags & N4S_CLIENTID_SET) { 2542 /* XXX copied/pasted from above */ 2543 /* add mi to np's mntinfo4_list */ 2544 nfs4_add_mi_to_server(np, mi); 2545 mutex_exit(&np->s_lock); 2546 nfs4_server_rele(np); 2547 if (!recovery) 2548 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2549 return; 2550 } 2551 2552 /* 2553 * Reset the N4S_CB_PINGED flag. This is used to 2554 * indicate if we have received a CB_NULL from the 2555 * server. Also we reset the waiter flag. 
2556 */ 2557 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2558 /* any failure must now clear this flag */ 2559 np->s_flags |= N4S_CLIENTID_PEND; 2560 mutex_exit(&np->s_lock); 2561 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2562 2563 if (n4ep->error == EACCES) { 2564 /* 2565 * If the uid is set then set the creds for secure mounts 2566 * by proxy processes such as automountd. 2567 */ 2568 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2569 if (svp->sv_secdata->uid != 0) { 2570 lcr = crdup(cr); 2571 (void) crsetugid(lcr, svp->sv_secdata->uid, 2572 crgetgid(cr)); 2573 } 2574 nfs_rw_exit(&svp->sv_lock); 2575 2576 if (lcr != NULL) { 2577 mutex_enter(&np->s_lock); 2578 crfree(np->s_cred); 2579 np->s_cred = lcr; 2580 mutex_exit(&np->s_lock); 2581 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2582 &retry_inuse); 2583 } 2584 } 2585 mutex_enter(&np->s_lock); 2586 lease_time = np->s_lease_time; 2587 np->s_flags &= ~N4S_CLIENTID_PEND; 2588 mutex_exit(&np->s_lock); 2589 2590 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2591 /* 2592 * Start recovery if failover is a possibility. If 2593 * invoked by the recovery thread itself, then just 2594 * return and let it handle the failover first. NB: 2595 * recovery is not allowed if the mount is in progress 2596 * since the infrastructure is not sufficiently setup 2597 * to allow it. Just return the error (after suitable 2598 * retries). 2599 */ 2600 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2601 (void) nfs4_start_recovery(n4ep, mi, NULL, 2602 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2603 /* 2604 * Don't retry here, just return and let 2605 * recovery take over. 2606 */ 2607 if (recovery) 2608 retry = FALSE; 2609 } else if (nfs4_rpc_retry_error(n4ep->error) || 2610 n4ep->stat == NFS4ERR_RESOURCE || 2611 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2612 2613 retry = TRUE; 2614 /* 2615 * Always retry if in recovery or once had 2616 * contact with the server (but now it's 2617 * overloaded). 
2618 */ 2619 if (recovery == TRUE || 2620 n4ep->error == ETIMEDOUT || 2621 n4ep->error == ECONNRESET) 2622 num_retries = 0; 2623 } else if (retry_inuse && n4ep->error == 0 && 2624 n4ep->stat == NFS4ERR_CLID_INUSE) { 2625 retry = TRUE; 2626 num_retries = 0; 2627 } 2628 } else { 2629 /* Since everything succeeded give the list a reference count */ 2630 mutex_enter(&np->s_lock); 2631 np->s_refcnt++; 2632 mutex_exit(&np->s_lock); 2633 } 2634 2635 if (!recovery) 2636 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2637 2638 2639 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2640 if (retry_inuse) { 2641 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2642 retry_inuse = 0; 2643 } else 2644 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2645 2646 nfs4_server_rele(np); 2647 goto recov_retry; 2648 } 2649 2650 2651 if (n4ep->error == 0) 2652 n4ep->error = geterrno4(n4ep->stat); 2653 2654 /* broadcast before release in case no other threads are waiting */ 2655 cv_broadcast(&np->s_clientid_pend); 2656 nfs4_server_rele(np); 2657 } 2658 2659 int nfs4setclientid_otw_debug = 0; 2660 2661 /* 2662 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2663 * but nothing else; the calling function must be designed to handle those 2664 * other errors. 
2665 */ 2666 static void 2667 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2668 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2669 { 2670 COMPOUND4args_clnt args; 2671 COMPOUND4res_clnt res; 2672 nfs_argop4 argop[3]; 2673 SETCLIENTID4args *s_args; 2674 SETCLIENTID4resok *s_resok; 2675 int doqueue = 1; 2676 nfs4_ga_res_t *garp = NULL; 2677 timespec_t prop_time, after_time; 2678 verifier4 verf; 2679 clientid4 tmp_clientid; 2680 2681 ASSERT(!MUTEX_HELD(&np->s_lock)); 2682 2683 args.ctag = TAG_SETCLIENTID; 2684 2685 args.array = argop; 2686 args.array_len = 3; 2687 2688 /* PUTROOTFH */ 2689 argop[0].argop = OP_PUTROOTFH; 2690 2691 /* GETATTR */ 2692 argop[1].argop = OP_GETATTR; 2693 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2694 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2695 2696 /* SETCLIENTID */ 2697 argop[2].argop = OP_SETCLIENTID; 2698 2699 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2700 2701 mutex_enter(&np->s_lock); 2702 2703 s_args->client.verifier = np->clidtosend.verifier; 2704 s_args->client.id_len = np->clidtosend.id_len; 2705 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2706 s_args->client.id_val = np->clidtosend.id_val; 2707 2708 /* 2709 * Callback needs to happen on non-RDMA transport 2710 * Check if we have saved the original knetconfig 2711 * if so, use that instead. 2712 */ 2713 if (svp->sv_origknconf != NULL) 2714 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2715 else 2716 nfs4_cb_args(np, svp->sv_knconf, s_args); 2717 2718 mutex_exit(&np->s_lock); 2719 2720 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2721 2722 if (ep->error) 2723 return; 2724 2725 /* getattr lease_time res */ 2726 if (res.array_len >= 2) { 2727 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2728 2729 #ifndef _LP64 2730 /* 2731 * The 32 bit client cannot handle a lease time greater than 2732 * (INT32_MAX/1000000). 
This is due to the use of the 2733 * lease_time in calls to drv_usectohz() in 2734 * nfs4_renew_lease_thread(). The problem is that 2735 * drv_usectohz() takes a time_t (which is just a long = 4 2736 * bytes) as its parameter. The lease_time is multiplied by 2737 * 1000000 to convert seconds to usecs for the parameter. If 2738 * a number bigger than (INT32_MAX/1000000) is used then we 2739 * overflow on the 32bit client. 2740 */ 2741 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2742 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2743 } 2744 #endif 2745 2746 mutex_enter(&np->s_lock); 2747 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2748 2749 /* 2750 * Keep track of the lease period for the mi's 2751 * mi_msg_list. We need an appropiate time 2752 * bound to associate past facts with a current 2753 * event. The lease period is perfect for this. 2754 */ 2755 mutex_enter(&mi->mi_msg_list_lock); 2756 mi->mi_lease_period = np->s_lease_time; 2757 mutex_exit(&mi->mi_msg_list_lock); 2758 mutex_exit(&np->s_lock); 2759 } 2760 2761 2762 if (res.status == NFS4ERR_CLID_INUSE) { 2763 clientaddr4 *clid_inuse; 2764 2765 if (!(*retry_inusep)) { 2766 clid_inuse = &res.array->nfs_resop4_u. 2767 opsetclientid.SETCLIENTID4res_u.client_using; 2768 2769 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2770 "NFS4 mount (SETCLIENTID failed)." 2771 " nfs4_client_id.id is in" 2772 "use already by: r_netid<%s> r_addr<%s>", 2773 clid_inuse->r_netid, clid_inuse->r_addr); 2774 } 2775 2776 /* 2777 * XXX - The client should be more robust in its 2778 * handling of clientid in use errors (regen another 2779 * clientid and try again?) 2780 */ 2781 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2782 return; 2783 } 2784 2785 if (res.status) { 2786 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2787 return; 2788 } 2789 2790 s_resok = &res.array[2].nfs_resop4_u. 
2791 opsetclientid.SETCLIENTID4res_u.resok4; 2792 2793 tmp_clientid = s_resok->clientid; 2794 2795 verf = s_resok->setclientid_confirm; 2796 2797 #ifdef DEBUG 2798 if (nfs4setclientid_otw_debug) { 2799 union { 2800 clientid4 clientid; 2801 int foo[2]; 2802 } cid; 2803 2804 cid.clientid = s_resok->clientid; 2805 2806 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2807 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2808 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2809 } 2810 #endif 2811 2812 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2813 2814 /* Confirm the client id and get the lease_time attribute */ 2815 2816 args.ctag = TAG_SETCLIENTID_CF; 2817 2818 args.array = argop; 2819 args.array_len = 1; 2820 2821 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2822 2823 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2824 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2825 2826 /* used to figure out RTT for np */ 2827 gethrestime(&prop_time); 2828 2829 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2830 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2831 prop_time.tv_nsec)); 2832 2833 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2834 2835 gethrestime(&after_time); 2836 mutex_enter(&np->s_lock); 2837 np->propagation_delay.tv_sec = 2838 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2839 mutex_exit(&np->s_lock); 2840 2841 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2842 "finish time: %ld sec ", after_time.tv_sec)); 2843 2844 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2845 "propagation delay set to %ld sec", 2846 np->propagation_delay.tv_sec)); 2847 2848 if (ep->error) 2849 return; 2850 2851 if (res.status == NFS4ERR_CLID_INUSE) { 2852 clientaddr4 *clid_inuse; 2853 2854 if (!(*retry_inusep)) { 2855 clid_inuse = &res.array->nfs_resop4_u. 
2856 opsetclientid.SETCLIENTID4res_u.client_using; 2857 2858 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2859 "SETCLIENTID_CONFIRM failed. " 2860 "nfs4_client_id.id is in use already by: " 2861 "r_netid<%s> r_addr<%s>", 2862 clid_inuse->r_netid, clid_inuse->r_addr); 2863 } 2864 2865 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2866 return; 2867 } 2868 2869 if (res.status) { 2870 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2871 return; 2872 } 2873 2874 mutex_enter(&np->s_lock); 2875 np->clientid = tmp_clientid; 2876 np->s_flags |= N4S_CLIENTID_SET; 2877 2878 /* Add mi to np's mntinfo4 list */ 2879 nfs4_add_mi_to_server(np, mi); 2880 2881 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2882 /* 2883 * Start lease management thread. 2884 * Keep trying until we succeed. 2885 */ 2886 2887 np->s_refcnt++; /* pass reference to thread */ 2888 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2889 minclsyspri); 2890 } 2891 mutex_exit(&np->s_lock); 2892 2893 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2894 } 2895 2896 /* 2897 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2898 * mi's clientid the same as sp's. 2899 * Assumes sp is locked down. 
2900 */ 2901 void 2902 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 2903 { 2904 mntinfo4_t *tmi; 2905 int in_list = 0; 2906 2907 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 2908 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2909 ASSERT(sp != &nfs4_server_lst); 2910 ASSERT(MUTEX_HELD(&sp->s_lock)); 2911 2912 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2913 "nfs4_add_mi_to_server: add mi %p to sp %p", 2914 (void*)mi, (void*)sp)); 2915 2916 for (tmi = sp->mntinfo4_list; 2917 tmi != NULL; 2918 tmi = tmi->mi_clientid_next) { 2919 if (tmi == mi) { 2920 NFS4_DEBUG(nfs4_client_lease_debug, 2921 (CE_NOTE, 2922 "nfs4_add_mi_to_server: mi in list")); 2923 in_list = 1; 2924 } 2925 } 2926 2927 /* 2928 * First put a hold on the mntinfo4's vfsp so that references via 2929 * mntinfo4_list will be valid. 2930 */ 2931 if (!in_list) 2932 VFS_HOLD(mi->mi_vfsp); 2933 2934 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 2935 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 2936 2937 if (!in_list) { 2938 if (sp->mntinfo4_list) 2939 sp->mntinfo4_list->mi_clientid_prev = mi; 2940 mi->mi_clientid_next = sp->mntinfo4_list; 2941 sp->mntinfo4_list = mi; 2942 mi->mi_srvsettime = gethrestime_sec(); 2943 } 2944 2945 /* set mi's clientid to that of sp's for later matching */ 2946 mi->mi_clientid = sp->clientid; 2947 2948 /* 2949 * Update the clientid for any other mi's belonging to sp. This 2950 * must be done here while we hold sp->s_lock, so that 2951 * find_nfs4_server() continues to work. 2952 */ 2953 2954 for (tmi = sp->mntinfo4_list; 2955 tmi != NULL; 2956 tmi = tmi->mi_clientid_next) { 2957 if (tmi != mi) { 2958 tmi->mi_clientid = sp->clientid; 2959 } 2960 } 2961 } 2962 2963 /* 2964 * Remove the mi from sp's mntinfo4_list and release its reference. 2965 * Exception: if mi still has open files, flag it for later removal (when 2966 * all the files are closed). 
2967 * 2968 * If this is the last mntinfo4 in sp's list then tell the lease renewal 2969 * thread to exit. 2970 */ 2971 static void 2972 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 2973 { 2974 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2975 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 2976 (void*)mi, (void*)sp)); 2977 2978 ASSERT(sp != NULL); 2979 ASSERT(MUTEX_HELD(&sp->s_lock)); 2980 ASSERT(mi->mi_open_files >= 0); 2981 2982 /* 2983 * First make sure this mntinfo4 can be taken off of the list, 2984 * ie: it doesn't have any open files remaining. 2985 */ 2986 if (mi->mi_open_files > 0) { 2987 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 2988 "nfs4_remove_mi_from_server_nolock: don't " 2989 "remove mi since it still has files open")); 2990 2991 mutex_enter(&mi->mi_lock); 2992 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 2993 mutex_exit(&mi->mi_lock); 2994 return; 2995 } 2996 2997 remove_mi(sp, mi); 2998 2999 if (sp->mntinfo4_list == NULL) { 3000 /* last fs unmounted, kill the thread */ 3001 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3002 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3003 nfs4_mark_srv_dead(sp); 3004 } 3005 } 3006 3007 /* 3008 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3009 */ 3010 static void 3011 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3012 { 3013 ASSERT(MUTEX_HELD(&sp->s_lock)); 3014 3015 /* 3016 * We release a reference, and the caller must still have a 3017 * reference. 3018 */ 3019 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3020 3021 if (mi->mi_clientid_prev) { 3022 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3023 } else { 3024 /* This is the first mi in sp's mntinfo4_list */ 3025 /* 3026 * Make sure the first mntinfo4 in the list is the actual 3027 * mntinfo4 passed in. 
3028 */ 3029 ASSERT(sp->mntinfo4_list == mi); 3030 3031 sp->mntinfo4_list = mi->mi_clientid_next; 3032 } 3033 if (mi->mi_clientid_next) 3034 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3035 3036 /* Now mark the mntinfo4's links as being removed */ 3037 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3038 3039 VFS_RELE(mi->mi_vfsp); 3040 } 3041 3042 /* 3043 * Free all the entries in sp's mntinfo4_list. 3044 */ 3045 static void 3046 remove_all_mi(nfs4_server_t *sp) 3047 { 3048 mntinfo4_t *mi; 3049 3050 ASSERT(MUTEX_HELD(&sp->s_lock)); 3051 3052 while (sp->mntinfo4_list != NULL) { 3053 mi = sp->mntinfo4_list; 3054 /* 3055 * Grab a reference in case there is only one left (which 3056 * remove_mi() frees). 3057 */ 3058 VFS_HOLD(mi->mi_vfsp); 3059 remove_mi(sp, mi); 3060 VFS_RELE(mi->mi_vfsp); 3061 } 3062 } 3063 3064 /* 3065 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3066 * 3067 * This version can be called with a null nfs4_server_t arg, 3068 * and will either find the right one and handle locking, or 3069 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3070 */ 3071 void 3072 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3073 { 3074 nfs4_server_t *sp; 3075 3076 if (esp == NULL) { 3077 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3078 sp = find_nfs4_server_all(mi, 1); 3079 } else 3080 sp = esp; 3081 3082 if (sp != NULL) 3083 nfs4_remove_mi_from_server_nolock(mi, sp); 3084 3085 /* 3086 * If we had a valid esp as input, the calling function will be 3087 * responsible for unlocking the esp nfs4_server. 3088 */ 3089 if (esp == NULL) { 3090 if (sp != NULL) 3091 mutex_exit(&sp->s_lock); 3092 nfs_rw_exit(&mi->mi_recovlock); 3093 if (sp != NULL) 3094 nfs4_server_rele(sp); 3095 } 3096 } 3097 3098 /* 3099 * Return TRUE if the given server has any non-unmounted filesystems. 
3100 */ 3101 3102 bool_t 3103 nfs4_fs_active(nfs4_server_t *sp) 3104 { 3105 mntinfo4_t *mi; 3106 3107 ASSERT(MUTEX_HELD(&sp->s_lock)); 3108 3109 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3110 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3111 return (TRUE); 3112 } 3113 3114 return (FALSE); 3115 } 3116 3117 /* 3118 * Mark sp as finished and notify any waiters. 3119 */ 3120 3121 void 3122 nfs4_mark_srv_dead(nfs4_server_t *sp) 3123 { 3124 ASSERT(MUTEX_HELD(&sp->s_lock)); 3125 3126 sp->s_thread_exit = NFS4_THREAD_EXIT; 3127 cv_broadcast(&sp->cv_thread_exit); 3128 } 3129 3130 /* 3131 * Create a new nfs4_server_t structure. 3132 * Returns new node unlocked and not in list, but with a reference count of 3133 * 1. 3134 */ 3135 struct nfs4_server * 3136 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3137 { 3138 struct nfs4_server *np; 3139 timespec_t tt; 3140 union { 3141 struct { 3142 uint32_t sec; 3143 uint32_t subsec; 3144 } un_curtime; 3145 verifier4 un_verifier; 3146 } nfs4clientid_verifier; 3147 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3148 int len; 3149 3150 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3151 np->saddr.len = svp->sv_addr.len; 3152 np->saddr.maxlen = svp->sv_addr.maxlen; 3153 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3154 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3155 np->s_refcnt = 1; 3156 3157 /* 3158 * Build the nfs_client_id4 for this server mount. Ensure 3159 * the verifier is useful and that the identification is 3160 * somehow based on the server's address for the case of 3161 * multi-homed servers. 3162 */ 3163 nfs4clientid_verifier.un_verifier = 0; 3164 gethrestime(&tt); 3165 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3166 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3167 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3168 3169 /* 3170 * calculate the length of the opaque identifier. 
Subtract 2 3171 * for the "%s" and add the traditional +1 for null 3172 * termination. 3173 */ 3174 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3175 np->clidtosend.id_len = len + np->saddr.maxlen; 3176 3177 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3178 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3179 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3180 3181 np->s_flags = 0; 3182 np->mntinfo4_list = NULL; 3183 /* save cred for issuing rfs4calls inside the renew thread */ 3184 crhold(cr); 3185 np->s_cred = cr; 3186 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3187 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3188 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3189 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3190 offsetof(rnode4_t, r_deleg_link)); 3191 np->s_thread_exit = 0; 3192 np->state_ref_count = 0; 3193 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3194 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3195 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3196 np->s_otw_call_count = 0; 3197 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3198 np->zoneid = getzoneid(); 3199 np->zone_globals = nfs4_get_callback_globals(); 3200 ASSERT(np->zone_globals != NULL); 3201 return (np); 3202 } 3203 3204 /* 3205 * Create a new nfs4_server_t structure and add it to the list. 3206 * Returns new node locked; reference must eventually be freed. 
3207 */ 3208 static struct nfs4_server * 3209 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3210 { 3211 nfs4_server_t *sp; 3212 3213 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3214 sp = new_nfs4_server(svp, cr); 3215 mutex_enter(&sp->s_lock); 3216 insque(sp, &nfs4_server_lst); 3217 sp->s_refcnt++; /* list gets a reference */ 3218 sp->clientid = 0; 3219 return (sp); 3220 } 3221 3222 int nfs4_server_t_debug = 0; 3223 3224 #ifdef lint 3225 extern void 3226 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3227 #endif 3228 3229 #ifndef lint 3230 #ifdef DEBUG 3231 void 3232 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3233 { 3234 int hash16(void *p, int len); 3235 nfs4_server_t *np; 3236 3237 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3238 "dumping nfs4_server_t list in %s", txt)); 3239 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3240 "mi 0x%p, want clientid %llx, addr %d/%04X", 3241 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3242 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3243 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3244 np = np->forw) { 3245 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3246 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3247 np, (longlong_t)np->clientid, np->saddr.len, 3248 hash16((void *)np->saddr.buf, np->saddr.len), 3249 np->state_ref_count)); 3250 if (np->saddr.len == srv_p->sv_addr.len && 3251 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3252 np->saddr.len) == 0) 3253 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3254 " - address matches")); 3255 if (np->clientid == clientid || np->clientid == 0) 3256 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3257 " - clientid matches")); 3258 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3259 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3260 " - thread not exiting")); 3261 } 3262 delay(hz); 3263 } 3264 #endif 3265 #endif 3266 3267 3268 /* 3269 * Move a mntinfo4_t from one server list to another. 
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 *
 * 'old' and 'new' are matched against the list entries of the caller's
 * zone by server address; entries flagged NFS4_THREAD_EXIT are ignored.
 * If no entry exists for 'new', one is created with kcred.  The open file
 * count of mi is transferred from the old server's state_ref_count to the
 * new server's.  Both s_locks are dropped again before returning.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(nfs_zone() == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	/*
	 * One pass over the list locates both nodes; each is locked as it
	 * is encountered, which is what yields the list-order locking.
	 * NOTE(review): the scan keeps testing for 'old' (and 'new') after
	 * a match, so it relies on at most one live entry per address and
	 * on 'old' and 'new' having different addresses -- confirm.
	 */
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			/* reference for our own use, dropped below */
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		/* this is the reference handed back to the caller */
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		/* comes back locked, on the list and referenced */
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void*)mi, (void*)old, (void*)new,
	    (void*)op, (void*)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	/*
	 * Move the open-file accounting to the new server.  mi_open_files
	 * is zeroed around the removal because
	 * nfs4_remove_mi_from_server_nolock() only defers (rather than
	 * performs) the removal while that count is positive.
	 */
	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	/* done with the old server: unlock it and drop our reference */
	mutex_exit(&op->s_lock);
	nfs4_server_rele(op);
	mutex_exit(&np->s_lock);

	return (np);
}

/*
 * Need to have the nfs4_server_lst_lock.
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
3367 */ 3368 nfs4_server_t * 3369 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3370 { 3371 nfs4_server_t *np; 3372 zoneid_t zoneid = nfs_zoneid(); 3373 3374 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3375 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3376 if (np->zoneid == zoneid && 3377 np->saddr.len == srv_p->sv_addr.len && 3378 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3379 np->saddr.len) == 0 && 3380 np->s_thread_exit != NFS4_THREAD_EXIT) { 3381 mutex_enter(&np->s_lock); 3382 np->s_refcnt++; 3383 return (np); 3384 } 3385 } 3386 return (NULL); 3387 } 3388 3389 /* 3390 * Search the nfs4_server_lst to find a match based on clientid and 3391 * addr. 3392 * Locks the nfs4_server down if it is found and returns a reference that 3393 * must eventually be freed. 3394 * 3395 * Returns NULL it no match is found. This means one of two things: either 3396 * mi is in the process of being mounted, or mi has been unmounted. 3397 * 3398 * The caller should be holding mi->mi_recovlock, and it should continue to 3399 * hold the lock until done with the returned nfs4_server_t. Once 3400 * mi->mi_recovlock is released, there is no guarantee that the returned 3401 * mi->nfs4_server_t will continue to correspond to mi. 3402 */ 3403 nfs4_server_t * 3404 find_nfs4_server(mntinfo4_t *mi) 3405 { 3406 return (find_nfs4_server_all(mi, 0)); 3407 } 3408 3409 /* 3410 * Same as above, but takes an "all" parameter which can be 3411 * set to 1 if the caller wishes to find nfs4_server_t's which 3412 * have been marked for termination by the exit of the renew 3413 * thread. This should only be used by operations which are 3414 * cleaning up and will not cause an OTW op. 
3415 */ 3416 nfs4_server_t * 3417 find_nfs4_server_all(mntinfo4_t *mi, int all) 3418 { 3419 nfs4_server_t *np; 3420 servinfo4_t *svp; 3421 zoneid_t zoneid = mi->mi_zone->zone_id; 3422 3423 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3424 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3425 /* 3426 * This can be called from nfs4_unmount() which can be called from the 3427 * global zone, hence it's legal for the global zone to muck with 3428 * another zone's server list, as long as it doesn't try to contact 3429 * them. 3430 */ 3431 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3432 nfs_global_client_only != 0); 3433 3434 /* 3435 * The nfs4_server_lst_lock global lock is held when we get a new 3436 * clientid (via SETCLIENTID OTW). Holding this global lock and 3437 * mi_recovlock (READER is fine) ensures that the nfs4_server 3438 * and this mntinfo4 can't get out of sync, so the following search is 3439 * always valid. 3440 */ 3441 mutex_enter(&nfs4_server_lst_lock); 3442 #ifdef DEBUG 3443 if (nfs4_server_t_debug) { 3444 /* mi->mi_clientid is unprotected, ok for debug output */ 3445 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3446 mi->mi_curr_serv); 3447 } 3448 #endif 3449 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3450 mutex_enter(&np->s_lock); 3451 svp = mi->mi_curr_serv; 3452 3453 if (np->zoneid == zoneid && 3454 np->clientid == mi->mi_clientid && 3455 np->saddr.len == svp->sv_addr.len && 3456 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3457 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3458 mutex_exit(&nfs4_server_lst_lock); 3459 np->s_refcnt++; 3460 return (np); 3461 } 3462 mutex_exit(&np->s_lock); 3463 } 3464 mutex_exit(&nfs4_server_lst_lock); 3465 3466 return (NULL); 3467 } 3468 3469 /* 3470 * Release the reference to sp and destroy it if that's the last one. 
3471 */ 3472 3473 void 3474 nfs4_server_rele(nfs4_server_t *sp) 3475 { 3476 mutex_enter(&sp->s_lock); 3477 ASSERT(sp->s_refcnt > 0); 3478 sp->s_refcnt--; 3479 if (sp->s_refcnt > 0) { 3480 mutex_exit(&sp->s_lock); 3481 return; 3482 } 3483 mutex_exit(&sp->s_lock); 3484 3485 mutex_enter(&nfs4_server_lst_lock); 3486 mutex_enter(&sp->s_lock); 3487 if (sp->s_refcnt > 0) { 3488 mutex_exit(&sp->s_lock); 3489 mutex_exit(&nfs4_server_lst_lock); 3490 return; 3491 } 3492 remque(sp); 3493 sp->forw = sp->back = NULL; 3494 mutex_exit(&nfs4_server_lst_lock); 3495 destroy_nfs4_server(sp); 3496 } 3497 3498 static void 3499 destroy_nfs4_server(nfs4_server_t *sp) 3500 { 3501 ASSERT(MUTEX_HELD(&sp->s_lock)); 3502 ASSERT(sp->s_refcnt == 0); 3503 ASSERT(sp->s_otw_call_count == 0); 3504 3505 remove_all_mi(sp); 3506 3507 crfree(sp->s_cred); 3508 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3509 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3510 mutex_exit(&sp->s_lock); 3511 3512 /* destroy the nfs4_server */ 3513 nfs4callback_destroy(sp); 3514 list_destroy(&sp->s_deleg_list); 3515 mutex_destroy(&sp->s_lock); 3516 cv_destroy(&sp->cv_thread_exit); 3517 cv_destroy(&sp->s_cv_otw_count); 3518 cv_destroy(&sp->s_clientid_pend); 3519 cv_destroy(&sp->wait_cb_null); 3520 nfs_rw_destroy(&sp->s_recovlock); 3521 kmem_free(sp, sizeof (*sp)); 3522 } 3523 3524 /* 3525 * Lock sp, but only if it's still active (in the list and hasn't been 3526 * flagged as exiting) or 'all' is non-zero. 3527 * Returns TRUE if sp got locked and adds a reference to sp. 
3528 */ 3529 bool_t 3530 nfs4_server_vlock(nfs4_server_t *sp, int all) 3531 { 3532 nfs4_server_t *np; 3533 3534 mutex_enter(&nfs4_server_lst_lock); 3535 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3536 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3537 all != 0)) { 3538 mutex_enter(&np->s_lock); 3539 np->s_refcnt++; 3540 mutex_exit(&nfs4_server_lst_lock); 3541 return (TRUE); 3542 } 3543 } 3544 mutex_exit(&nfs4_server_lst_lock); 3545 return (FALSE); 3546 } 3547 3548 /* 3549 * Fork off a thread to free the data structures for a mount. 3550 */ 3551 3552 static void 3553 async_free_mount(vfs_t *vfsp, cred_t *cr) 3554 { 3555 freemountargs_t *args; 3556 3557 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3558 args->fm_vfsp = vfsp; 3559 VFS_HOLD(vfsp); 3560 args->fm_cr = cr; 3561 crhold(cr); 3562 3563 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3564 minclsyspri); 3565 } 3566 3567 static void 3568 nfs4_free_mount_thread(freemountargs_t *args) 3569 { 3570 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3571 VFS_RELE(args->fm_vfsp); 3572 crfree(args->fm_cr); 3573 kmem_free(args, sizeof (freemountargs_t)); 3574 zthread_exit(); 3575 /* NOTREACHED */ 3576 } 3577 3578 /* 3579 * Thread to free the data structures for a given filesystem. 3580 */ 3581 static void 3582 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3583 { 3584 mntinfo4_t *mi = VFTOMI4(vfsp); 3585 nfs4_server_t *sp; 3586 callb_cpr_t cpr_info; 3587 kmutex_t cpr_lock; 3588 boolean_t async_thread; 3589 3590 /* 3591 * We need to participate in the CPR framework if this is a kernel 3592 * thread. 
3593 */ 3594 async_thread = (curproc == nfs_zone()->zone_zsched); 3595 if (async_thread) { 3596 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3597 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3598 "nfsv4AsyncUnmount"); 3599 } 3600 3601 /* 3602 * We need to wait for all outstanding OTW calls 3603 * and recovery to finish before we remove the mi 3604 * from the nfs4_server_t, as current pending 3605 * calls might still need this linkage (in order 3606 * to find a nfs4_server_t from a mntinfo4_t). 3607 */ 3608 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3609 sp = find_nfs4_server(mi); 3610 nfs_rw_exit(&mi->mi_recovlock); 3611 3612 if (sp) { 3613 while (sp->s_otw_call_count != 0) { 3614 if (async_thread) { 3615 mutex_enter(&cpr_lock); 3616 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3617 mutex_exit(&cpr_lock); 3618 } 3619 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3620 if (async_thread) { 3621 mutex_enter(&cpr_lock); 3622 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3623 mutex_exit(&cpr_lock); 3624 } 3625 } 3626 mutex_exit(&sp->s_lock); 3627 nfs4_server_rele(sp); 3628 sp = NULL; 3629 } 3630 3631 3632 mutex_enter(&mi->mi_lock); 3633 while (mi->mi_in_recovery != 0) { 3634 if (async_thread) { 3635 mutex_enter(&cpr_lock); 3636 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3637 mutex_exit(&cpr_lock); 3638 } 3639 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3640 if (async_thread) { 3641 mutex_enter(&cpr_lock); 3642 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3643 mutex_exit(&cpr_lock); 3644 } 3645 } 3646 mutex_exit(&mi->mi_lock); 3647 3648 /* 3649 * The original purge of the dnlc via 'dounmount' 3650 * doesn't guarantee that another dnlc entry was not 3651 * added while we waitied for all outstanding OTW 3652 * and recovery calls to finish. So re-purge the 3653 * dnlc now. 3654 */ 3655 (void) dnlc_purge_vfsp(vfsp, 0); 3656 3657 /* 3658 * We need to explicitly stop the manager thread; the asyc worker 3659 * threads can timeout and exit on their own. 
3660 */ 3661 nfs4_async_manager_stop(vfsp); 3662 3663 destroy_rtable4(vfsp, cr); 3664 3665 nfs4_remove_mi_from_server(mi, NULL); 3666 3667 if (mi->mi_io_kstats) { 3668 kstat_delete(mi->mi_io_kstats); 3669 mi->mi_io_kstats = NULL; 3670 } 3671 if (mi->mi_ro_kstats) { 3672 kstat_delete(mi->mi_ro_kstats); 3673 mi->mi_ro_kstats = NULL; 3674 } 3675 if (mi->mi_recov_ksp) { 3676 kstat_delete(mi->mi_recov_ksp); 3677 mi->mi_recov_ksp = NULL; 3678 } 3679 3680 if (async_thread) { 3681 mutex_enter(&cpr_lock); 3682 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3683 mutex_destroy(&cpr_lock); 3684 } 3685 } 3686