1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
29 * All Rights Reserved 30 */ 31 32 #pragma ident "%Z%%M% %I% %E% SMI" 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/cred.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/pathname.h> 41 #include <sys/sysmacros.h> 42 #include <sys/kmem.h> 43 #include <sys/mkdev.h> 44 #include <sys/mount.h> 45 #include <sys/statvfs.h> 46 #include <sys/errno.h> 47 #include <sys/debug.h> 48 #include <sys/cmn_err.h> 49 #include <sys/utsname.h> 50 #include <sys/bootconf.h> 51 #include <sys/modctl.h> 52 #include <sys/acl.h> 53 #include <sys/flock.h> 54 #include <sys/time.h> 55 #include <sys/disp.h> 56 #include <sys/policy.h> 57 #include <sys/socket.h> 58 #include <sys/netconfig.h> 59 #include <sys/dnlc.h> 60 #include <sys/list.h> 61 62 #include <rpc/types.h> 63 #include <rpc/auth.h> 64 #include <rpc/rpcsec_gss.h> 65 #include <rpc/clnt.h> 66 67 #include <nfs/nfs.h> 68 #include <nfs/nfs_clnt.h> 69 #include <nfs/mount.h> 70 #include <nfs/nfs_acl.h> 71 72 #include <fs/fs_subr.h> 73 74 #include <nfs/nfs4.h> 75 #include <nfs/rnode4.h> 76 #include <nfs/nfs4_clnt.h> 77 78 /* 79 * Arguments passed to thread to free data structures from forced unmount. 80 */ 81 82 typedef struct { 83 vfs_t *fm_vfsp; 84 cred_t *fm_cr; 85 } freemountargs_t; 86 87 static void async_free_mount(vfs_t *, cred_t *); 88 static void nfs4_free_mount(vfs_t *, cred_t *); 89 static void nfs4_free_mount_thread(freemountargs_t *); 90 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 91 92 /* 93 * From rpcsec module (common/rpcsec). 
94 */ 95 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 96 extern void sec_clnt_freeinfo(struct sec_data *); 97 98 /* 99 * The order and contents of this structure must be kept in sync with that of 100 * rfsreqcnt_v4_tmpl in nfs_stats.c 101 */ 102 static char *rfsnames_v4[] = { 103 "null", "compound", "reserved", "access", "close", "commit", "create", 104 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 105 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 106 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 107 "read", "readdir", "readlink", "remove", "rename", "renew", 108 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 109 "setclientid_confirm", "verify", "write" 110 }; 111 112 /* 113 * nfs4_max_mount_retry is the number of times the client will redrive 114 * a mount compound before giving up and returning failure. The intent 115 * is to redrive mount compounds which fail NFS4ERR_STALE so that 116 * if a component of the server path being mounted goes stale, it can 117 * "recover" by redriving the mount compund (LOOKUP ops). This recovery 118 * code is needed outside of the recovery framework because mount is a 119 * special case. The client doesn't create vnodes/rnodes for components 120 * of the server path being mounted. The recovery code recovers real 121 * client objects, not STALE FHs which map to components of the server 122 * path being mounted. 123 * 124 * We could just fail the mount on the first time, but that would 125 * instantly trigger failover (from nfs4_mount), and the client should 126 * try to re-lookup the STALE FH before doing failover. The easiest 127 * way to "re-lookup" is to simply redrive the mount compound. 128 */ 129 static int nfs4_max_mount_retry = 2; 130 131 /* 132 * nfs4 vfs operations. 
133 */ 134 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 135 static int nfs4_unmount(vfs_t *, int, cred_t *); 136 static int nfs4_root(vfs_t *, vnode_t **); 137 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 138 static int nfs4_sync(vfs_t *, short, cred_t *); 139 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 140 static int nfs4_mountroot(vfs_t *, whymountroot_t); 141 static void nfs4_freevfs(vfs_t *); 142 143 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 144 int, cred_t *, zone_t *); 145 146 vfsops_t *nfs4_vfsops; 147 148 int nfs4_vfsinit(void); 149 void nfs4_vfsfini(void); 150 static void nfs4setclientid_init(void); 151 static void nfs4setclientid_fini(void); 152 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 153 struct nfs4_server *, nfs4_error_t *, int *); 154 static void destroy_nfs4_server(nfs4_server_t *); 155 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 156 157 /* 158 * Initialize the vfs structure 159 */ 160 161 static int nfs4fstyp; 162 163 164 /* 165 * Debug variable to check for rdma based 166 * transport startup and cleanup. Controlled 167 * through /etc/system. Off by default. 
 */
extern int rdma_debug;

/*
 * nfs4init() is the file-system-type initialization entry point: it
 * registers the NFSv4 vfs and vnode operation templates with the
 * kernel and records the assigned fstype index in nfs4fstyp.
 */
int
nfs4init(int fstyp, char *name)
{
	static const fs_operation_def_t nfs4_vfsops_template[] = {
		VFSNAME_MOUNT, nfs4_mount,
		VFSNAME_UNMOUNT, nfs4_unmount,
		VFSNAME_ROOT, nfs4_root,
		VFSNAME_STATVFS, nfs4_statvfs,
		VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync,
		VFSNAME_VGET, nfs4_vget,
		VFSNAME_MOUNTROOT, nfs4_mountroot,
		VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs,
		NULL, NULL
	};
	int error;

	error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
	if (error != 0) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vfs ops template");
		return (error);
	}

	error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
	if (error != 0) {
		/* undo the vfsops registration before failing */
		(void) vfs_freevfsops_by_type(fstyp);
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vnode ops template");
		return (error);
	}

	nfs4fstyp = fstyp;

	(void) nfs4_vfsinit();

	(void) nfs4_init_dot_entries();

	return (0);
}

/*
 * Tear down what nfs4init() set up (dot entries, then module state).
 */
void
nfs4fini(void)
{
	(void) nfs4_destroy_dot_entries();
	nfs4_vfsfini();
}

/*
 * Create a new sec_data structure to store AUTH_DH related data:
 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
 * flag set for NFS V4 since we are avoiding contacting the rpcbind
 * daemon and are using the IP time service (IPPORT_TIMESERVER).
 *
 * Returns NULL if the caller passed no syncaddr/netname to copy.
 * The returned sec_data (and the deep copies of syncaddr, knconf and
 * netname hung off it) can be freed by sec_clnt_freeinfo().
 */
struct sec_data *
create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
		struct knetconfig *knconf) {
	struct sec_data *secdata;
	dh_k4_clntdata_t *data;
	char *pf, *p;

	if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
		return (NULL);

	secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
	secdata->flags = 0;

	data = kmem_alloc(sizeof (*data), KM_SLEEP);

	/* deep-copy the time-sync address */
	data->syncaddr.maxlen = syncaddr->maxlen;
	data->syncaddr.len = syncaddr->len;
	data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
	bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);

	/*
	 * Duplicate the knconf information for the
	 * new opaque data.
	 */
	data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
	*data->knconf = *knconf;
	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
	bcopy(knconf->knc_proto, p, KNC_STRSIZE);
	data->knconf->knc_protofmly = pf;
	data->knconf->knc_proto = p;

	/* move server netname to the sec_data structure */
	data->netname = kmem_alloc(nlen, KM_SLEEP);
	bcopy(netname, data->netname, nlen);
	data->netnamelen = (int)nlen;

	secdata->secmod = AUTH_DH;
	secdata->rpcflavor = AUTH_DH;
	secdata->data = (caddr_t)data;

	return (secdata);
}

/*
 * Returns 1 if svp duplicates (same address length, protocol family
 * and address bytes) an entry already on the svp_head list, else 0.
 */
static int
nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
{
	servinfo4_t *si;

	/*
	 * Iterate over the servinfo4 list to make sure
	 * we do not have a duplicate. Skip any servinfo4
	 * that has been marked "NOT IN USE"
	 */
	for (si = svp_head; si; si = si->sv_next) {
		(void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
		if (si->sv_flags & SV4_NOTINUSE) {
			nfs_rw_exit(&si->sv_lock);
			continue;
		}
		nfs_rw_exit(&si->sv_lock);
		if (si == svp)
			continue;
		if (si->sv_addr.len == svp->sv_addr.len &&
		    strcmp(si->sv_knconf->knc_protofmly,
			svp->sv_knconf->knc_protofmly) == 0 &&
		    bcmp(si->sv_addr.buf, svp->sv_addr.buf,
			si->sv_addr.len) == 0) {
			/* it's a duplicate */
			return (1);
		}
	}
	/* it's not a duplicate */
	return (0);
}

/*
 * nfs mount vfsop
 * Set up mount info record and attach it to vfs struct.
 *
 * Copies the (possibly linked, for failover) nfs_args list in from
 * user-land one entry at a time via the "more:" loop, building a
 * servinfo4 list, then gets the root vnode and registers the client
 * id with the server.  All error paths after the first servinfo4 is
 * allocated funnel through "errout:" to free the list.
 */
static int
nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char *data = uap->dataptr;
	int error;
	vnode_t *rtvp;			/* the server's root */
	mntinfo4_t *mi;			/* mount info, pointed at by vfs */
	size_t hlen;			/* length of hostname */
	size_t nlen;			/* length of netname */
	char netname[MAXNETNAMELEN+1];	/* server's netname */
	struct netbuf addr;		/* server's address */
	struct netbuf syncaddr;		/* AUTH_DES time sync addr */
	struct knetconfig *knconf;	/* transport knetconfig structure */
	struct knetconfig *rdma_knconf;	/* rdma transport structure */
	rnode4_t *rp;
	struct servinfo4 *svp;		/* nfs server info */
	struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
	struct servinfo4 *svp_head;	/* first nfs server info */
	struct servinfo4 *svp_2ndlast;	/* 2nd last in server info list */
	struct sec_data *secdata;	/* security data */
	STRUCT_DECL(nfs_args, args);	/* nfs mount arguments */
	STRUCT_DECL(knetconfig, knconf_tmp);
	STRUCT_DECL(netbuf, addr_tmp);
	int flags, addr_type;
	char *p, *pf;
	struct pathname pn;
	char *userbufptr;
	zone_t *zone = curproc->p_zone;
	nfs4_error_t n4e;

	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
		return (EPERM);
	if (mvp->v_type != VDIR)
		return (ENOTDIR);
	/*
	 * get arguments
	 *
	 * nfs_args is now versioned and is extensible, so
	 * uap->datalen might be different from sizeof (args)
	 * in a compatible situation.
	 */
more:
	STRUCT_INIT(args, get_udatamodel());
	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
	if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
	    STRUCT_SIZE(args))))
		return (EFAULT);

	flags = STRUCT_FGET(args, flags);

	/*
	 * If the request changes the locking type, disallow the remount,
	 * because it's questionable whether we can transfer the
	 * locking state correctly.
	 */
	if (uap->flags & MS_REMOUNT) {
		if ((mi = VFTOMI4(vfsp)) != NULL) {
			uint_t new_mi_llock;
			uint_t old_mi_llock;

			new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
			old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
			if (old_mi_llock != new_mi_llock)
				return (EBUSY);
		}
		return (0);
	}

	/* refuse to mount over a busy or root mount point unless MS_OVERLAY */
	mutex_enter(&mvp->v_lock);
	if (!(uap->flags & MS_OVERLAY) &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/* make sure things are zeroed for errout: */
	rtvp = NULL;
	mi = NULL;
	addr.buf = NULL;
	syncaddr.buf = NULL;
	secdata = NULL;

	/*
	 * A valid knetconfig structure is required.
	 */
	if (!(flags & NFSMNT_KNCONF))
		return (EINVAL);

	/*
	 * Allocate a servinfo4 struct and link it at the tail of the
	 * list built so far (svp_head/svp_tail persist across "goto more").
	 */
	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
	if (svp_tail) {
		svp_2ndlast = svp_tail;
		svp_tail->sv_next = svp;
	} else {
		svp_head = svp;
		svp_2ndlast = svp;
	}

	svp_tail = svp;

	/*
	 * Allocate space for a knetconfig structure and
	 * its strings and copy in from user-land.
	 */
	knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
	svp->sv_knconf = knconf;
	STRUCT_INIT(knconf_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
	    STRUCT_SIZE(knconf_tmp))) {
		sv4_free(svp_head);
		return (EFAULT);
	}

	knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
	knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
	knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
	/* 32-bit callers pass a 32-bit dev_t that must be expanded */
	if (get_udatamodel() != DATAMODEL_LP64) {
		knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
	} else {
		knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
	}

	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		sv4_free(svp_head);
		return (error);
	}
	error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		sv4_free(svp_head);
		return (error);
	}
	/* NFSv4 is not supported over UDP */
	if (strcmp(p, NC_UDP) == 0) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		sv4_free(svp_head);
		return (ENOTSUP);
	}
	knconf->knc_protofmly = pf;
	knconf->knc_proto = p;

	/*
	 * Get server address
	 */
	STRUCT_INIT(addr_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
	    STRUCT_SIZE(addr_tmp))) {
		error = EFAULT;
		goto errout;
	}

	userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf);
	addr.len = STRUCT_FGET(addr_tmp, len);
	addr.buf = kmem_alloc(addr.len, KM_SLEEP);
	addr.maxlen = addr.len;
	if (copyin(userbufptr, addr.buf, addr.len)) {
		kmem_free(addr.buf, addr.len);
		error = EFAULT;
		goto errout;
	}

	svp->sv_addr = addr;

	/*
	 * Get the root fhandle
	 */
	error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);

	if (error)
		goto errout;

	/* Volatile fh: keep server paths, so use actual-size strings */
	svp->sv_path = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
	bcopy(pn.pn_path, svp->sv_path, pn.pn_pathlen);
	svp->sv_path[pn.pn_pathlen] = '\0';
	svp->sv_pathlen = pn.pn_pathlen + 1;
	pn_free(&pn);

	/*
	 * Get server's hostname
	 */
	if (flags & NFSMNT_HOSTNAME) {
		error = copyinstr(STRUCT_FGETP(args, hostname),
		    netname, sizeof (netname), &hlen);
		if (error)
			goto errout;
	} else {
		char *p = "unknown-host";
		hlen = strlen(p) + 1;
		(void) strcpy(netname, p);
	}
	svp->sv_hostnamelen = hlen;
	svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
	(void) strcpy(svp->sv_hostname, netname);

	/*
	 * RDMA MOUNT SUPPORT FOR NFS v4.
	 * Establish, is it possible to use RDMA, if so overload the
	 * knconf with rdma specific knconf and free the original knconf.
	 */
	if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
		/*
		 * Determine the addr type for RDMA, IPv4 or v6.
		 *
		 * NOTE(review): addr_type is left uninitialized if the
		 * protocol family is neither NC_INET nor NC_INET6 —
		 * confirm rdma_reachable() tolerates that, or that
		 * the family is guaranteed by this point.
		 */
		if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
			addr_type = AF_INET;
		else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
			addr_type = AF_INET6;

		if (rdma_reachable(addr_type, &svp->sv_addr,
		    &rdma_knconf) == 0) {
			/*
			 * If successful, hijack the original knconf and
			 * replace with the new one, depending on the flags.
			 */
			svp->sv_origknconf = svp->sv_knconf;
			svp->sv_knconf = rdma_knconf;
			knconf = rdma_knconf;
		} else {
			if (flags & NFSMNT_TRYRDMA) {
#ifdef DEBUG
				if (rdma_debug)
					zcmn_err(getzoneid(), CE_WARN,
					    "no RDMA onboard, revert\n");
#endif
			}

			if (flags & NFSMNT_DORDMA) {
				/*
				 * If proto=rdma is specified and no RDMA
				 * path to this server is available then
				 * ditch this server.
				 * This is not included in the mountable
				 * server list or the replica list.
				 * Check if more servers are specified;
				 * Failover case, otherwise bail out of mount.
				 */
				if (STRUCT_FGET(args, nfs_args_ext) ==
				    NFS_ARGS_EXTB && STRUCT_FGETP(args,
				    nfs_ext_u.nfs_extB.next) != NULL) {
					if (uap->flags & MS_RDONLY &&
					    !(flags & NFSMNT_SOFT)) {
						data = (char *)
						    STRUCT_FGETP(args,
						    nfs_ext_u.nfs_extB.next);
						/*
						 * Drop this server from the
						 * list and restart the copyin
						 * loop on the next nfs_args
						 * entry.
						 */
						if (svp_head->sv_next == NULL) {
							svp_tail = NULL;
							svp_2ndlast = NULL;
							sv4_free(svp_head);
							goto more;
						} else {
							svp_tail = svp_2ndlast;
							svp_2ndlast->sv_next =
							    NULL;
							sv4_free(svp);
							goto more;
						}
					}
				} else {
					/*
					 * This is the last server specified
					 * in the nfs_args list passed down
					 * and it's not rdma capable.
					 */
					if (svp_head->sv_next == NULL) {
						/*
						 * Is this the only one
						 */
						error = EINVAL;
#ifdef DEBUG
						if (rdma_debug)
							zcmn_err(getzoneid(),
							    CE_WARN,
							    "No RDMA srv");
#endif
						goto errout;
					} else {
						/*
						 * There is a list, since some
						 * servers specified before
						 * this passed all requirements
						 */
						svp_tail = svp_2ndlast;
						svp_2ndlast->sv_next = NULL;
						sv4_free(svp);
						goto proceed;
					}
				}
			}
		}
	}

	/*
	 * If there are syncaddr and netname data, load them in. This is
	 * to support data needed for NFSV4 when AUTH_DH is the negotiated
	 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
	 */
	netname[0] = '\0';
	if (flags & NFSMNT_SECURE) {

		/* get syncaddr */
		STRUCT_INIT(addr_tmp, get_udatamodel());
		if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
		    STRUCT_SIZE(addr_tmp))) {
			error = EINVAL;
			goto errout;
		}
		userbufptr = STRUCT_FGETP(addr_tmp, buf);
		syncaddr.len = STRUCT_FGET(addr_tmp, len);
		syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
		syncaddr.maxlen = syncaddr.len;
		if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
			kmem_free(syncaddr.buf, syncaddr.len);
			error = EFAULT;
			goto errout;
		}

		/* get server's netname */
		if (copyinstr(STRUCT_FGETP(args, netname), netname,
		    sizeof (netname), &nlen)) {
			kmem_free(syncaddr.buf, syncaddr.len);
			error = EFAULT;
			goto errout;
		}
		netname[nlen] = '\0';

		svp->sv_dhsec = create_authdh_data(netname, nlen, &syncaddr,
		    knconf);
	}

	/*
	 * Get the extension data which has the security data structure.
	 * This includes data for AUTH_SYS as well.
	 */
	if (flags & NFSMNT_NEWARGS) {
		switch (STRUCT_FGET(args, nfs_args_ext)) {
		case NFS_ARGS_EXTA:
		case NFS_ARGS_EXTB:
			/*
			 * Indicating the application is using the new
			 * sec_data structure to pass in the security
			 * data.
			 */
			if (STRUCT_FGETP(args,
			    nfs_ext_u.nfs_extA.secdata) == NULL) {
				error = EINVAL;
			} else {
				error = sec_clnt_loadinfo(
				    (struct sec_data *)STRUCT_FGETP(args,
					nfs_ext_u.nfs_extA.secdata),
				    &secdata, get_udatamodel());
			}
			break;

		default:
			error = EINVAL;
			break;
		}

	} else if (flags & NFSMNT_SECURE) {
		/*
		 * NFSMNT_SECURE is deprecated but we keep it
		 * to support the rogue user generated application
		 * that may use this undocumented interface to do
		 * AUTH_DH security.
		 */
		secdata = create_authdh_data(netname, nlen, &syncaddr, knconf);

	} else {
		/* no security info supplied — default to AUTH_SYS */
		secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	}

	svp->sv_secdata = secdata;

	/* syncaddr is no longer needed. */
	if (syncaddr.buf != NULL)
		kmem_free(syncaddr.buf, syncaddr.len);

	/*
	 * User does not explicitly specify a flavor, and a user
	 * defined default flavor is passed down.
	 */
	if (flags & NFSMNT_SECDEFAULT) {
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
		svp->sv_flags |= SV4_TRYSECDEFAULT;
		nfs_rw_exit(&svp->sv_lock);
	}

	/*
	 * Failover support:
	 *
	 * We may have a linked list of nfs_args structures,
	 * which means the user is looking for failover. If
	 * the mount is either not "read-only" or "soft",
	 * we want to bail out with EINVAL.
	 */
	if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB &&
	    STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) {
		if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
			data = (char *)STRUCT_FGETP(args,
			    nfs_ext_u.nfs_extB.next);
			goto more;
		}
		error = EINVAL;
		goto errout;
	}

	/*
	 * Determine the zone we're being mounted into.
	 */
	if (getzoneid() == GLOBAL_ZONEID) {
		zone_t *mntzone;

		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
		ASSERT(mntzone != NULL);
		zone_rele(mntzone);
		if (mntzone != zone) {
			error = EBUSY;
			goto errout;
		}
	}

	/*
	 * Stop the mount from going any further if the zone is going away.
	 */
	if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
		error = EBUSY;
		goto errout;
	}

	/*
	 * Get root vnode.
	 */
proceed:
	error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, zone);

	if (error)
		goto errout;

	mi = VTOMI4(rtvp);

	/*
	 * Send client id to the server, if necessary
	 */
	nfs4_error_zinit(&n4e);
	nfs4setclientid(mi, cr, FALSE, &n4e);
	error = n4e.error;

	if (error)
		goto errout;

	/*
	 * Set option fields in the mount info record
	 */

	if (svp_head->sv_next) {
		/* multiple servers imply failover; force local locking */
		mutex_enter(&mi->mi_lock);
		mi->mi_flags |= MI4_LLOCK;
		mutex_exit(&mi->mi_lock);
	}

	error = nfs4_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args));

errout:
	if (error) {
		if (rtvp != NULL) {
			rp = VTOR4(rtvp);
			if (rp->r_flags & R4HASHED)
				rp4_rmhash(rp);
			if (rp->r_flags & R4FILEIDMAP)
				rp4_fileid_map_remove(rp);
		}
		if (mi != NULL) {
			nfs4_async_stop(vfsp);
			nfs4_async_manager_stop(vfsp);
			nfs4_remove_mi_from_server(mi, NULL);
			/*
			 * In this error path we need to sfh4_rele() before
			 * we free the mntinfo4_t as sfh4_rele() has a
			 * dependency on mi_fh_lock.
			 */
			if (rtvp != NULL)
				VN_RELE(rtvp);
			if (mi->mi_io_kstats) {
				kstat_delete(mi->mi_io_kstats);
				mi->mi_io_kstats = NULL;
			}
			if (mi->mi_ro_kstats) {
				kstat_delete(mi->mi_ro_kstats);
				mi->mi_ro_kstats = NULL;
			}
			if (mi->mi_recov_ksp) {
				kstat_delete(mi->mi_recov_ksp);
				mi->mi_recov_ksp = NULL;
			}
			nfs_free_mi4(mi);
			return (error);
		}
		sv4_free(svp_head);
	}

	if (rtvp != NULL)
		VN_RELE(rtvp);

	return (error);
}

#ifdef DEBUG
#define	VERS_MSG	"NFS4 server "
#else
#define	VERS_MSG	"NFS server "
#endif

#define	READ_MSG \
	VERS_MSG "%s returned 0 for read transfer size"
#define	WRITE_MSG \
	VERS_MSG "%s returned 0 for write transfer size"
#define	SIZE_MSG \
	VERS_MSG "%s returned 0 for maximum file size"

/*
 * Get the symbolic link text from the server for a given filehandle
 * of that symlink.
 *
 * (get symlink text) PUTFH READLINK
 *
 * On success, *linktextp receives a kmem-allocated copy of the link
 * text (from utf8_to_str); the caller is responsible for freeing it.
 * Retries up to nfs4_max_mount_retry times if recovery is initiated.
 */
static int
getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
    int flags)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	nfs_argop4 argop[2];
	nfs_resop4 *resop;
	READLINK4res *lr_res;
	uint_t len;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	nfs4_sharedfh_t *sfh;
	nfs4_error_t e;
	int num_retry = nfs4_max_mount_retry;
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	sfh = sfh4_get(fh, mi);
	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	nfs4_error_zinit(&e);

	args.array_len = 2;
	args.array = argop;
	args.ctag = TAG_GET_SYMLINK;

	if (! recovery) {
		e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
		if (e.error) {
			sfh4_rele(&sfh);
			return (e.error);
		}
	}

	/* 0. putfh symlink fh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. readlink */
	argop[1].argop = OP_READLINK;

	doqueue = 1;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);

	if (needrecov && !recovery && num_retry-- > 0) {

		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
		    "getlinktext_otw: initiating recovery\n"));

		if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
		    OP_READLINK, NULL) == FALSE) {
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
			if (!e.error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			goto recov_retry;
		}
	}

	/*
	 * If non-NFS4 pcol error and/or we weren't able to recover.
	 */
	if (e.error != 0) {
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	if (res.status) {
		e.error = geterrno4(res.status);
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	/* res.status == NFS4_OK */
	ASSERT(res.status == NFS4_OK);

	resop = &res.array[1];	/* readlink res */
	lr_res = &resop->nfs_resop4_u.opreadlink;

	/* treat symlink name as data */
	*linktextp = utf8_to_str(&lr_res->link, &len, NULL);

	if (! recovery)
		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
	sfh4_rele(&sfh);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

	return (0);
}

/*
 * Skip over consecutive slashes and "/./" in a pathname.
 * Advances pnp->pn_path (and shrinks pn_pathlen) in place.
 */
void
pathname_skipslashdot(struct pathname *pnp)
{
	char *c1, *c2;

	while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {

		c1 = pnp->pn_path + 1;
		c2 = pnp->pn_path + 2;

		if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
			pnp->pn_path = pnp->pn_path + 2;	/* skip "/." */
			pnp->pn_pathlen = pnp->pn_pathlen - 2;
		} else {
			pnp->pn_path++;
			pnp->pn_pathlen--;
		}
	}
}

/*
 * Resolve a symbolic link path. The symlink is in the nth component of
 * svp->sv_path and has an nfs4 file handle "fh".
 * Upon return, the sv_path will point to the new path that has the nth
 * component resolved to its symlink text.
 */
int
resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
    cred_t *cr, int flags)
{
	char *oldpath;
	char *symlink, *newpath;
	struct pathname oldpn, newpn;
	char component[MAXNAMELEN];
	int i, addlen, error = 0;
	int oldpathlen;

	/* Get the symbolic link text over the wire. */
	error = getlinktext_otw(mi, fh, &symlink, cr, flags);

	if (error || symlink == NULL || strlen(symlink) == 0)
		return (error);

	/*
	 * Compose the new pathname.
	 * Note:
	 *  - only the nth component is resolved for the pathname.
	 *  - pathname.pn_pathlen does not count the ending null byte.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	oldpath = svp->sv_path;
	oldpathlen = svp->sv_pathlen;
	if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
		nfs_rw_exit(&svp->sv_lock);
		kmem_free(symlink, strlen(symlink) + 1);
		return (error);
	}
	nfs_rw_exit(&svp->sv_lock);
	pn_alloc(&newpn);

	/*
	 * Skip over previous components from the oldpath so that the
	 * oldpn.pn_path will point to the symlink component. Skip
	 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
	 * pn_getcomponent can get the component.
	 */
	for (i = 1; i < nth; i++) {
		pathname_skipslashdot(&oldpn);
		error = pn_getcomponent(&oldpn, component);
		if (error)
			goto out;
	}

	/*
	 * Copy the old path up to the component right before the symlink
	 * if the symlink is not an absolute path.
	 */
	if (symlink[0] != '/') {
		addlen = oldpn.pn_path - oldpn.pn_buf;
		bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
		newpn.pn_path += addlen;
		newpn.pn_buf[newpn.pn_pathlen] = '/';
		newpn.pn_pathlen++;
		newpn.pn_path++;
	}

	/* copy the resolved symbolic link text */
	addlen = strlen(symlink);
	if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
		error = ENAMETOOLONG;
		goto out;
	}
	bcopy(symlink, newpn.pn_path, addlen);
	newpn.pn_pathlen += addlen;
	newpn.pn_path += addlen;

	/*
	 * Check if there is any remaining path after the symlink component.
	 * First, skip the symlink component.
	 */
	pathname_skipslashdot(&oldpn);
	if (error = pn_getcomponent(&oldpn, component))
		goto out;

	addlen = pn_pathleft(&oldpn);	/* includes counting the slash */

	/*
	 * Copy the remaining path to the new pathname if there is any.
	 */
	if (addlen > 0) {
		if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
			error = ENAMETOOLONG;
			goto out;
		}
		bcopy(oldpn.pn_path, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
	}
	newpn.pn_buf[newpn.pn_pathlen] = '\0';

	/* get the newpath and store it in the servinfo4_t */
	newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
	bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
	newpath[newpn.pn_pathlen] = '\0';

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	svp->sv_path = newpath;
	svp->sv_pathlen = strlen(newpath) + 1;
	nfs_rw_exit(&svp->sv_lock);

	/* free the old path only on success; "out:" keeps it on error */
	kmem_free(oldpath, oldpathlen);
out:
	kmem_free(symlink, strlen(symlink) + 1);
	pn_free(&newpn);
	pn_free(&oldpn);

	return (error);
}

/*
 * Get the root filehandle for the given filesystem and server, and update
 * svp.
 *
 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
 * to coordinate with recovery.  Otherwise, the caller is assumed to be
 * the recovery thread or have already done a start_fop.
 *
 * Errors are returned by the nfs4_error_t parameter.
1097 */ 1098 1099 static void 1100 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1101 int flags, cred_t *cr, nfs4_error_t *ep) 1102 { 1103 COMPOUND4args_clnt args; 1104 COMPOUND4res_clnt res; 1105 int doqueue = 1; 1106 nfs_argop4 *argop; 1107 nfs_resop4 *resop; 1108 nfs4_ga_res_t *garp; 1109 int num_argops; 1110 lookup4_param_t lookuparg; 1111 nfs_fh4 *tmpfhp; 1112 nfs_fh4 *resfhp; 1113 bool_t needrecov = FALSE; 1114 nfs4_recov_state_t recov_state; 1115 int llndx; 1116 int nthcomp; 1117 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1118 1119 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1120 ASSERT(svp->sv_path != NULL); 1121 if (svp->sv_path[0] == '\0') { 1122 nfs_rw_exit(&svp->sv_lock); 1123 nfs4_error_init(ep, EINVAL); 1124 return; 1125 } 1126 nfs_rw_exit(&svp->sv_lock); 1127 1128 recov_state.rs_flags = 0; 1129 recov_state.rs_num_retry_despite_err = 0; 1130 recov_retry: 1131 nfs4_error_zinit(ep); 1132 1133 if (!recovery) { 1134 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1135 &recov_state, NULL); 1136 1137 /* 1138 * If recovery has been started and this request as 1139 * initiated by a mount, then we must wait for recovery 1140 * to finish before proceeding, otherwise, the error 1141 * cleanup would remove data structures needed by the 1142 * recovery thread. 1143 */ 1144 if (ep->error) { 1145 mutex_enter(&mi->mi_lock); 1146 if (mi->mi_flags & MI4_MOUNTING) { 1147 mi->mi_flags |= MI4_RECOV_FAIL; 1148 mi->mi_error = EIO; 1149 1150 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1151 "nfs4getfh_otw: waiting 4 recovery\n")); 1152 1153 while (mi->mi_flags & MI4_RECOV_ACTIV) 1154 cv_wait(&mi->mi_failover_cv, 1155 &mi->mi_lock); 1156 } 1157 mutex_exit(&mi->mi_lock); 1158 return; 1159 } 1160 1161 /* 1162 * If the client does not specify a specific flavor to use 1163 * and has not gotten a secinfo list from the server yet, 1164 * retrieve the secinfo list from the server and use a 1165 * flavor from the list to mount. 
1166 * 1167 * If fail to get the secinfo list from the server, then 1168 * try the default flavor. 1169 */ 1170 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1171 svp->sv_secinfo == NULL) { 1172 (void) nfs4_secinfo_path(mi, cr, FALSE); 1173 } 1174 } 1175 1176 if (recovery) 1177 args.ctag = TAG_REMAP_MOUNT; 1178 else 1179 args.ctag = TAG_MOUNT; 1180 1181 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1182 lookuparg.argsp = &args; 1183 lookuparg.resp = &res; 1184 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1185 lookuparg.trailer_len = 0; 1186 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1187 lookuparg.mi = mi; 1188 1189 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1190 ASSERT(svp->sv_path != NULL); 1191 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1192 nfs_rw_exit(&svp->sv_lock); 1193 1194 argop = args.array; 1195 num_argops = args.array_len; 1196 1197 /* choose public or root filehandle */ 1198 if (flags & NFS4_GETFH_PUBLIC) 1199 argop[0].argop = OP_PUTPUBFH; 1200 else 1201 argop[0].argop = OP_PUTROOTFH; 1202 1203 /* get fh */ 1204 argop[1].argop = OP_GETFH; 1205 1206 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1207 "nfs4getfh_otw: %s call, mi 0x%p", 1208 needrecov ? 
"recov" : "first", (void *)mi)); 1209 1210 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1211 1212 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1213 1214 if (needrecov) { 1215 bool_t abort; 1216 1217 if (recovery) { 1218 nfs4args_lookup_free(argop, num_argops); 1219 kmem_free(argop, 1220 lookuparg.arglen * sizeof (nfs_argop4)); 1221 if (!ep->error) 1222 (void) xdr_free(xdr_COMPOUND4res_clnt, 1223 (caddr_t)&res); 1224 return; 1225 } 1226 1227 NFS4_DEBUG(nfs4_client_recov_debug, 1228 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1229 1230 abort = nfs4_start_recovery(ep, mi, NULL, 1231 NULL, NULL, NULL, OP_GETFH, NULL); 1232 if (!ep->error) { 1233 ep->error = geterrno4(res.status); 1234 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1235 } 1236 nfs4args_lookup_free(argop, num_argops); 1237 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1238 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1239 /* have another go? */ 1240 if (abort == FALSE) 1241 goto recov_retry; 1242 return; 1243 } 1244 1245 /* 1246 * No recovery, but check if error is set. 1247 */ 1248 if (ep->error) { 1249 nfs4args_lookup_free(argop, num_argops); 1250 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1251 if (!recovery) 1252 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1253 needrecov); 1254 return; 1255 } 1256 1257 is_link_err: 1258 1259 /* for non-recovery errors */ 1260 if (res.status && res.status != NFS4ERR_SYMLINK) { 1261 if (!recovery) { 1262 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1263 needrecov); 1264 } 1265 nfs4args_lookup_free(argop, num_argops); 1266 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1267 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1268 return; 1269 } 1270 1271 /* 1272 * If any intermediate component in the path is a symbolic link, 1273 * resolve the symlink, then try mount again using the new path. 
1274 */ 1275 if (res.status == NFS4ERR_SYMLINK) { 1276 int where; 1277 1278 /* 1279 * This must be from OP_LOOKUP failure. The (cfh) for this 1280 * OP_LOOKUP is a symlink node. Found out where the 1281 * OP_GETFH is for the (cfh) that is a symlink node. 1282 * 1283 * Example: 1284 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1285 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1286 * 1287 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1288 * In this case, where = 7, nthcomp = 2. 1289 */ 1290 where = res.array_len - 2; 1291 ASSERT(where > 0); 1292 1293 resop = &res.array[where - 1]; 1294 ASSERT(resop->resop == OP_GETFH); 1295 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1296 nthcomp = res.array_len/3 - 1; 1297 1298 /* 1299 * Need to call nfs4_end_op before resolve_sympath to avoid 1300 * potential nfs4_start_op deadlock. 1301 */ 1302 if (!recovery) 1303 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1304 needrecov); 1305 1306 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1307 flags); 1308 1309 nfs4args_lookup_free(argop, num_argops); 1310 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1311 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1312 1313 if (ep->error) 1314 return; 1315 1316 goto recov_retry; 1317 } 1318 1319 /* getfh */ 1320 resop = &res.array[res.array_len - 2]; 1321 ASSERT(resop->resop == OP_GETFH); 1322 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1323 1324 /* getattr fsinfo res */ 1325 resop++; 1326 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1327 1328 *vtp = garp->n4g_va.va_type; 1329 1330 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1331 1332 mutex_enter(&mi->mi_lock); 1333 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1334 mi->mi_flags |= MI4_LINK; 1335 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1336 mi->mi_flags |= MI4_SYMLINK; 1337 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1338 mi->mi_flags |= MI4_ACL; 1339 mutex_exit(&mi->mi_lock); 
1340 1341 if (garp->n4g_ext_res->n4g_maxread == 0) 1342 mi->mi_tsize = 1343 MIN(MAXBSIZE, mi->mi_tsize); 1344 else 1345 mi->mi_tsize = 1346 MIN(garp->n4g_ext_res->n4g_maxread, 1347 mi->mi_tsize); 1348 1349 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1350 mi->mi_stsize = 1351 MIN(MAXBSIZE, mi->mi_stsize); 1352 else 1353 mi->mi_stsize = 1354 MIN(garp->n4g_ext_res->n4g_maxwrite, 1355 mi->mi_stsize); 1356 1357 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1358 mi->mi_maxfilesize = 1359 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1360 mi->mi_maxfilesize); 1361 1362 /* 1363 * If the final component is a a symbolic link, resolve the symlink, 1364 * then try mount again using the new path. 1365 * 1366 * Assume no symbolic link for root filesysm "/". 1367 */ 1368 if (*vtp == VLNK) { 1369 /* 1370 * nthcomp is the total result length minus 1371 * the 1st 2 OPs (PUTROOTFH, GETFH), 1372 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1373 * 1374 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1375 * LOOKUP 2nd-comp GETFH GETATTR 1376 * 1377 * (8 - 2)/3 = 2 1378 */ 1379 nthcomp = (res.array_len - 2)/3; 1380 1381 /* 1382 * Need to call nfs4_end_op before resolve_sympath to avoid 1383 * potential nfs4_start_op deadlock. See RFE 4777612. 1384 */ 1385 if (!recovery) 1386 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1387 needrecov); 1388 1389 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1390 flags); 1391 1392 nfs4args_lookup_free(argop, num_argops); 1393 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1394 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1395 1396 if (ep->error) 1397 return; 1398 1399 goto recov_retry; 1400 } 1401 1402 /* 1403 * We need to figure out where in the compound the getfh 1404 * for the parent directory is. If the object to be mounted is 1405 * the root, then there is no lookup at all: 1406 * PUTROOTFH, GETFH. 
1407 * If the object to be mounted is in the root, then the compound is: 1408 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1409 * In either of these cases, the index of the GETFH is 1. 1410 * If it is not at the root, then it's something like: 1411 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1412 * LOOKUP, GETFH, GETATTR 1413 * In this case, the index is llndx (last lookup index) - 2. 1414 */ 1415 if (llndx == -1 || llndx == 2) 1416 resop = &res.array[1]; 1417 else { 1418 ASSERT(llndx > 2); 1419 resop = &res.array[llndx-2]; 1420 } 1421 1422 ASSERT(resop->resop == OP_GETFH); 1423 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1424 1425 /* save the filehandles for the replica */ 1426 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1427 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1428 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1429 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1430 tmpfhp->nfs_fh4_len); 1431 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1432 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1433 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1434 1435 /* initialize fsid and supp_attrs for server fs */ 1436 svp->sv_fsid = garp->n4g_fsid; 1437 svp->sv_supp_attrs = 1438 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1439 1440 nfs_rw_exit(&svp->sv_lock); 1441 1442 nfs4args_lookup_free(argop, num_argops); 1443 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1444 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1445 if (!recovery) 1446 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1447 } 1448 1449 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1450 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1451 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1452 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1453 1454 /* 1455 * Remap the root filehandle for the given filesystem. 
1456 * 1457 * results returned via the nfs4_error_t parameter. 1458 */ 1459 void 1460 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1461 { 1462 struct servinfo4 *svp; 1463 vtype_t vtype; 1464 nfs_fh4 rootfh; 1465 int getfh_flags; 1466 char *orig_sv_path; 1467 int orig_sv_pathlen, num_retry; 1468 1469 mutex_enter(&mi->mi_lock); 1470 svp = mi->mi_curr_serv; 1471 getfh_flags = 1472 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1473 getfh_flags |= 1474 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1475 mutex_exit(&mi->mi_lock); 1476 1477 /* 1478 * Just in case server path being mounted contains 1479 * symlinks and fails w/STALE, save the initial sv_path 1480 * so we can redrive the initial mount compound with the 1481 * initial sv_path -- not a symlink-expanded version. 1482 * 1483 * This could only happen if a symlink was expanded 1484 * and the expanded mount compound failed stale. Because 1485 * it could be the case that the symlink was removed at 1486 * the server (and replaced with another symlink/dir, 1487 * we need to use the initial sv_path when attempting 1488 * to re-lookup everything and recover. 1489 */ 1490 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1491 orig_sv_pathlen = svp->sv_pathlen; 1492 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1493 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1494 nfs_rw_exit(&svp->sv_lock); 1495 1496 num_retry = nfs4_max_mount_retry; 1497 1498 do { 1499 /* 1500 * Get the root fh from the server. Retry nfs4_max_mount_retry 1501 * (2) times if it fails with STALE since the recovery 1502 * infrastructure doesn't do STALE recovery for components 1503 * of the server path to the object being mounted. 1504 */ 1505 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1506 1507 if (ep->error == 0 && ep->stat == NFS4_OK) 1508 break; 1509 1510 /* 1511 * For some reason, the mount compound failed. 
Before 1512 * retrying, we need to restore the original sv_path 1513 * because it might have contained symlinks that were 1514 * expanded by nfsgetfh_otw before the failure occurred. 1515 * replace current sv_path with orig sv_path -- just in case 1516 * it changed due to embedded symlinks. 1517 */ 1518 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1519 if (orig_sv_pathlen != svp->sv_pathlen) { 1520 kmem_free(svp->sv_path, svp->sv_pathlen); 1521 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1522 svp->sv_pathlen = orig_sv_pathlen; 1523 1524 } 1525 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1526 nfs_rw_exit(&svp->sv_lock); 1527 1528 } while (num_retry-- > 0); 1529 1530 kmem_free(orig_sv_path, orig_sv_pathlen); 1531 1532 if (ep->error != 0 || ep->stat != 0) { 1533 return; 1534 } 1535 1536 if (vtype != VNON && vtype != mi->mi_type) { 1537 /* shouldn't happen */ 1538 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1539 "nfs4_remap_root: server root vnode type (%d) doesn't " 1540 "match mount info (%d)", vtype, mi->mi_type); 1541 } 1542 1543 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1544 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1545 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1546 nfs_rw_exit(&svp->sv_lock); 1547 sfh4_update(mi->mi_rootfh, &rootfh); 1548 1549 #ifdef DEBUG 1550 /* 1551 * There shouldn't have been any other recovery activity on the 1552 * filesystem. 
1553 */ 1554 mutex_enter(&mi->mi_lock); 1555 ASSERT(mi->mi_curr_serv == svp); 1556 mutex_exit(&mi->mi_lock); 1557 #endif 1558 } 1559 1560 static int 1561 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1562 int flags, cred_t *cr, zone_t *zone) 1563 { 1564 vnode_t *rtvp = NULL; 1565 mntinfo4_t *mi; 1566 dev_t nfs_dev; 1567 int error = 0; 1568 rnode4_t *rp; 1569 int i; 1570 struct vattr va; 1571 vtype_t vtype = VNON; 1572 vtype_t tmp_vtype = VNON; 1573 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1574 nfs4_oo_hash_bucket_t *bucketp; 1575 nfs_fh4 fh; 1576 char *droptext = ""; 1577 struct nfs_stats *nfsstatsp; 1578 nfs4_fname_t *mfname; 1579 nfs4_error_t e; 1580 char *orig_sv_path; 1581 int orig_sv_pathlen, num_retry; 1582 cred_t *lcr = NULL, *tcr = cr; 1583 1584 nfsstatsp = zone_getspecific(nfsstat_zone_key, curproc->p_zone); 1585 ASSERT(nfsstatsp != NULL); 1586 1587 ASSERT(curproc->p_zone == zone); 1588 ASSERT(crgetref(cr)); 1589 1590 /* 1591 * Create a mount record and link it to the vfs struct. 
1592 */ 1593 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1594 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1595 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1596 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1597 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1598 1599 if (!(flags & NFSMNT_SOFT)) 1600 mi->mi_flags |= MI4_HARD; 1601 if ((flags & NFSMNT_NOPRINT)) 1602 mi->mi_flags |= MI4_NOPRINT; 1603 if (flags & NFSMNT_INT) 1604 mi->mi_flags |= MI4_INT; 1605 if (flags & NFSMNT_PUBLIC) 1606 mi->mi_flags |= MI4_PUBLIC; 1607 mi->mi_retrans = NFS_RETRIES; 1608 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1609 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1610 mi->mi_timeo = nfs4_cots_timeo; 1611 else 1612 mi->mi_timeo = NFS_TIMEO; 1613 mi->mi_prog = NFS_PROGRAM; 1614 mi->mi_vers = NFS_V4; 1615 mi->mi_rfsnames = rfsnames_v4; 1616 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1617 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1618 mi->mi_servers = svp; 1619 mi->mi_curr_serv = svp; 1620 mi->mi_acregmin = SEC2HR(ACREGMIN); 1621 mi->mi_acregmax = SEC2HR(ACREGMAX); 1622 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1623 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1624 mi->mi_fh_expire_type = FH4_PERSISTENT; 1625 mi->mi_clientid_next = NULL; 1626 mi->mi_clientid_prev = NULL; 1627 mi->mi_grace_wait = 0; 1628 mi->mi_error = 0; 1629 mi->mi_srvsettime = 0; 1630 1631 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1632 mi->mi_stsize = mi->mi_tsize; 1633 1634 if (flags & NFSMNT_DIRECTIO) 1635 mi->mi_flags |= MI4_DIRECTIO; 1636 1637 mi->mi_flags |= MI4_MOUNTING; 1638 1639 /* 1640 * Make a vfs struct for nfs. We do this here instead of below 1641 * because rtvp needs a vfs before we can do a getattr on it. 
1642 * 1643 * Assign a unique device id to the mount 1644 */ 1645 mutex_enter(&nfs_minor_lock); 1646 do { 1647 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1648 nfs_dev = makedevice(nfs_major, nfs_minor); 1649 } while (vfs_devismounted(nfs_dev)); 1650 mutex_exit(&nfs_minor_lock); 1651 1652 vfsp->vfs_dev = nfs_dev; 1653 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1654 vfsp->vfs_data = (caddr_t)mi; 1655 vfsp->vfs_fstype = nfsfstyp; 1656 vfsp->vfs_bsize = nfs4_bsize; 1657 1658 /* 1659 * Initialize fields used to support async putpage operations. 1660 */ 1661 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1662 mi->mi_async_clusters[i] = nfs4_async_clusters; 1663 mi->mi_async_init_clusters = nfs4_async_clusters; 1664 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1665 mi->mi_max_threads = nfs4_max_threads; 1666 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1667 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1668 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1669 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1670 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1671 1672 mi->mi_vfsp = vfsp; 1673 zone_hold(mi->mi_zone = zone); 1674 nfs4_mi_zonelist_add(mi); 1675 1676 /* 1677 * Initialize the <open owner/cred> hash table. 1678 */ 1679 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1680 bucketp = &(mi->mi_oo_list[i]); 1681 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1682 list_create(&bucketp->b_oo_hash_list, 1683 sizeof (nfs4_open_owner_t), 1684 offsetof(nfs4_open_owner_t, oo_hash_node)); 1685 } 1686 1687 /* 1688 * Initialize the freed open owner list. 
1689 */ 1690 mi->mi_foo_num = 0; 1691 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1692 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1693 offsetof(nfs4_open_owner_t, oo_foo_node)); 1694 1695 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1696 offsetof(nfs4_lost_rqst_t, lr_node)); 1697 1698 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1699 offsetof(nfs4_bseqid_entry_t, bs_node)); 1700 1701 /* 1702 * Initialize the msg buffer. 1703 */ 1704 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1705 offsetof(nfs4_debug_msg_t, msg_node)); 1706 mi->mi_msg_count = 0; 1707 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1708 1709 /* 1710 * Initialize kstats 1711 */ 1712 nfs4_mnt_kstat_init(vfsp); 1713 1714 /* 1715 * Initialize the shared filehandle pool, and get the fname for 1716 * the filesystem root. 1717 */ 1718 sfh4_createtab(&mi->mi_filehandles); 1719 mi->mi_fname = fn_get(NULL, "."); 1720 1721 /* 1722 * Initialize the fileid map. 1723 */ 1724 mutex_init(&mi->mi_fileid_lock, NULL, MUTEX_DEFAULT, NULL); 1725 rp4_fileid_map_init(&mi->mi_fileid_map); 1726 1727 /* 1728 * Save server path we're attempting to mount. 1729 */ 1730 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1731 orig_sv_pathlen = svp_head->sv_pathlen; 1732 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1733 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1734 nfs_rw_exit(&svp->sv_lock); 1735 1736 /* 1737 * Make the GETFH call to get root fh for each replica. 1738 */ 1739 if (svp_head->sv_next) 1740 droptext = ", dropping replica"; 1741 1742 /* 1743 * If the uid is set then set the creds for secure mounts 1744 * by proxy processes such as automountd. 
1745 */ 1746 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1747 if (svp->sv_secdata->uid != 0) { 1748 lcr = crdup(cr); 1749 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1750 tcr = lcr; 1751 } 1752 nfs_rw_exit(&svp->sv_lock); 1753 for (svp = svp_head; svp; svp = svp->sv_next) { 1754 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 1755 nfs_cmn_err(error, CE_WARN, 1756 VERS_MSG "Host %s is a duplicate%s", 1757 svp->sv_hostname, droptext); 1758 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1759 svp->sv_flags |= SV4_NOTINUSE; 1760 nfs_rw_exit(&svp->sv_lock); 1761 continue; 1762 } 1763 mi->mi_curr_serv = svp; 1764 1765 /* 1766 * Just in case server path being mounted contains 1767 * symlinks and fails w/STALE, save the initial sv_path 1768 * so we can redrive the initial mount compound with the 1769 * initial sv_path -- not a symlink-expanded version. 1770 * 1771 * This could only happen if a symlink was expanded 1772 * and the expanded mount compound failed stale. Because 1773 * it could be the case that the symlink was removed at 1774 * the server (and replaced with another symlink/dir, 1775 * we need to use the initial sv_path when attempting 1776 * to re-lookup everything and recover. 1777 * 1778 * Other mount errors should evenutally be handled here also 1779 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 1780 * failures will result in mount being redriven a few times. 1781 */ 1782 num_retry = nfs4_max_mount_retry; 1783 do { 1784 nfs4getfh_otw(mi, svp, &tmp_vtype, 1785 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 1786 NFS4_GETFH_NEEDSOP, tcr, &e); 1787 1788 if (e.error == 0 && e.stat == NFS4_OK) 1789 break; 1790 1791 /* 1792 * replace current sv_path with orig sv_path -- just in 1793 * case it changed due to embedded symlinks. 
1794 */ 1795 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1796 if (orig_sv_pathlen != svp->sv_pathlen) { 1797 kmem_free(svp->sv_path, svp->sv_pathlen); 1798 svp->sv_path = kmem_alloc(orig_sv_pathlen, 1799 KM_SLEEP); 1800 svp->sv_pathlen = orig_sv_pathlen; 1801 } 1802 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1803 nfs_rw_exit(&svp->sv_lock); 1804 1805 } while (num_retry-- > 0); 1806 1807 error = e.error ? e.error : geterrno4(e.stat); 1808 if (error) { 1809 nfs_cmn_err(error, CE_WARN, 1810 VERS_MSG "initial call to %s failed%s: %m", 1811 svp->sv_hostname, droptext); 1812 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1813 svp->sv_flags |= SV4_NOTINUSE; 1814 nfs_rw_exit(&svp->sv_lock); 1815 mi->mi_flags &= ~MI4_RECOV_FAIL; 1816 mi->mi_error = 0; 1817 continue; 1818 } 1819 1820 if (tmp_vtype == VBAD) { 1821 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1822 VERS_MSG "%s returned a bad file type for " 1823 "root%s", svp->sv_hostname, droptext); 1824 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1825 svp->sv_flags |= SV4_NOTINUSE; 1826 nfs_rw_exit(&svp->sv_lock); 1827 continue; 1828 } 1829 1830 if (vtype == VNON) { 1831 vtype = tmp_vtype; 1832 } else if (vtype != tmp_vtype) { 1833 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1834 VERS_MSG "%s returned a different file type " 1835 "for root%s", svp->sv_hostname, droptext); 1836 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1837 svp->sv_flags |= SV4_NOTINUSE; 1838 nfs_rw_exit(&svp->sv_lock); 1839 continue; 1840 } 1841 if (firstsvp == NULL) 1842 firstsvp = svp; 1843 } 1844 1845 kmem_free(orig_sv_path, orig_sv_pathlen); 1846 1847 if (firstsvp == NULL) { 1848 if (error == 0) 1849 error = ENOENT; 1850 goto bad; 1851 } 1852 1853 mi->mi_curr_serv = svp = firstsvp; 1854 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1855 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 1856 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1857 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1858 mi->mi_rootfh = 
sfh4_get(&fh, mi); 1859 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1860 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1861 mi->mi_srvparentfh = sfh4_get(&fh, mi); 1862 nfs_rw_exit(&svp->sv_lock); 1863 1864 /* 1865 * Make the root vnode without attributes. 1866 */ 1867 mfname = mi->mi_fname; 1868 fn_hold(mfname); 1869 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 1870 &mfname, NULL, mi, cr, gethrtime()); 1871 rtvp->v_type = vtype; 1872 1873 mi->mi_curread = mi->mi_tsize; 1874 mi->mi_curwrite = mi->mi_stsize; 1875 1876 /* 1877 * Start the manager thread responsible for handling async worker 1878 * threads. 1879 */ 1880 VFS_HOLD(vfsp); /* add reference for thread */ 1881 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 1882 vfsp, 0, minclsyspri); 1883 ASSERT(mi->mi_manager_thread != NULL); 1884 /* 1885 * Create the thread that handles over-the-wire calls for 1886 * VOP_INACTIVE. 1887 * This needs to happen after the manager thread is created. 1888 */ 1889 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 1890 mi, 0, minclsyspri); 1891 ASSERT(mi->mi_inactive_thread != NULL); 1892 1893 /* If we didn't get a type, get one now */ 1894 if (rtvp->v_type == VNON) { 1895 va.va_mask = AT_TYPE; 1896 error = nfs4getattr(rtvp, &va, tcr); 1897 if (error) 1898 goto bad; 1899 rtvp->v_type = va.va_type; 1900 } 1901 1902 mi->mi_type = rtvp->v_type; 1903 1904 mutex_enter(&mi->mi_lock); 1905 mi->mi_flags &= ~MI4_MOUNTING; 1906 mutex_exit(&mi->mi_lock); 1907 1908 *rtvpp = rtvp; 1909 if (lcr != NULL) 1910 crfree(lcr); 1911 1912 return (0); 1913 bad: 1914 /* 1915 * An error occurred somewhere, need to clean up... 1916 * 1917 * XXX Should not svp be cleaned too? 1918 */ 1919 if (lcr != NULL) 1920 crfree(lcr); 1921 if (rtvp != NULL) { 1922 /* 1923 * We need to release our reference to the root vnode and 1924 * destroy the mntinfo4 struct that we just created. 
1925 */ 1926 rp = VTOR4(rtvp); 1927 if (rp->r_flags & R4HASHED) 1928 rp4_rmhash(rp); 1929 if (rp->r_flags & R4FILEIDMAP) 1930 rp4_fileid_map_remove(rp); 1931 VN_RELE(rtvp); 1932 } 1933 nfs4_async_stop(vfsp); 1934 nfs4_async_manager_stop(vfsp); 1935 if (mi->mi_io_kstats) { 1936 kstat_delete(mi->mi_io_kstats); 1937 mi->mi_io_kstats = NULL; 1938 } 1939 if (mi->mi_ro_kstats) { 1940 kstat_delete(mi->mi_ro_kstats); 1941 mi->mi_ro_kstats = NULL; 1942 } 1943 if (mi->mi_recov_ksp) { 1944 kstat_delete(mi->mi_recov_ksp); 1945 mi->mi_recov_ksp = NULL; 1946 } 1947 nfs_free_mi4(mi); 1948 *rtvpp = NULL; 1949 return (error); 1950 } 1951 1952 /* 1953 * vfs operations 1954 */ 1955 static int 1956 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1957 { 1958 mntinfo4_t *mi; 1959 ushort_t omax; 1960 1961 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1962 return (EPERM); 1963 1964 mi = VFTOMI4(vfsp); 1965 1966 if (flag & MS_FORCE) { 1967 vfsp->vfs_flag |= VFS_UNMOUNTED; 1968 if (curproc->p_zone != mi->mi_zone) { 1969 /* 1970 * If the request is coming from the wrong zone, 1971 * we don't want to create any new threads, and 1972 * performance is not a concern. Do everything 1973 * inline. 1974 */ 1975 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 1976 "nfs4_unmount x-zone forced unmount of vfs %p\n", 1977 (void *)vfsp)); 1978 nfs4_free_mount(vfsp, cr); 1979 } else { 1980 /* 1981 * Free data structures asynchronously, to avoid 1982 * blocking the current thread (for performance 1983 * reasons only). 1984 */ 1985 async_free_mount(vfsp, cr); 1986 } 1987 return (0); 1988 } 1989 /* 1990 * Wait until all asynchronous putpage operations on 1991 * this file system are complete before flushing rnodes 1992 * from the cache. 1993 */ 1994 omax = mi->mi_max_threads; 1995 if (nfs4_async_stop_sig(vfsp)) { 1996 return (EINTR); 1997 } 1998 r4flush(vfsp, cr); 1999 /* 2000 * If there are any active vnodes on this file system, 2001 * then the file system is busy and can't be umounted. 
2002 */ 2003 if (check_rtable4(vfsp)) { 2004 mutex_enter(&mi->mi_async_lock); 2005 mi->mi_max_threads = omax; 2006 mutex_exit(&mi->mi_async_lock); 2007 return (EBUSY); 2008 } 2009 /* 2010 * The unmount can't fail from now on, and there are no active 2011 * files that could require over-the-wire calls to the server, 2012 * so stop the async manager and the inactive thread. 2013 */ 2014 nfs4_async_manager_stop(vfsp); 2015 /* 2016 * Destroy all rnodes belonging to this file system from the 2017 * rnode hash queues and purge any resources allocated to 2018 * them. 2019 */ 2020 destroy_fileid_map(vfsp); 2021 destroy_rtable4(vfsp, cr); 2022 vfsp->vfs_flag |= VFS_UNMOUNTED; 2023 nfs4_remove_mi_from_server(mi, NULL); 2024 if (mi->mi_io_kstats) { 2025 kstat_delete(mi->mi_io_kstats); 2026 mi->mi_io_kstats = NULL; 2027 } 2028 if (mi->mi_ro_kstats) { 2029 kstat_delete(mi->mi_ro_kstats); 2030 mi->mi_ro_kstats = NULL; 2031 } 2032 if (mi->mi_recov_ksp) { 2033 kstat_delete(mi->mi_recov_ksp); 2034 mi->mi_recov_ksp = NULL; 2035 } 2036 return (0); 2037 } 2038 2039 /* 2040 * find root of nfs 2041 */ 2042 static int 2043 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2044 { 2045 mntinfo4_t *mi; 2046 vnode_t *vp; 2047 nfs4_fname_t *mfname; 2048 servinfo4_t *svp; 2049 2050 mi = VFTOMI4(vfsp); 2051 2052 if (curproc->p_zone != mi->mi_zone) 2053 return (EPERM); 2054 2055 svp = mi->mi_curr_serv; 2056 if (svp) { 2057 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2058 if (svp->sv_flags & SV4_ROOT_STALE) { 2059 nfs_rw_exit(&svp->sv_lock); 2060 2061 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2062 if (svp->sv_flags & SV4_ROOT_STALE) { 2063 svp->sv_flags &= ~SV4_ROOT_STALE; 2064 nfs_rw_exit(&svp->sv_lock); 2065 return (ENOENT); 2066 } 2067 nfs_rw_exit(&svp->sv_lock); 2068 } else 2069 nfs_rw_exit(&svp->sv_lock); 2070 } 2071 2072 mfname = mi->mi_fname; 2073 fn_hold(mfname); 2074 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2075 VFTOMI4(vfsp), CRED(), gethrtime()); 2076 2077 
if (VTOR4(vp)->r_flags & R4STALE) { 2078 VN_RELE(vp); 2079 return (ENOENT); 2080 } 2081 2082 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2083 2084 vp->v_type = mi->mi_type; 2085 2086 *vpp = vp; 2087 2088 return (0); 2089 } 2090 2091 static int 2092 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2093 { 2094 int error; 2095 nfs4_ga_res_t gar; 2096 nfs4_ga_ext_res_t ger; 2097 2098 gar.n4g_ext_res = &ger; 2099 2100 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2101 NFS4_STATFS_ATTR_MASK, cr)) 2102 return (error); 2103 2104 *sbp = gar.n4g_ext_res->n4g_sb; 2105 2106 return (0); 2107 } 2108 2109 /* 2110 * Get file system statistics. 2111 */ 2112 static int 2113 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2114 { 2115 int error; 2116 vnode_t *vp; 2117 cred_t *cr; 2118 2119 error = nfs4_root(vfsp, &vp); 2120 if (error) 2121 return (error); 2122 2123 cr = CRED(); 2124 2125 error = nfs4_statfs_otw(vp, sbp, cr); 2126 if (!error) { 2127 (void) strncpy(sbp->f_basetype, 2128 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2129 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2130 } else { 2131 nfs4_purge_stale_fh(error, vp, cr); 2132 } 2133 2134 VN_RELE(vp); 2135 2136 return (error); 2137 } 2138 2139 static kmutex_t nfs4_syncbusy; 2140 2141 /* 2142 * Flush dirty nfs files for file system vfsp. 2143 * If vfsp == NULL, all nfs files are flushed. 2144 * 2145 * SYNC_CLOSE in flag is passed to us to 2146 * indicate that we are shutting down and or 2147 * rebooting. 2148 */ 2149 static int 2150 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2151 { 2152 /* 2153 * Cross-zone calls are OK here, since this translates to a 2154 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2155 */ 2156 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2157 r4flush(vfsp, cr); 2158 mutex_exit(&nfs4_syncbusy); 2159 } 2160 2161 /* 2162 * if SYNC_CLOSE is set then we know that 2163 * the system is rebooting, mark the mntinfo 2164 * for later examination. 
 */
	if (vfsp && (flag & SYNC_CLOSE)) {
		mntinfo4_t *mi;

		mi = VFTOMI4(vfsp);
		/*
		 * Unlocked test is a cheap first check; the flag is only
		 * ever set (never cleared) here, so a stale read just
		 * means we take the lock and set it again harmlessly.
		 */
		if (!(mi->mi_flags & MI4_SHUTDOWN)) {
			mutex_enter(&mi->mi_lock);
			mi->mi_flags |= MI4_SHUTDOWN;
			mutex_exit(&mi->mi_lock);
		}
	}
	return (0);
}

/*
 * vget is difficult, if not impossible, to support in v4 because we don't
 * know the parent directory or name, which makes it impossible to create a
 * useful shadow vnode.  And we need the shadow vnode for things like
 * OPEN.
 */

/* ARGSUSED */
/*
 * XXX Check nfs4_vget_pseudo() for dependency.
 */
static int
nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
{
	return (EREMOTE);
}

/*
 * nfs4_mountroot get called in the case where we are diskless booting.  All
 * we need from here is the ability to get the server info and from there we
 * can simply call nfs4_rootvp.
 */
/* ARGSUSED */
static int
nfs4_mountroot(vfs_t *vfsp, whymountroot_t why)
{
	vnode_t *rtvp;
	char root_hostname[SYS_NMLN+1];
	struct servinfo4 *svp;
	int error;
	int vfsflags;
	size_t size;
	char *root_path;
	struct pathname pn;
	char *name;
	cred_t *cr;
	mntinfo4_t *mi;
	struct nfs_args args;		/* nfs mount arguments */
	static char token[10];
	nfs4_error_t n4e;

	bzero(&args, sizeof (args));

	/* do this BEFORE getfile which causes xid stamps to be initialized */
	clkset(-1L);		/* hack for now - until we get time svc? */

	if (why == ROOT_REMOUNT) {
		/*
		 * Shouldn't happen.
		 */
		panic("nfs4_mountroot: why == ROOT_REMOUNT");
	}

	if (why == ROOT_UNMOUNT) {
		/*
		 * Nothing to do for NFS.
		 */
		return (0);
	}

	/*
	 * why == ROOT_INIT
	 */

	name = token;
	*name = 0;
	(void) getfsname("root", name, sizeof (token));

	pn_alloc(&pn);
	root_path = pn.pn_path;

	/* Build a servinfo4 for the root server; freed via sv4_free() */
	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
	svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
	svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);

	/*
	 * Get server address
	 * Get the root path
	 * Get server's transport
	 * Get server's hostname
	 * Get options
	 */
	args.addr = &svp->sv_addr;
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	args.fh = (char *)&svp->sv_fhandle;
	args.knconf = svp->sv_knconf;
	args.hostname = root_hostname;
	vfsflags = 0;
	if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
	    &args, &vfsflags)) {
		if (error == EPROTONOSUPPORT)
			nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: "
			    "mount_root failed: server doesn't support NFS V4");
		else
			nfs_cmn_err(error, CE_WARN,
			    "nfs4_mountroot: mount_root failed: %m");
		nfs_rw_exit(&svp->sv_lock);
		sv4_free(svp);
		pn_free(&pn);
		return (error);
	}
	nfs_rw_exit(&svp->sv_lock);
	svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
	svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
	(void) strcpy(svp->sv_hostname, root_hostname);

	svp->sv_pathlen = (int)(strlen(root_path) + 1);
	svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
	(void) strcpy(svp->sv_path, root_path);

	/*
	 * Force root partition to always be mounted with AUTH_UNIX for now
	 */
	svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
	svp->sv_secdata->secmod = AUTH_UNIX;
	svp->sv_secdata->rpcflavor = AUTH_UNIX;
	svp->sv_secdata->data = NULL;

	cr = crgetcred();
	rtvp = NULL;

	error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);

	if (error) {
		crfree(cr);
		pn_free(&pn);
		goto errout;
	}

	mi = VTOMI4(rtvp);

	/*
	 * Send client id to the server, if necessary
	 */
	nfs4_error_zinit(&n4e);
	nfs4setclientid(mi, cr, FALSE, &n4e);
	error = n4e.error;

	crfree(cr);

	if (error) {
		pn_free(&pn);
		goto errout;
	}

	error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args);
	if (error) {
		nfs_cmn_err(error, CE_WARN,
		    "nfs4_mountroot: invalid root mount options");
		pn_free(&pn);
		goto errout;
	}

	(void) vfs_lock_wait(vfsp);
	vfs_add(NULL, vfsp, vfsflags);
	vfs_unlock(vfsp);

	/* Record "host:/path" as the boot filesystem name */
	size = strlen(svp->sv_hostname);
	(void) strcpy(rootfs.bo_name, svp->sv_hostname);
	rootfs.bo_name[size] = ':';
	(void) strcpy(&rootfs.bo_name[size + 1], root_path);

	pn_free(&pn);

errout:
	/*
	 * On failure, tear down the servinfo4 and stop any async worker
	 * threads that nfs4rootvp() may have started for this vfs.
	 */
	if (error) {
		sv4_free(svp);
		nfs4_async_stop(vfsp);
		nfs4_async_manager_stop(vfsp);
	}

	if (rtvp != NULL)
		VN_RELE(rtvp);

	return (error);
}

/*
 * Initialization routine for VFS routines.
Should only be called once
 */
int
nfs4_vfsinit(void)
{
	mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL);
	nfs4setclientid_init();
	return (0);
}

/*
 * Undo nfs4_vfsinit().
 */
void
nfs4_vfsfini(void)
{
	nfs4setclientid_fini();
	mutex_destroy(&nfs4_syncbusy);
}

/*
 * Release the resources held by a mntinfo4 at final VFS teardown:
 * the server list and the mntinfo4 itself.
 */
void
nfs4_freevfs(vfs_t *vfsp)
{
	mntinfo4_t *mi;
	servinfo4_t *svp;

	/* free up the resources */
	mi = VFTOMI4(vfsp);
	svp = mi->mi_servers;
	mi->mi_servers = mi->mi_curr_serv = NULL;
	sv4_free(svp);
	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_freevfs: "
	    "free mi %p", (void *)mi));

	/*
	 * By this time we should have already deleted the
	 * mi kstats in the unmount code.  If they are still around
	 * something's wrong
	 */
	ASSERT(mi->mi_io_kstats == NULL);

	nfs_free_mi4(mi);
}

/*
 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
 */

/*
 * Doubly-linked list of known servers, initialized circular (empty);
 * protected by nfs4_server_lst_lock.
 */
struct nfs4_server nfs4_server_lst =
	{ &nfs4_server_lst, &nfs4_server_lst };

kmutex_t nfs4_server_lst_lock;

static void
nfs4setclientid_init(void)
{
	mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
nfs4setclientid_fini(void)
{
	mutex_destroy(&nfs4_server_lst_lock);
}

/* Tunables: delay between and count of SETCLIENTID retries */
int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY;
int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES;

/*
 * Set the clientid for the server for "mi".  No-op if the clientid is
 * already set.
 *
 * The recovery boolean should be set to TRUE if this function was called
 * by the recovery code, and FALSE otherwise.  This is used to determine
 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
 * for adding a mntinfo4_t to a nfs4_server_t.
 *
 * Error is returned via 'n4ep'.  If there was a 'n4ep->stat' error, then
 * 'n4ep->error' is set to geterrno4(n4ep->stat).
 */
void
nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep)
{
	struct nfs4_server *np;
	struct servinfo4 *svp = mi->mi_curr_serv;
	nfs4_recov_state_t recov_state;
	int num_retries = 0;
	bool_t retry = FALSE;
	cred_t *lcr = NULL;
	int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */
	time_t lease_time = 0;

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;
	ASSERT(n4ep != NULL);

recov_retry:
	nfs4_error_zinit(n4ep);
	if (!recovery)
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);

	/* This locks np if it is found */
	np = servinfo4_to_nfs4_server(svp);
	ASSERT(np == NULL || MUTEX_HELD(&np->s_lock));

	/*
	 * If we find the server already in the list, then just
	 * return, we've already done SETCLIENTID to that server
	 */

	if (np && (np->s_flags & N4S_CLIENTID_SET)) {
		/*
		 * XXX - more is needed here.  SETCLIENTID may not
		 * be completed.  A VFS lock may prevent multiple
		 * mounts and provide needed serialization.
		 */
		/* add mi to np's mntinfo4_list */
		nfs4_add_mi_to_server(np, mi);
		if (!recovery)
			nfs_rw_exit(&mi->mi_recovlock);
		mutex_exit(&np->s_lock);
		nfs4_server_rele(np);
		return;
	}

	/*
	 * Drop the mi_recovlock since nfs4_start_op will
	 * acquire it again for us.
	 */
	if (!recovery)
		nfs_rw_exit(&mi->mi_recovlock);

	/* Not found: create an unlisted node; otherwise drop its lock */
	if (!np)
		np = new_nfs4_server(svp, cr);
	else
		mutex_exit(&np->s_lock);

	if (!recovery) {
		n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state);
		if (n4ep->error) {
			nfs4_server_rele(np);
			return;
		}
	}

	/*
	 * Will potentially add np to global list, which transfers
	 * ownership of the reference to the list.
	 *
	 * Lock order: nfs4_server_lst_lock before np->s_lock.
	 */
	mutex_enter(&nfs4_server_lst_lock);
	mutex_enter(&np->s_lock);

	/*
	 * Reset the N4S_CB_PINGED flag. This is used to
	 * indicate if we have received a CB_NULL from the
	 * server. Also we reset the waiter flag.
	 */
	np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);

	/* Re-check: someone may have set the clientid while we were unlocked */
	if (np->s_flags & N4S_CLIENTID_SET) {
		/* XXX copied/pasted from above */
		/*
		 * XXX - more is needed here.  SETCLIENTID may not
		 * be completed.  A VFS lock may prevent multiple
		 * mounts and provide needed serialization.
		 */
		/* add mi to np's mntinfo4_list */
		nfs4_add_mi_to_server(np, mi);
		mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		nfs4_server_rele(np);
		if (!recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
		return;
	}

	nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);

	if (n4ep->error == EACCES) {
		/*
		 * If the uid is set then set the creds for secure mounts
		 * by proxy processes such as automountd.
		 */
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
		if (svp->sv_secdata->uid != 0) {
			lcr = crdup(cr);
			(void) crsetugid(lcr, svp->sv_secdata->uid,
			    crgetgid(cr));
			crfree(np->s_cred);
			np->s_cred = lcr;
		}
		nfs_rw_exit(&svp->sv_lock);

		/* Retry the OTW call once with the proxied credentials */
		if (lcr != NULL)
			nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
			    &retry_inuse);
	}
	lease_time = np->s_lease_time;
	mutex_exit(&np->s_lock);
	mutex_exit(&nfs4_server_lst_lock);

	if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
		/*
		 * Start recovery if failover is a possibility.  If
		 * invoked by the recovery thread itself, then just
		 * return and let it handle the failover first.  NB:
		 * recovery is not allowed if the mount is in progress
		 * since the infrastructure is not sufficiently setup
		 * to allow it.  Just return the error (after suitable
		 * retries).
		 */
		if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
			(void) nfs4_start_recovery(n4ep, mi, NULL,
			    NULL, NULL, NULL, OP_SETCLIENTID, NULL);
			/*
			 * Don't retry here, just return and let
			 * recovery take over.
			 */
			if (recovery)
				retry = FALSE;
		} else if (nfs4_rpc_retry_error(n4ep->error) ||
		    n4ep->stat == NFS4ERR_RESOURCE ||
		    n4ep->stat == NFS4ERR_STALE_CLIENTID) {

			retry = TRUE;
			/*
			 * Always retry if in recovery or once had
			 * contact with the server (but now it's
			 * overloaded).
			 */
			if (recovery == TRUE ||
			    n4ep->error == ETIMEDOUT ||
			    n4ep->error == ECONNRESET)
				num_retries = 0;
		} else if (retry_inuse && n4ep->error == 0 &&
		    n4ep->stat == NFS4ERR_CLID_INUSE) {
			retry = TRUE;
			num_retries = 0;
		}
	}

	if (!recovery)
		nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
	nfs4_server_rele(np);

	if (retry && num_retries++ < nfs4_num_sclid_retries) {
		/* For CLID_INUSE, wait out a full lease before retrying */
		if (retry_inuse) {
			delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
			retry_inuse = 0;
		} else
			delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
		goto recov_retry;
	}

	/* Map any remaining NFS4 status into an errno for the caller */
	if (n4ep->error == 0)
		n4ep->error = geterrno4(n4ep->stat);
}

int nfs4setclientid_otw_debug = 0;

/*
 * This assumes np is locked down.
 * This function handles the recovery of STALE_CLIENTID for
 * SETCLIENTID_CONFIRM, but nothing else; the calling function must be
 * designed to handle those other errors.
2620 */ 2621 static void 2622 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2623 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2624 { 2625 COMPOUND4args_clnt args; 2626 COMPOUND4res_clnt res; 2627 nfs_argop4 argop[3]; 2628 SETCLIENTID4args *s_args; 2629 SETCLIENTID4resok *s_resok; 2630 int doqueue = 1; 2631 nfs4_ga_res_t *garp = NULL; 2632 timespec_t prop_time, after_time; 2633 verifier4 verf; 2634 clientid4 tmp_clientid; 2635 2636 ASSERT(MUTEX_HELD(&np->s_lock)); 2637 2638 args.ctag = TAG_SETCLIENTID; 2639 2640 args.array = argop; 2641 args.array_len = 3; 2642 2643 /* PUTROOTFH */ 2644 argop[0].argop = OP_PUTROOTFH; 2645 2646 /* GETATTR */ 2647 argop[1].argop = OP_GETATTR; 2648 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2649 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2650 2651 /* SETCLIENTID */ 2652 argop[2].argop = OP_SETCLIENTID; 2653 2654 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2655 2656 s_args->client.verifier = np->clidtosend.verifier; 2657 s_args->client.id_len = np->clidtosend.id_len; 2658 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2659 s_args->client.id_val = np->clidtosend.id_val; 2660 2661 /* 2662 * Callback needs to happen on non-RDMA transport 2663 * Check if we have saved the original knetconfig 2664 * if so, use that instead. 2665 */ 2666 if (svp->sv_origknconf != NULL) 2667 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2668 else 2669 nfs4_cb_args(np, svp->sv_knconf, s_args); 2670 2671 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 2672 2673 if (ep->error) 2674 return; 2675 2676 /* getattr lease_time res */ 2677 if (res.array_len >= 2) { 2678 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2679 2680 #ifndef _LP64 2681 /* 2682 * The 32 bit client cannot handle a lease time greater than 2683 * (INT32_MAX/1000000). This is due to the use of the 2684 * lease_time in calls to drv_usectohz() in 2685 * nfs4_renew_lease_thread(). 
The problem is that 2686 * drv_usectohz() takes a time_t (which is just a long = 4 2687 * bytes) as its parameter. The lease_time is multiplied by 2688 * 1000000 to convert seconds to usecs for the parameter. If 2689 * a number bigger than (INT32_MAX/1000000) is used then we 2690 * overflow on the 32bit client. 2691 */ 2692 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2693 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2694 } 2695 #endif 2696 2697 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2698 2699 /* 2700 * Keep track of the lease period for the mi's 2701 * mi_msg_list. We need an appropiate time 2702 * bound to associate past facts with a current 2703 * event. The lease period is perfect for this. 2704 */ 2705 mutex_enter(&mi->mi_msg_list_lock); 2706 mi->mi_lease_period = np->s_lease_time; 2707 mutex_exit(&mi->mi_msg_list_lock); 2708 } 2709 2710 2711 if (res.status == NFS4ERR_CLID_INUSE) { 2712 clientaddr4 *clid_inuse; 2713 2714 if (!(*retry_inusep)) { 2715 clid_inuse = &res.array->nfs_resop4_u. 2716 opsetclientid.SETCLIENTID4res_u.client_using; 2717 2718 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2719 "NFS4 mount (SETCLIENTID failed)." 2720 " nfs4_client_id.id is in" 2721 "use already by: r_netid<%s> r_addr<%s>", 2722 clid_inuse->r_netid, clid_inuse->r_addr); 2723 } 2724 2725 /* 2726 * XXX - The client should be more robust in its 2727 * handling of clientid in use errors (regen another 2728 * clientid and try again?) 2729 */ 2730 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2731 return; 2732 } 2733 2734 if (res.status) { 2735 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2736 return; 2737 } 2738 2739 s_resok = &res.array[2].nfs_resop4_u. 
2740 opsetclientid.SETCLIENTID4res_u.resok4; 2741 2742 tmp_clientid = s_resok->clientid; 2743 2744 verf = s_resok->setclientid_confirm; 2745 2746 #ifdef DEBUG 2747 if (nfs4setclientid_otw_debug) { 2748 union { 2749 clientid4 clientid; 2750 int foo[2]; 2751 } cid; 2752 2753 cid.clientid = s_resok->clientid; 2754 2755 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2756 "nfs4setclientid_otw: OK, clientid = %x,%x, " 2757 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 2758 } 2759 #endif 2760 2761 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2762 2763 /* Confirm the client id and get the lease_time attribute */ 2764 2765 args.ctag = TAG_SETCLIENTID_CF; 2766 2767 args.array = argop; 2768 args.array_len = 1; 2769 2770 argop[0].argop = OP_SETCLIENTID_CONFIRM; 2771 2772 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 2773 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 2774 2775 /* used to figure out RTT for np */ 2776 gethrestime(&prop_time); 2777 2778 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 2779 "start time: %ld sec %ld nsec", prop_time.tv_sec, 2780 prop_time.tv_nsec)); 2781 2782 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2783 2784 gethrestime(&after_time); 2785 np->propagation_delay.tv_sec = 2786 MAX(1, after_time.tv_sec - prop_time.tv_sec); 2787 2788 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 2789 "finish time: %ld sec ", after_time.tv_sec)); 2790 2791 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 2792 "propagation delay set to %ld sec", 2793 np->propagation_delay.tv_sec)); 2794 2795 if (ep->error) 2796 return; 2797 2798 if (res.status == NFS4ERR_CLID_INUSE) { 2799 clientaddr4 *clid_inuse; 2800 2801 if (!(*retry_inusep)) { 2802 clid_inuse = &res.array->nfs_resop4_u. 2803 opsetclientid.SETCLIENTID4res_u.client_using; 2804 2805 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2806 "SETCLIENTID_CONFIRM failed. 
" 2807 "nfs4_client_id.id is in use already by: " 2808 "r_netid<%s> r_addr<%s>", 2809 clid_inuse->r_netid, clid_inuse->r_addr); 2810 } 2811 2812 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2813 return; 2814 } 2815 2816 if (res.status) { 2817 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2818 return; 2819 } 2820 2821 if (!(np->s_flags & N4S_INSERTED)) { 2822 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 2823 insque(np, &nfs4_server_lst); 2824 ASSERT(MUTEX_HELD(&np->s_lock)); 2825 np->s_flags |= N4S_INSERTED; 2826 np->s_refcnt++; /* list gets a reference */ 2827 } 2828 2829 np->clientid = tmp_clientid; 2830 np->s_flags |= N4S_CLIENTID_SET; 2831 2832 /* Add mi to np's mntinfo4 list */ 2833 nfs4_add_mi_to_server(np, mi); 2834 2835 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 2836 /* 2837 * Start lease management thread. 2838 * Keep trying until we succeed. 2839 */ 2840 2841 np->s_refcnt++; /* pass reference to thread */ 2842 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 2843 minclsyspri); 2844 } 2845 2846 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2847 } 2848 2849 /* 2850 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 2851 * mi's clientid the same as sp's. 2852 * Assumes sp is locked down. 
 */
void
nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi)
{
	mntinfo4_t *tmi;
	int in_list = 0;

	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
	ASSERT(sp != &nfs4_server_lst);
	ASSERT(MUTEX_HELD(&sp->s_lock));

	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
	    "nfs4_add_mi_to_server: add mi %p to sp %p",
	    (void*)mi, (void*)sp));

	/* Scan for a duplicate; note the loop deliberately walks the
	 * whole list rather than breaking on first match. */
	for (tmi = sp->mntinfo4_list;
	    tmi != NULL;
	    tmi = tmi->mi_clientid_next) {
		if (tmi == mi) {
			NFS4_DEBUG(nfs4_client_lease_debug,
			    (CE_NOTE,
			    "nfs4_add_mi_to_server: mi in list"));
			in_list = 1;
		}
	}

	/*
	 * First put a hold on the mntinfo4's vfsp so that references via
	 * mntinfo4_list will be valid.
	 */
	if (!in_list)
		VFS_HOLD(mi->mi_vfsp);

	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: "
	    "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi));

	/* Push mi onto the head of sp's list */
	if (!in_list) {
		if (sp->mntinfo4_list)
			sp->mntinfo4_list->mi_clientid_prev = mi;
		mi->mi_clientid_next = sp->mntinfo4_list;
		sp->mntinfo4_list = mi;
		mi->mi_srvsettime = gethrestime_sec();
	}

	/* set mi's clientid to that of sp's for later matching */
	mi->mi_clientid = sp->clientid;

	/*
	 * Update the clientid for any other mi's belonging to sp.  This
	 * must be done here while we hold sp->s_lock, so that
	 * find_nfs4_server() continues to work.
	 */

	for (tmi = sp->mntinfo4_list;
	    tmi != NULL;
	    tmi = tmi->mi_clientid_next) {
		if (tmi != mi) {
			tmi->mi_clientid = sp->clientid;
		}
	}
}

/*
 * Remove the mi from sp's mntinfo4_list and release its reference.
 * Exception: if mi still has open files, flag it for later removal (when
 * all the files are closed).
 *
 * If this is the last mntinfo4 in sp's list then tell the lease renewal
 * thread to exit.
 */
static void
nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp)
{
	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
	    "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
	    (void*)mi, (void*)sp));

	ASSERT(sp != NULL);
	ASSERT(MUTEX_HELD(&sp->s_lock));
	ASSERT(mi->mi_open_files >= 0);

	/*
	 * First make sure this mntinfo4 can be taken off of the list,
	 * ie: it doesn't have any open files remaining.
	 */
	if (mi->mi_open_files > 0) {
		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
		    "nfs4_remove_mi_from_server_nolock: don't "
		    "remove mi since it still has files open"));

		/* Defer: the last close will see this flag and remove mi */
		mutex_enter(&mi->mi_lock);
		mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE;
		mutex_exit(&mi->mi_lock);
		return;
	}

	remove_mi(sp, mi);

	if (sp->mntinfo4_list == NULL) {
		/* last fs unmounted, kill the thread */
		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
		    "remove_mi_from_nfs4_server_nolock: kill the thread"));
		nfs4_mark_srv_dead(sp);
	}
}

/*
 * Remove mi from sp's mntinfo4_list and release the vfs reference.
 */
static void
remove_mi(nfs4_server_t *sp, mntinfo4_t *mi)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));

	/*
	 * We release a reference, and the caller must still have a
	 * reference.
	 */
	ASSERT(mi->mi_vfsp->vfs_count >= 2);

	/* Standard doubly-linked-list unlink */
	if (mi->mi_clientid_prev) {
		mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next;
	} else {
		/* This is the first mi in sp's mntinfo4_list */
		/*
		 * Make sure the first mntinfo4 in the list is the actual
		 * mntinfo4 passed in.
		 */
		ASSERT(sp->mntinfo4_list == mi);

		sp->mntinfo4_list = mi->mi_clientid_next;
	}
	if (mi->mi_clientid_next)
		mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev;

	/* Now mark the mntinfo4's links as being removed */
	mi->mi_clientid_prev = mi->mi_clientid_next = NULL;

	VFS_RELE(mi->mi_vfsp);
}

/*
 * Free all the entries in sp's mntinfo4_list.
 */
static void
remove_all_mi(nfs4_server_t *sp)
{
	mntinfo4_t *mi;

	ASSERT(MUTEX_HELD(&sp->s_lock));

	while (sp->mntinfo4_list != NULL) {
		mi = sp->mntinfo4_list;
		/*
		 * Grab a reference in case there is only one left (which
		 * remove_mi() frees).
		 */
		VFS_HOLD(mi->mi_vfsp);
		remove_mi(sp, mi);
		VFS_RELE(mi->mi_vfsp);
	}
}

/*
 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
 *
 * This version can be called with a null nfs4_server_t arg,
 * and will either find the right one and handle locking, or
 * do nothing because the mi wasn't added to an sp's mntinfo4_list.
 */
void
nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp)
{
	nfs4_server_t *sp;

	if (esp == NULL) {
		/* find_nfs4_server_all() requires mi_recovlock held */
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
		sp = find_nfs4_server_all(mi, 1);
	} else
		sp = esp;

	if (sp != NULL)
		nfs4_remove_mi_from_server_nolock(mi, sp);

	/*
	 * If we had a valid esp as input, the calling function will be
	 * responsible for unlocking the esp nfs4_server.
	 */
	if (esp == NULL) {
		if (sp != NULL)
			mutex_exit(&sp->s_lock);
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs4_server_rele(sp);
	}
}

/*
 * Return TRUE if the given server has any non-unmounted filesystems.
 */

bool_t
nfs4_fs_active(nfs4_server_t *sp)
{
	mntinfo4_t *mi;

	ASSERT(MUTEX_HELD(&sp->s_lock));

	for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) {
		if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
			return (TRUE);
	}

	return (FALSE);
}

/*
 * Mark sp as finished and notify any waiters.
 */

void
nfs4_mark_srv_dead(nfs4_server_t *sp)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));

	sp->s_thread_exit = NFS4_THREAD_EXIT;
	cv_broadcast(&sp->cv_thread_exit);
}

/*
 * Create a new nfs4_server_t structure.
 * Returns new node unlocked and not in list, but with a reference count of
 * 1.
 */
struct nfs4_server *
new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	struct nfs4_server *np;
	timespec_t tt;
	union {
		struct {
			uint32_t sec;
			uint32_t subsec;
		} un_curtime;
		verifier4	un_verifier;	/* for lint */
	} nfs4clientid_verifier;
	char id_val[] = "Solaris: %s, NFSv4 kernel client";
	int len;

	np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP);
	/* Copy the server's address so np owns its own buffer */
	np->saddr.len = svp->sv_addr.len;
	np->saddr.maxlen = svp->sv_addr.maxlen;
	np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP);
	bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len);
	np->s_refcnt = 1;

	/*
	 * Build the nfs_client_id4 for this server mount.  Ensure
	 * the verifier is useful and that the identification is
	 * somehow based on the server's address for the case of
	 * multi-homed servers.
	 *
	 * The verifier is the current time packed as two 32-bit words.
	 */
	nfs4clientid_verifier.un_verifier = 0;
	gethrestime(&tt);
	nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec;
	nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec;
	np->clidtosend.verifier = nfs4clientid_verifier.un_verifier;

	/*
	 * calculate the length of the opaque identifier.  Subtract 2
	 * for the "%s" and add the traditional +1 for null
	 * termination.
	 */
	len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1;
	np->clidtosend.id_len = len + np->saddr.maxlen;

	/* id = formatted nodename string followed by the raw server address */
	np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP);
	(void) sprintf(np->clidtosend.id_val, id_val, uts_nodename());
	bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len);

	np->s_flags = 0;
	np->mntinfo4_list = NULL;
	/* save cred for issuing rfs4calls inside the renew thread */
	crhold(cr);
	np->s_cred = cr;
	cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL);
	mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL);
	list_create(&np->s_deleg_list, sizeof (rnode4_t),
	    offsetof(rnode4_t, r_deleg_link));
	np->s_thread_exit = 0;
	np->state_ref_count = 0;
	np->lease_valid = NFS4_LEASE_NOT_STARTED;
	cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL);
	np->s_otw_call_count = 0;
	cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL);
	np->zoneid = getzoneid();
	np->zone_globals = nfs4_get_callback_globals();
	ASSERT(np->zone_globals != NULL);
	return (np);
}

/*
 * Create a new nfs4_server_t structure and add it to the list.
 * Returns new node locked; reference must eventually be freed.
 */
static struct nfs4_server *
add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	nfs4_server_t *sp;

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	sp = new_nfs4_server(svp, cr);
	mutex_enter(&sp->s_lock);
	insque(sp, &nfs4_server_lst);
	sp->s_refcnt++;		/* list gets a reference */
	sp->clientid = 0;	/* no clientid negotiated yet */
	sp->s_flags |= N4S_INSERTED;
	return (sp);
}

/* Debug switch for dumpnfs4slist() output */
int nfs4_server_t_debug = 0;

#ifdef lint
extern void
dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
#endif

#ifndef lint
#ifdef DEBUG
/*
 * Debug-only: dump the global nfs4_server_t list, noting which entries
 * match the given clientid/address.
 */
void
dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
{
	int hash16(void *p, int len);
	nfs4_server_t *np;

	NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
	    "dumping nfs4_server_t list in %s", txt));
	NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
	    "mi 0x%p, want clientid %llx, addr %d/%04X",
	    mi, (longlong_t)clientid, srv_p->sv_addr.len,
	    hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst;
	    np = np->forw) {
		NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
		    "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
		    np, (longlong_t)np->clientid, np->saddr.len,
		    hash16((void *)np->saddr.buf, np->saddr.len),
		    np->state_ref_count));
		if (np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - address matches"));
		if (np->clientid == clientid || np->clientid == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - clientid matches"));
		if (np->s_thread_exit != NFS4_THREAD_EXIT)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - thread not exiting"));
	}
	delay(hz);
}
#endif
#endif


/*
 * Move a mntinfo4_t from one server list to another.
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = getzoneid();

	ASSERT(curproc->p_zone == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	/*
	 * Single pass locates both the old (op) and new (np) server
	 * nodes by address; locking them as encountered preserves list
	 * order and avoids deadlock.
	 */
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		/* returns np locked with a reference */
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void*)mi, (void*)old, (void*)new,
	    (void*)op, (void*)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	/*
	 * Transfer mi's share of the open-state count from op to np.
	 * mi_open_files is zeroed around the list removal so that
	 * nfs4_remove_mi_from_server_nolock() doesn't defer the removal.
	 */
	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	mutex_exit(&op->s_lock);
	nfs4_server_rele(op);
	mutex_exit(&np->s_lock);

	return (np);
}

/*
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
 */
nfs4_server_t *
servinfo4_to_nfs4_server(servinfo4_t *srv_p)
{
	nfs4_server_t *np;
	zoneid_t zoneid = getzoneid();

	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		/* match on zone and exact server address; skip dying nodes */
		if (np->zoneid == zoneid &&
		    np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0 &&
		    np->s_thread_exit != NFS4_THREAD_EXIT) {
			/* return with s_lock held and a new reference */
			mutex_enter(&np->s_lock);
			np->s_refcnt++;
			mutex_exit(&nfs4_server_lst_lock);
			return (np);
		}
	}
	mutex_exit(&nfs4_server_lst_lock);
	return (NULL);
}

/*
 * Search the nfs4_server_lst to find a match based on clientid and
 * addr.
 * Locks the nfs4_server down if it is found and returns a reference that
 * must eventually be freed.
 *
 * Returns NULL it no match is found.  This means one of two things: either
 * mi is in the process of being mounted, or mi has been unmounted.
 *
 * The caller should be holding mi->mi_recovlock, and it should continue to
 * hold the lock until done with the returned nfs4_server_t.  Once
 * mi->mi_recovlock is released, there is no guarantee that the returned
 * mi->nfs4_server_t will continue to correspond to mi.
 */
nfs4_server_t *
find_nfs4_server(mntinfo4_t *mi)
{
	return (find_nfs4_server_all(mi, 0));
}

/*
 * Same as above, but takes an "all" parameter which can be
 * set to 1 if the caller wishes to find nfs4_server_t's which
 * have been marked for termination by the exit of the renew
 * thread.  This should only be used by operations which are
 * cleaning up and will not cause an OTW op.
 */
nfs4_server_t *
find_nfs4_server_all(mntinfo4_t *mi, int all)
{
	nfs4_server_t *np;
	servinfo4_t *svp;
	zoneid_t zoneid = mi->mi_zone->zone_id;

	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
	/*
	 * This can be called from nfs4_unmount() which can be called from the
	 * global zone, hence it's legal for the global zone to muck with
	 * another zone's server list, as long as it doesn't try to contact
	 * them.
	 */
	ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID);

	/*
	 * The nfs4_server_lst_lock global lock is held when we get a new
	 * clientid (via SETCLIENTID OTW).  Holding this global lock and
	 * mi_recovlock (READER is fine) ensures that the nfs4_server
	 * and this mntinfo4 can't get out of sync, so the following search is
	 * always valid.
	 */
	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug) {
		/* mi->mi_clientid is unprotected, ok for debug output */
		dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid,
		    mi->mi_curr_serv);
	}
#endif
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		mutex_enter(&np->s_lock);
		svp = mi->mi_curr_serv;

		/* match on zone, clientid and server address */
		if (np->zoneid == zoneid &&
		    np->clientid == mi->mi_clientid &&
		    np->saddr.len == svp->sv_addr.len &&
		    bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 &&
		    (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) {
			/* return with np->s_lock held and a new reference */
			mutex_exit(&nfs4_server_lst_lock);
			np->s_refcnt++;
			return (np);
		}
		mutex_exit(&np->s_lock);
	}
	mutex_exit(&nfs4_server_lst_lock);

	return (NULL);
}

/*
 * Release the reference to sp and destroy it if that's the last one.
3424 */ 3425 3426 void 3427 nfs4_server_rele(nfs4_server_t *sp) 3428 { 3429 mutex_enter(&sp->s_lock); 3430 ASSERT(sp->s_refcnt > 0); 3431 sp->s_refcnt--; 3432 if (sp->s_refcnt > 0) { 3433 mutex_exit(&sp->s_lock); 3434 return; 3435 } 3436 if (!(sp->s_flags & N4S_INSERTED)) { 3437 destroy_nfs4_server(sp); 3438 return; 3439 } 3440 mutex_exit(&sp->s_lock); 3441 mutex_enter(&nfs4_server_lst_lock); 3442 mutex_enter(&sp->s_lock); 3443 if (sp->s_refcnt > 0) { 3444 mutex_exit(&sp->s_lock); 3445 mutex_exit(&nfs4_server_lst_lock); 3446 return; 3447 } 3448 if (sp->s_flags & N4S_INSERTED) { 3449 remque(sp); 3450 sp->forw = sp->back = NULL; 3451 sp->s_flags &= ~N4S_INSERTED; 3452 } 3453 mutex_exit(&nfs4_server_lst_lock); 3454 destroy_nfs4_server(sp); 3455 } 3456 3457 static void 3458 destroy_nfs4_server(nfs4_server_t *sp) 3459 { 3460 ASSERT(MUTEX_HELD(&sp->s_lock)); 3461 ASSERT(!(sp->s_flags & N4S_INSERTED)); 3462 ASSERT(sp->s_refcnt == 0); 3463 ASSERT(sp->s_otw_call_count == 0); 3464 3465 remove_all_mi(sp); 3466 3467 crfree(sp->s_cred); 3468 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3469 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3470 mutex_exit(&sp->s_lock); 3471 3472 /* destroy the nfs4_server */ 3473 nfs4callback_destroy(sp); 3474 list_destroy(&sp->s_deleg_list); 3475 mutex_destroy(&sp->s_lock); 3476 cv_destroy(&sp->cv_thread_exit); 3477 cv_destroy(&sp->s_cv_otw_count); 3478 cv_destroy(&sp->wait_cb_null); 3479 nfs_rw_destroy(&sp->s_recovlock); 3480 kmem_free(sp, sizeof (*sp)); 3481 } 3482 3483 /* 3484 * Lock sp, but only if it's still active (in the list and hasn't been 3485 * flagged as exiting) or 'all' is non-zero. 3486 * Returns TRUE if sp got locked and adds a reference to sp. 
3487 */ 3488 bool_t 3489 nfs4_server_vlock(nfs4_server_t *sp, int all) 3490 { 3491 nfs4_server_t *np; 3492 3493 mutex_enter(&nfs4_server_lst_lock); 3494 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3495 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3496 all != 0)) { 3497 mutex_enter(&np->s_lock); 3498 np->s_refcnt++; 3499 mutex_exit(&nfs4_server_lst_lock); 3500 return (TRUE); 3501 } 3502 } 3503 mutex_exit(&nfs4_server_lst_lock); 3504 return (FALSE); 3505 } 3506 3507 /* 3508 * Fork off a thread to free the data structures for a mount. 3509 */ 3510 3511 static void 3512 async_free_mount(vfs_t *vfsp, cred_t *cr) 3513 { 3514 freemountargs_t *args; 3515 3516 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3517 args->fm_vfsp = vfsp; 3518 VFS_HOLD(vfsp); 3519 args->fm_cr = cr; 3520 crhold(cr); 3521 3522 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3523 minclsyspri); 3524 } 3525 3526 static void 3527 nfs4_free_mount_thread(freemountargs_t *args) 3528 { 3529 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3530 VFS_RELE(args->fm_vfsp); 3531 crfree(args->fm_cr); 3532 kmem_free(args, sizeof (freemountargs_t)); 3533 zthread_exit(); 3534 /* NOTREACHED */ 3535 } 3536 3537 /* 3538 * Thread to free the data structures for a given filesystem. 3539 */ 3540 static void 3541 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3542 { 3543 mntinfo4_t *mi = VFTOMI4(vfsp); 3544 nfs4_server_t *sp; 3545 callb_cpr_t cpr_info; 3546 kmutex_t cpr_lock; 3547 boolean_t async_thread; 3548 3549 /* 3550 * We need to participate in the CPR framework if this is a kernel 3551 * thread. 
3552 */ 3553 async_thread = (curproc == curproc->p_zone->zone_zsched); 3554 if (async_thread) { 3555 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3556 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3557 "nfsv4AsyncUnmount"); 3558 } 3559 3560 /* 3561 * We need to wait for all outstanding OTW calls 3562 * and recovery to finish before we remove the mi 3563 * from the nfs4_server_t, as current pending 3564 * calls might still need this linkage (in order 3565 * to find a nfs4_server_t from a mntinfo4_t). 3566 */ 3567 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3568 sp = find_nfs4_server(mi); 3569 nfs_rw_exit(&mi->mi_recovlock); 3570 3571 if (sp) { 3572 while (sp->s_otw_call_count != 0) { 3573 if (async_thread) { 3574 mutex_enter(&cpr_lock); 3575 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3576 mutex_exit(&cpr_lock); 3577 } 3578 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3579 if (async_thread) { 3580 mutex_enter(&cpr_lock); 3581 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3582 mutex_exit(&cpr_lock); 3583 } 3584 } 3585 mutex_exit(&sp->s_lock); 3586 nfs4_server_rele(sp); 3587 sp = NULL; 3588 } 3589 3590 3591 mutex_enter(&mi->mi_lock); 3592 while (mi->mi_in_recovery != 0) { 3593 if (async_thread) { 3594 mutex_enter(&cpr_lock); 3595 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3596 mutex_exit(&cpr_lock); 3597 } 3598 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3599 if (async_thread) { 3600 mutex_enter(&cpr_lock); 3601 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3602 mutex_exit(&cpr_lock); 3603 } 3604 } 3605 mutex_exit(&mi->mi_lock); 3606 3607 /* 3608 * The original purge of the dnlc via 'dounmount' 3609 * doesn't guarantee that another dnlc entry was not 3610 * added while we waitied for all outstanding OTW 3611 * and recovery calls to finish. So re-purge the 3612 * dnlc now. 3613 */ 3614 (void) dnlc_purge_vfsp(vfsp, 0); 3615 3616 /* 3617 * We need to explicitly stop the manager thread; the asyc worker 3618 * threads can timeout and exit on their own. 
3619 */ 3620 nfs4_async_manager_stop(vfsp); 3621 3622 destroy_fileid_map(vfsp); 3623 destroy_rtable4(vfsp, cr); 3624 3625 nfs4_remove_mi_from_server(mi, NULL); 3626 3627 if (mi->mi_io_kstats) { 3628 kstat_delete(mi->mi_io_kstats); 3629 mi->mi_io_kstats = NULL; 3630 } 3631 if (mi->mi_ro_kstats) { 3632 kstat_delete(mi->mi_ro_kstats); 3633 mi->mi_ro_kstats = NULL; 3634 } 3635 if (mi->mi_recov_ksp) { 3636 kstat_delete(mi->mi_recov_ksp); 3637 mi->mi_recov_ksp = NULL; 3638 } 3639 3640 if (async_thread) { 3641 mutex_enter(&cpr_lock); 3642 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3643 mutex_destroy(&cpr_lock); 3644 } 3645 } 3646