1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/time.h> 54 #include <sys/disp.h> 55 #include <sys/policy.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/dnlc.h> 59 #include <sys/list.h> 60 #include <sys/mntent.h> 61 #include <sys/tsol/label.h> 62 63 #include <rpc/types.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpcsec_gss.h> 66 #include <rpc/clnt.h> 67 68 #include <nfs/nfs.h> 69 #include <nfs/nfs_clnt.h> 70 #include <nfs/mount.h> 71 #include <nfs/nfs_acl.h> 72 73 #include <fs/fs_subr.h> 74 75 #include <nfs/nfs4.h> 76 #include <nfs/rnode4.h> 77 #include <nfs/nfs4_clnt.h> 78 #include <sys/fs/autofs.h> 79 80 81 /* 82 * Arguments passed to thread to free data structures from forced unmount. 83 */ 84 85 typedef struct { 86 vfs_t *fm_vfsp; 87 cred_t *fm_cr; 88 } freemountargs_t; 89 90 static void async_free_mount(vfs_t *, cred_t *); 91 static void nfs4_free_mount(vfs_t *, cred_t *); 92 static void nfs4_free_mount_thread(freemountargs_t *); 93 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 94 95 /* 96 * From rpcsec module (common/rpcsec). 
97 */ 98 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 99 extern void sec_clnt_freeinfo(struct sec_data *); 100 101 /* 102 * The order and contents of this structure must be kept in sync with that of 103 * rfsreqcnt_v4_tmpl in nfs_stats.c 104 */ 105 static char *rfsnames_v4[] = { 106 "null", "compound", "reserved", "access", "close", "commit", "create", 107 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 108 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 109 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 110 "read", "readdir", "readlink", "remove", "rename", "renew", 111 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 112 "setclientid_confirm", "verify", "write" 113 }; 114 115 /* 116 * nfs4_max_mount_retry is the number of times the client will redrive 117 * a mount compound before giving up and returning failure. The intent 118 * is to redrive mount compounds which fail NFS4ERR_STALE so that 119 * if a component of the server path being mounted goes stale, it can 120 * "recover" by redriving the mount compund (LOOKUP ops). This recovery 121 * code is needed outside of the recovery framework because mount is a 122 * special case. The client doesn't create vnodes/rnodes for components 123 * of the server path being mounted. The recovery code recovers real 124 * client objects, not STALE FHs which map to components of the server 125 * path being mounted. 126 * 127 * We could just fail the mount on the first time, but that would 128 * instantly trigger failover (from nfs4_mount), and the client should 129 * try to re-lookup the STALE FH before doing failover. The easiest 130 * way to "re-lookup" is to simply redrive the mount compound. 131 */ 132 static int nfs4_max_mount_retry = 2; 133 134 /* 135 * nfs4 vfs operations. 
136 */ 137 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 138 static int nfs4_unmount(vfs_t *, int, cred_t *); 139 static int nfs4_root(vfs_t *, vnode_t **); 140 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 141 static int nfs4_sync(vfs_t *, short, cred_t *); 142 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 143 static int nfs4_mountroot(vfs_t *, whymountroot_t); 144 static void nfs4_freevfs(vfs_t *); 145 146 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 147 int, cred_t *, zone_t *); 148 149 vfsops_t *nfs4_vfsops; 150 151 int nfs4_vfsinit(void); 152 void nfs4_vfsfini(void); 153 static void nfs4setclientid_init(void); 154 static void nfs4setclientid_fini(void); 155 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 156 struct nfs4_server *, nfs4_error_t *, int *); 157 static void destroy_nfs4_server(nfs4_server_t *); 158 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 159 160 /* 161 * Initialize the vfs structure 162 */ 163 164 static int nfs4fstyp; 165 166 167 /* 168 * Debug variable to check for rdma based 169 * transport startup and cleanup. Controlled 170 * through /etc/system. Off by default. 
171 */ 172 extern int rdma_debug; 173 174 int 175 nfs4init(int fstyp, char *name) 176 { 177 static const fs_operation_def_t nfs4_vfsops_template[] = { 178 VFSNAME_MOUNT, nfs4_mount, 179 VFSNAME_UNMOUNT, nfs4_unmount, 180 VFSNAME_ROOT, nfs4_root, 181 VFSNAME_STATVFS, nfs4_statvfs, 182 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 183 VFSNAME_VGET, nfs4_vget, 184 VFSNAME_MOUNTROOT, nfs4_mountroot, 185 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 186 NULL, NULL 187 }; 188 int error; 189 190 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 191 if (error != 0) { 192 zcmn_err(GLOBAL_ZONEID, CE_WARN, 193 "nfs4init: bad vfs ops template"); 194 return (error); 195 } 196 197 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 198 if (error != 0) { 199 (void) vfs_freevfsops_by_type(fstyp); 200 zcmn_err(GLOBAL_ZONEID, CE_WARN, 201 "nfs4init: bad vnode ops template"); 202 return (error); 203 } 204 205 nfs4fstyp = fstyp; 206 207 (void) nfs4_vfsinit(); 208 209 (void) nfs4_init_dot_entries(); 210 211 return (0); 212 } 213 214 void 215 nfs4fini(void) 216 { 217 (void) nfs4_destroy_dot_entries(); 218 nfs4_vfsfini(); 219 } 220 221 /* 222 * Create a new sec_data structure to store AUTH_DH related data: 223 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 224 * flag set for NFS V4 since we are avoiding to contact the rpcbind 225 * daemon and is using the IP time service (IPPORT_TIMESERVER). 226 * 227 * sec_data can be freed by sec_clnt_freeinfo(). 
228 */ 229 struct sec_data * 230 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 231 struct knetconfig *knconf) { 232 struct sec_data *secdata; 233 dh_k4_clntdata_t *data; 234 char *pf, *p; 235 236 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 237 return (NULL); 238 239 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 240 secdata->flags = 0; 241 242 data = kmem_alloc(sizeof (*data), KM_SLEEP); 243 244 data->syncaddr.maxlen = syncaddr->maxlen; 245 data->syncaddr.len = syncaddr->len; 246 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 247 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 248 249 /* 250 * duplicate the knconf information for the 251 * new opaque data. 252 */ 253 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 254 *data->knconf = *knconf; 255 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 256 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 257 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 258 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 259 data->knconf->knc_protofmly = pf; 260 data->knconf->knc_proto = p; 261 262 /* move server netname to the sec_data structure */ 263 data->netname = kmem_alloc(nlen, KM_SLEEP); 264 bcopy(netname, data->netname, nlen); 265 data->netnamelen = (int)nlen; 266 267 secdata->secmod = AUTH_DH; 268 secdata->rpcflavor = AUTH_DH; 269 secdata->data = (caddr_t)data; 270 271 return (secdata); 272 } 273 274 static int 275 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 276 { 277 servinfo4_t *si; 278 279 /* 280 * Iterate over the servinfo4 list to make sure 281 * we do not have a duplicate. 
Skip any servinfo4 282 * that has been marked "NOT IN USE" 283 */ 284 for (si = svp_head; si; si = si->sv_next) { 285 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 286 if (si->sv_flags & SV4_NOTINUSE) { 287 nfs_rw_exit(&si->sv_lock); 288 continue; 289 } 290 nfs_rw_exit(&si->sv_lock); 291 if (si == svp) 292 continue; 293 if (si->sv_addr.len == svp->sv_addr.len && 294 strcmp(si->sv_knconf->knc_protofmly, 295 svp->sv_knconf->knc_protofmly) == 0 && 296 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 297 si->sv_addr.len) == 0) { 298 /* it's a duplicate */ 299 return (1); 300 } 301 } 302 /* it's not a duplicate */ 303 return (0); 304 } 305 306 void 307 nfs4_free_args(struct nfs_args *nargs) 308 { 309 if (nargs->knconf) { 310 if (nargs->knconf->knc_protofmly) 311 kmem_free(nargs->knconf->knc_protofmly, 312 KNC_STRSIZE); 313 if (nargs->knconf->knc_proto) 314 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 315 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 316 nargs->knconf = NULL; 317 } 318 319 if (nargs->fh) { 320 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 321 nargs->fh = NULL; 322 } 323 324 if (nargs->hostname) { 325 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 326 nargs->hostname = NULL; 327 } 328 329 if (nargs->addr) { 330 if (nargs->addr->buf) { 331 ASSERT(nargs->addr->len); 332 kmem_free(nargs->addr->buf, nargs->addr->len); 333 } 334 kmem_free(nargs->addr, sizeof (struct netbuf)); 335 nargs->addr = NULL; 336 } 337 338 if (nargs->syncaddr) { 339 ASSERT(nargs->syncaddr->len); 340 if (nargs->syncaddr->buf) { 341 ASSERT(nargs->syncaddr->len); 342 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 343 } 344 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 345 nargs->syncaddr = NULL; 346 } 347 348 if (nargs->netname) { 349 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 350 nargs->netname = NULL; 351 } 352 353 if (nargs->nfs_ext_u.nfs_extA.secdata) { 354 sec_clnt_freeinfo( 355 nargs->nfs_ext_u.nfs_extA.secdata); 356 
nargs->nfs_ext_u.nfs_extA.secdata = NULL; 357 } 358 } 359 360 361 int 362 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs) 363 { 364 365 int error; 366 size_t hlen; /* length of hostname */ 367 size_t nlen; /* length of netname */ 368 char netname[MAXNETNAMELEN+1]; /* server's netname */ 369 struct netbuf addr; /* server's address */ 370 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 371 struct knetconfig *knconf; /* transport structure */ 372 struct sec_data *secdata = NULL; /* security data */ 373 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 374 STRUCT_DECL(knetconfig, knconf_tmp); 375 STRUCT_DECL(netbuf, addr_tmp); 376 int flags; 377 char *p, *pf; 378 struct pathname pn; 379 char *userbufptr; 380 381 382 bzero(nargs, sizeof (*nargs)); 383 384 STRUCT_INIT(args, get_udatamodel()); 385 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 386 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 387 STRUCT_SIZE(args)))) 388 return (EFAULT); 389 390 nargs->wsize = STRUCT_FGET(args, wsize); 391 nargs->rsize = STRUCT_FGET(args, rsize); 392 nargs->timeo = STRUCT_FGET(args, timeo); 393 nargs->retrans = STRUCT_FGET(args, retrans); 394 nargs->acregmin = STRUCT_FGET(args, acregmin); 395 nargs->acregmax = STRUCT_FGET(args, acregmax); 396 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 397 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 398 399 flags = STRUCT_FGET(args, flags); 400 nargs->flags = flags; 401 402 addr.buf = NULL; 403 syncaddr.buf = NULL; 404 405 406 /* 407 * Allocate space for a knetconfig structure and 408 * its strings and copy in from user-land. 
409 */ 410 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 411 STRUCT_INIT(knconf_tmp, get_udatamodel()); 412 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 413 STRUCT_SIZE(knconf_tmp))) { 414 kmem_free(knconf, sizeof (*knconf)); 415 return (EFAULT); 416 } 417 418 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 419 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 420 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 421 if (get_udatamodel() != DATAMODEL_LP64) { 422 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 423 } else { 424 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 425 } 426 427 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 428 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 429 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 430 if (error) { 431 kmem_free(pf, KNC_STRSIZE); 432 kmem_free(p, KNC_STRSIZE); 433 kmem_free(knconf, sizeof (*knconf)); 434 return (error); 435 } 436 437 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 438 if (error) { 439 kmem_free(pf, KNC_STRSIZE); 440 kmem_free(p, KNC_STRSIZE); 441 kmem_free(knconf, sizeof (*knconf)); 442 return (error); 443 } 444 445 446 knconf->knc_protofmly = pf; 447 knconf->knc_proto = p; 448 449 nargs->knconf = knconf; 450 451 /* 452 * Get server address 453 */ 454 STRUCT_INIT(addr_tmp, get_udatamodel()); 455 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 456 STRUCT_SIZE(addr_tmp))) { 457 error = EFAULT; 458 goto errout; 459 } 460 461 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 462 userbufptr = STRUCT_FGETP(addr_tmp, buf); 463 addr.len = STRUCT_FGET(addr_tmp, len); 464 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 465 addr.maxlen = addr.len; 466 if (copyin(userbufptr, addr.buf, addr.len)) { 467 kmem_free(addr.buf, addr.len); 468 error = EFAULT; 469 goto errout; 470 } 471 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 472 473 /* 474 * Get the root fhandle 475 */ 476 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 477 if (error) 478 goto errout; 479 480 /* Volatile fh: keep server paths, so use actual-size strings */ 481 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 482 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen); 483 nargs->fh[pn.pn_pathlen] = '\0'; 484 pn_free(&pn); 485 486 487 /* 488 * Get server's hostname 489 */ 490 if (flags & NFSMNT_HOSTNAME) { 491 error = copyinstr(STRUCT_FGETP(args, hostname), 492 netname, sizeof (netname), &hlen); 493 if (error) 494 goto errout; 495 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 496 (void) strcpy(nargs->hostname, netname); 497 498 } else { 499 nargs->hostname = NULL; 500 } 501 502 503 /* 504 * If there are syncaddr and netname data, load them in. This is 505 * to support data needed for NFSV4 when AUTH_DH is the negotiated 506 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 507 */ 508 netname[0] = '\0'; 509 if (flags & NFSMNT_SECURE) { 510 511 /* get syncaddr */ 512 STRUCT_INIT(addr_tmp, get_udatamodel()); 513 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 514 STRUCT_SIZE(addr_tmp))) { 515 error = EINVAL; 516 goto errout; 517 } 518 userbufptr = STRUCT_FGETP(addr_tmp, buf); 519 syncaddr.len = STRUCT_FGET(addr_tmp, len); 520 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 521 syncaddr.maxlen = syncaddr.len; 522 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 523 kmem_free(syncaddr.buf, syncaddr.len); 524 error = EFAULT; 525 goto errout; 526 } 527 528 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 529 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 530 } 531 /* get server's netname */ 532 if (copyinstr(STRUCT_FGETP(args, netname), netname, 533 sizeof (netname), &nlen)) { 534 error = EFAULT; 535 goto errout; 536 } 537 538 netname[nlen] = '\0'; 539 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 540 (void) strcpy(nargs->netname, netname); 541 542 /* 543 * Get the extention data which has the security data 
structure. 544 * This includes data for AUTH_SYS as well. 545 */ 546 if (flags & NFSMNT_NEWARGS) { 547 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 548 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 549 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 550 /* 551 * Indicating the application is using the new 552 * sec_data structure to pass in the security 553 * data. 554 */ 555 if (STRUCT_FGETP(args, 556 nfs_ext_u.nfs_extA.secdata) != NULL) { 557 error = sec_clnt_loadinfo( 558 (struct sec_data *)STRUCT_FGETP(args, 559 nfs_ext_u.nfs_extA.secdata), 560 &secdata, get_udatamodel()); 561 } 562 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 563 } 564 } 565 566 if (error) 567 goto errout; 568 569 /* 570 * Failover support: 571 * 572 * We may have a linked list of nfs_args structures, 573 * which means the user is looking for failover. If 574 * the mount is either not "read-only" or "soft", 575 * we want to bail out with EINVAL. 576 */ 577 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 578 nargs->nfs_ext_u.nfs_extB.next = 579 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 580 581 errout: 582 if (error) 583 nfs4_free_args(nargs); 584 585 return (error); 586 } 587 588 589 /* 590 * nfs mount vfsop 591 * Set up mount info record and attach it to vfs struct. 
592 */ 593 static int 594 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 595 { 596 char *data = uap->dataptr; 597 int error; 598 vnode_t *rtvp; /* the server's root */ 599 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 600 struct knetconfig *rdma_knconf; /* rdma transport structure */ 601 rnode4_t *rp; 602 struct servinfo4 *svp; /* nfs server info */ 603 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 604 struct servinfo4 *svp_head; /* first nfs server info */ 605 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 606 struct sec_data *secdata; /* security data */ 607 struct nfs_args *args = NULL; 608 int flags, addr_type, removed; 609 zone_t *zone = nfs_zone(); 610 nfs4_error_t n4e; 611 zone_t *mntzone = NULL; 612 613 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 614 return (EPERM); 615 if (mvp->v_type != VDIR) 616 return (ENOTDIR); 617 /* 618 * get arguments 619 * 620 * nfs_args is now versioned and is extensible, so 621 * uap->datalen might be different from sizeof (args) 622 * in a compatible situation. 623 */ 624 more: 625 if (!(uap->flags & MS_SYSSPACE)) { 626 if (args == NULL) 627 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 628 else 629 nfs4_free_args(args); 630 error = nfs4_copyin(data, uap->datalen, args); 631 if (error) { 632 if (args) { 633 kmem_free(args, sizeof (*args)); 634 } 635 return (error); 636 } 637 } else { 638 args = (struct nfs_args *)data; 639 } 640 641 642 flags = args->flags; 643 644 /* 645 * If the request changes the locking type, disallow the remount, 646 * because it's questionable whether we can transfer the 647 * locking state correctly. 648 */ 649 if (uap->flags & MS_REMOUNT) { 650 if (!(uap->flags & MS_SYSSPACE)) { 651 nfs4_free_args(args); 652 kmem_free(args, sizeof (*args)); 653 } 654 if ((mi = VFTOMI4(vfsp)) != NULL) { 655 uint_t new_mi_llock; 656 uint_t old_mi_llock; 657 new_mi_llock = (flags & NFSMNT_LLOCK) ? 
1 : 0; 658 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 659 if (old_mi_llock != new_mi_llock) 660 return (EBUSY); 661 } 662 return (0); 663 } 664 665 mutex_enter(&mvp->v_lock); 666 if (!(uap->flags & MS_OVERLAY) && 667 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 668 mutex_exit(&mvp->v_lock); 669 if (!(uap->flags & MS_SYSSPACE)) { 670 nfs4_free_args(args); 671 kmem_free(args, sizeof (*args)); 672 } 673 return (EBUSY); 674 } 675 mutex_exit(&mvp->v_lock); 676 677 /* make sure things are zeroed for errout: */ 678 rtvp = NULL; 679 mi = NULL; 680 secdata = NULL; 681 682 /* 683 * A valid knetconfig structure is required. 684 */ 685 686 if (!(flags & NFSMNT_KNCONF) || 687 args->knconf == NULL || args->knconf->knc_protofmly == NULL || 688 args->knconf->knc_proto == NULL || 689 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) { 690 if (!(uap->flags & MS_SYSSPACE)) { 691 nfs4_free_args(args); 692 kmem_free(args, sizeof (*args)); 693 } 694 return (EINVAL); 695 } 696 697 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 698 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 699 if (!(uap->flags & MS_SYSSPACE)) { 700 nfs4_free_args(args); 701 kmem_free(args, sizeof (*args)); 702 } 703 return (EINVAL); 704 } 705 706 707 /* 708 * Allocate a servinfo4 struct. 
709 */ 710 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 711 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 712 if (svp_tail) { 713 svp_2ndlast = svp_tail; 714 svp_tail->sv_next = svp; 715 } else { 716 svp_head = svp; 717 svp_2ndlast = svp; 718 } 719 720 svp_tail = svp; 721 svp->sv_knconf = args->knconf; 722 args->knconf = NULL; 723 724 725 /* 726 * Get server address 727 */ 728 729 if (args->addr == NULL || args->addr->buf == NULL) { 730 error = EINVAL; 731 goto errout; 732 } 733 734 svp->sv_addr.maxlen = args->addr->maxlen; 735 svp->sv_addr.len = args->addr->len; 736 svp->sv_addr.buf = args->addr->buf; 737 args->addr->buf = NULL; 738 739 740 /* 741 * Get the root fhandle 742 */ 743 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) { 744 error = EINVAL; 745 goto errout; 746 } 747 748 svp->sv_path = args->fh; 749 svp->sv_pathlen = strlen(args->fh) + 1; 750 args->fh = NULL; 751 752 /* 753 * Get server's hostname 754 */ 755 if (flags & NFSMNT_HOSTNAME) { 756 if (args->hostname == NULL || (strlen(args->hostname) > 757 MAXNETNAMELEN)) { 758 error = EINVAL; 759 goto errout; 760 } 761 svp->sv_hostnamelen = strlen(args->hostname) + 1; 762 svp->sv_hostname = args->hostname; 763 args->hostname = NULL; 764 } else { 765 char *p = "unknown-host"; 766 svp->sv_hostnamelen = strlen(p) + 1; 767 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 768 (void) strcpy(svp->sv_hostname, p); 769 } 770 771 /* 772 * RDMA MOUNT SUPPORT FOR NFS v4. 773 * Establish, is it possible to use RDMA, if so overload the 774 * knconf with rdma specific knconf and free the orignal knconf. 775 */ 776 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 777 /* 778 * Determine the addr type for RDMA, IPv4 or v6. 
779 */ 780 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 781 addr_type = AF_INET; 782 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 783 addr_type = AF_INET6; 784 785 if (rdma_reachable(addr_type, &svp->sv_addr, 786 &rdma_knconf) == 0) { 787 /* 788 * If successful, hijack the orignal knconf and 789 * replace with the new one, depending on the flags. 790 */ 791 svp->sv_origknconf = svp->sv_knconf; 792 svp->sv_knconf = rdma_knconf; 793 } else { 794 if (flags & NFSMNT_TRYRDMA) { 795 #ifdef DEBUG 796 if (rdma_debug) 797 zcmn_err(getzoneid(), CE_WARN, 798 "no RDMA onboard, revert\n"); 799 #endif 800 } 801 802 if (flags & NFSMNT_DORDMA) { 803 /* 804 * If proto=rdma is specified and no RDMA 805 * path to this server is avialable then 806 * ditch this server. 807 * This is not included in the mountable 808 * server list or the replica list. 809 * Check if more servers are specified; 810 * Failover case, otherwise bail out of mount. 811 */ 812 if (args->nfs_args_ext == 813 NFS_ARGS_EXTB && 814 args->nfs_ext_u.nfs_extB.next 815 != NULL) { 816 data = (char *) 817 args->nfs_ext_u.nfs_extB.next; 818 if (uap->flags & MS_RDONLY && 819 !(flags & NFSMNT_SOFT)) { 820 if (svp_head->sv_next == NULL) { 821 svp_tail = NULL; 822 svp_2ndlast = NULL; 823 sv4_free(svp_head); 824 goto more; 825 } else { 826 svp_tail = svp_2ndlast; 827 svp_2ndlast->sv_next = 828 NULL; 829 sv4_free(svp); 830 goto more; 831 } 832 } 833 } else { 834 /* 835 * This is the last server specified 836 * in the nfs_args list passed down 837 * and its not rdma capable. 
838 */ 839 if (svp_head->sv_next == NULL) { 840 /* 841 * Is this the only one 842 */ 843 error = EINVAL; 844 #ifdef DEBUG 845 if (rdma_debug) 846 zcmn_err(getzoneid(), 847 CE_WARN, 848 "No RDMA srv"); 849 #endif 850 goto errout; 851 } else { 852 /* 853 * There is list, since some 854 * servers specified before 855 * this passed all requirements 856 */ 857 svp_tail = svp_2ndlast; 858 svp_2ndlast->sv_next = NULL; 859 sv4_free(svp); 860 goto proceed; 861 } 862 } 863 } 864 } 865 } 866 867 /* 868 * If there are syncaddr and netname data, load them in. This is 869 * to support data needed for NFSV4 when AUTH_DH is the negotiated 870 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 871 */ 872 if (args->flags & NFSMNT_SECURE) { 873 svp->sv_dhsec = create_authdh_data(args->netname, 874 strlen(args->netname), 875 args->syncaddr, svp->sv_knconf); 876 } 877 878 /* 879 * Get the extention data which has the security data structure. 880 * This includes data for AUTH_SYS as well. 881 */ 882 if (flags & NFSMNT_NEWARGS) { 883 switch (args->nfs_args_ext) { 884 case NFS_ARGS_EXTA: 885 case NFS_ARGS_EXTB: 886 /* 887 * Indicating the application is using the new 888 * sec_data structure to pass in the security 889 * data. 890 */ 891 secdata = args->nfs_ext_u.nfs_extA.secdata; 892 if (secdata == NULL) { 893 error = EINVAL; 894 } else if (uap->flags & MS_SYSSPACE) { 895 /* 896 * Need to validate the flavor here if 897 * sysspace, userspace was already 898 * validate from the nfs_copyin function. 
899 */ 900 switch (secdata->rpcflavor) { 901 case AUTH_NONE: 902 case AUTH_UNIX: 903 case AUTH_LOOPBACK: 904 case AUTH_DES: 905 case RPCSEC_GSS: 906 break; 907 default: 908 error = EINVAL; 909 goto errout; 910 } 911 } 912 args->nfs_ext_u.nfs_extA.secdata = NULL; 913 break; 914 915 default: 916 error = EINVAL; 917 break; 918 } 919 920 } else if (flags & NFSMNT_SECURE) { 921 /* 922 * NFSMNT_SECURE is deprecated but we keep it 923 * to support the rouge user generated application 924 * that may use this undocumented interface to do 925 * AUTH_DH security. 926 */ 927 secdata = create_authdh_data(args->netname, 928 strlen(args->netname), args->syncaddr, svp->sv_knconf); 929 930 } else { 931 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 932 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 933 secdata->data = NULL; 934 } 935 936 svp->sv_secdata = secdata; 937 938 /* 939 * User does not explictly specify a flavor, and a user 940 * defined default flavor is passed down. 941 */ 942 if (flags & NFSMNT_SECDEFAULT) { 943 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 944 svp->sv_flags |= SV4_TRYSECDEFAULT; 945 nfs_rw_exit(&svp->sv_lock); 946 } 947 948 /* 949 * Failover support: 950 * 951 * We may have a linked list of nfs_args structures, 952 * which means the user is looking for failover. If 953 * the mount is either not "read-only" or "soft", 954 * we want to bail out with EINVAL. 955 */ 956 if (args->nfs_args_ext == NFS_ARGS_EXTB && 957 args->nfs_ext_u.nfs_extB.next != NULL) { 958 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 959 data = (char *)args->nfs_ext_u.nfs_extB.next; 960 goto more; 961 } 962 error = EINVAL; 963 goto errout; 964 } 965 966 /* 967 * Determine the zone we're being mounted into. 
968 */ 969 zone_hold(mntzone = zone); /* start with this assumption */ 970 if (getzoneid() == GLOBAL_ZONEID) { 971 zone_rele(mntzone); 972 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 973 ASSERT(mntzone != NULL); 974 if (mntzone != zone) { 975 error = EBUSY; 976 goto errout; 977 } 978 } 979 980 if (is_system_labeled()) { 981 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 982 svp->sv_knconf, cr); 983 984 if (error > 0) 985 goto errout; 986 987 if (error == -1) { 988 /* change mount to read-only to prevent write-down */ 989 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 990 } 991 } 992 993 /* 994 * Stop the mount from going any further if the zone is going away. 995 */ 996 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 997 error = EBUSY; 998 goto errout; 999 } 1000 1001 /* 1002 * Get root vnode. 1003 */ 1004 proceed: 1005 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 1006 1007 if (error) { 1008 /* if nfs4rootvp failed, it will free svp_head */ 1009 svp_head = NULL; 1010 goto errout; 1011 } 1012 1013 mi = VTOMI4(rtvp); 1014 1015 /* 1016 * Send client id to the server, if necessary 1017 */ 1018 nfs4_error_zinit(&n4e); 1019 nfs4setclientid(mi, cr, FALSE, &n4e); 1020 error = n4e.error; 1021 1022 if (error) 1023 goto errout; 1024 1025 /* 1026 * Set option fields in the mount info record 1027 */ 1028 1029 if (svp_head->sv_next) { 1030 mutex_enter(&mi->mi_lock); 1031 mi->mi_flags |= MI4_LLOCK; 1032 mutex_exit(&mi->mi_lock); 1033 } 1034 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args); 1035 1036 errout: 1037 if (error) { 1038 if (rtvp != NULL) { 1039 rp = VTOR4(rtvp); 1040 if (rp->r_flags & R4HASHED) 1041 rp4_rmhash(rp); 1042 } 1043 if (mi != NULL) { 1044 nfs4_async_stop(vfsp); 1045 nfs4_async_manager_stop(vfsp); 1046 nfs4_remove_mi_from_server(mi, NULL); 1047 if (rtvp != NULL) 1048 VN_RELE(rtvp); 1049 if (mntzone != NULL) 1050 zone_rele(mntzone); 1051 /* need to remove it from the zone */ 1052 removed = nfs4_mi_zonelist_remove(mi); 
1053 if (removed) 1054 zone_rele(mi->mi_zone); 1055 MI4_RELE(mi); 1056 if (!(uap->flags & MS_SYSSPACE) && args) { 1057 nfs4_free_args(args); 1058 kmem_free(args, sizeof (*args)); 1059 } 1060 return (error); 1061 } 1062 if (svp_head) 1063 sv4_free(svp_head); 1064 } 1065 1066 if (!(uap->flags & MS_SYSSPACE) && args) { 1067 nfs4_free_args(args); 1068 kmem_free(args, sizeof (*args)); 1069 } 1070 if (rtvp != NULL) 1071 VN_RELE(rtvp); 1072 1073 if (mntzone != NULL) 1074 zone_rele(mntzone); 1075 1076 return (error); 1077 } 1078 1079 #ifdef DEBUG 1080 #define VERS_MSG "NFS4 server " 1081 #else 1082 #define VERS_MSG "NFS server " 1083 #endif 1084 1085 #define READ_MSG \ 1086 VERS_MSG "%s returned 0 for read transfer size" 1087 #define WRITE_MSG \ 1088 VERS_MSG "%s returned 0 for write transfer size" 1089 #define SIZE_MSG \ 1090 VERS_MSG "%s returned 0 for maximum file size" 1091 1092 /* 1093 * Get the symbolic link text from the server for a given filehandle 1094 * of that symlink. 1095 * 1096 * (get symlink text) PUTFH READLINK 1097 */ 1098 static int 1099 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 1100 int flags) 1101 1102 { 1103 COMPOUND4args_clnt args; 1104 COMPOUND4res_clnt res; 1105 int doqueue; 1106 nfs_argop4 argop[2]; 1107 nfs_resop4 *resop; 1108 READLINK4res *lr_res; 1109 uint_t len; 1110 bool_t needrecov = FALSE; 1111 nfs4_recov_state_t recov_state; 1112 nfs4_sharedfh_t *sfh; 1113 nfs4_error_t e; 1114 int num_retry = nfs4_max_mount_retry; 1115 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1116 1117 sfh = sfh4_get(fh, mi); 1118 recov_state.rs_flags = 0; 1119 recov_state.rs_num_retry_despite_err = 0; 1120 1121 recov_retry: 1122 nfs4_error_zinit(&e); 1123 1124 args.array_len = 2; 1125 args.array = argop; 1126 args.ctag = TAG_GET_SYMLINK; 1127 1128 if (! recovery) { 1129 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 1130 if (e.error) { 1131 sfh4_rele(&sfh); 1132 return (e.error); 1133 } 1134 } 1135 1136 /* 0. 
putfh symlink fh */ 1137 argop[0].argop = OP_CPUTFH; 1138 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 1139 1140 /* 1. readlink */ 1141 argop[1].argop = OP_READLINK; 1142 1143 doqueue = 1; 1144 1145 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 1146 1147 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 1148 1149 if (needrecov && !recovery && num_retry-- > 0) { 1150 1151 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1152 "getlinktext_otw: initiating recovery\n")); 1153 1154 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 1155 OP_READLINK, NULL) == FALSE) { 1156 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1157 if (!e.error) 1158 (void) xdr_free(xdr_COMPOUND4res_clnt, 1159 (caddr_t)&res); 1160 goto recov_retry; 1161 } 1162 } 1163 1164 /* 1165 * If non-NFS4 pcol error and/or we weren't able to recover. 1166 */ 1167 if (e.error != 0) { 1168 if (! recovery) 1169 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1170 sfh4_rele(&sfh); 1171 return (e.error); 1172 } 1173 1174 if (res.status) { 1175 e.error = geterrno4(res.status); 1176 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1177 if (! recovery) 1178 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1179 sfh4_rele(&sfh); 1180 return (e.error); 1181 } 1182 1183 /* res.status == NFS4_OK */ 1184 ASSERT(res.status == NFS4_OK); 1185 1186 resop = &res.array[1]; /* readlink res */ 1187 lr_res = &resop->nfs_resop4_u.opreadlink; 1188 1189 /* treat symlink name as data */ 1190 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 1191 1192 if (! recovery) 1193 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1194 sfh4_rele(&sfh); 1195 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1196 return (0); 1197 } 1198 1199 /* 1200 * Skip over consecutive slashes and "/./" in a pathname. 
1201 */ 1202 void 1203 pathname_skipslashdot(struct pathname *pnp) 1204 { 1205 char *c1, *c2; 1206 1207 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 1208 1209 c1 = pnp->pn_path + 1; 1210 c2 = pnp->pn_path + 2; 1211 1212 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 1213 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 1214 pnp->pn_pathlen = pnp->pn_pathlen - 2; 1215 } else { 1216 pnp->pn_path++; 1217 pnp->pn_pathlen--; 1218 } 1219 } 1220 } 1221 1222 /* 1223 * Resolve a symbolic link path. The symlink is in the nth component of 1224 * svp->sv_path and has an nfs4 file handle "fh". 1225 * Upon return, the sv_path will point to the new path that has the nth 1226 * component resolved to its symlink text. 1227 */ 1228 int 1229 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 1230 cred_t *cr, int flags) 1231 { 1232 char *oldpath; 1233 char *symlink, *newpath; 1234 struct pathname oldpn, newpn; 1235 char component[MAXNAMELEN]; 1236 int i, addlen, error = 0; 1237 int oldpathlen; 1238 1239 /* Get the symbolic link text over the wire. */ 1240 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 1241 1242 if (error || symlink == NULL || strlen(symlink) == 0) 1243 return (error); 1244 1245 /* 1246 * Compose the new pathname. 1247 * Note: 1248 * - only the nth component is resolved for the pathname. 1249 * - pathname.pn_pathlen does not count the ending null byte. 1250 */ 1251 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1252 oldpath = svp->sv_path; 1253 oldpathlen = svp->sv_pathlen; 1254 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1255 nfs_rw_exit(&svp->sv_lock); 1256 kmem_free(symlink, strlen(symlink) + 1); 1257 return (error); 1258 } 1259 nfs_rw_exit(&svp->sv_lock); 1260 pn_alloc(&newpn); 1261 1262 /* 1263 * Skip over previous components from the oldpath so that the 1264 * oldpn.pn_path will point to the symlink component. 
Skip 1265 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1266 * pn_getcompnent can get the component. 1267 */ 1268 for (i = 1; i < nth; i++) { 1269 pathname_skipslashdot(&oldpn); 1270 error = pn_getcomponent(&oldpn, component); 1271 if (error) 1272 goto out; 1273 } 1274 1275 /* 1276 * Copy the old path upto the component right before the symlink 1277 * if the symlink is not an absolute path. 1278 */ 1279 if (symlink[0] != '/') { 1280 addlen = oldpn.pn_path - oldpn.pn_buf; 1281 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1282 newpn.pn_pathlen += addlen; 1283 newpn.pn_path += addlen; 1284 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1285 newpn.pn_pathlen++; 1286 newpn.pn_path++; 1287 } 1288 1289 /* copy the resolved symbolic link text */ 1290 addlen = strlen(symlink); 1291 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1292 error = ENAMETOOLONG; 1293 goto out; 1294 } 1295 bcopy(symlink, newpn.pn_path, addlen); 1296 newpn.pn_pathlen += addlen; 1297 newpn.pn_path += addlen; 1298 1299 /* 1300 * Check if there is any remaining path after the symlink component. 1301 * First, skip the symlink component. 1302 */ 1303 pathname_skipslashdot(&oldpn); 1304 if (error = pn_getcomponent(&oldpn, component)) 1305 goto out; 1306 1307 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1308 1309 /* 1310 * Copy the remaining path to the new pathname if there is any. 
1311 */ 1312 if (addlen > 0) { 1313 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1314 error = ENAMETOOLONG; 1315 goto out; 1316 } 1317 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1318 newpn.pn_pathlen += addlen; 1319 } 1320 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1321 1322 /* get the newpath and store it in the servinfo4_t */ 1323 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1324 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1325 newpath[newpn.pn_pathlen] = '\0'; 1326 1327 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1328 svp->sv_path = newpath; 1329 svp->sv_pathlen = strlen(newpath) + 1; 1330 nfs_rw_exit(&svp->sv_lock); 1331 1332 kmem_free(oldpath, oldpathlen); 1333 out: 1334 kmem_free(symlink, strlen(symlink) + 1); 1335 pn_free(&newpn); 1336 pn_free(&oldpn); 1337 1338 return (error); 1339 } 1340 1341 /* 1342 * Get the root filehandle for the given filesystem and server, and update 1343 * svp. 1344 * 1345 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1346 * to coordinate with recovery. Otherwise, the caller is assumed to be 1347 * the recovery thread or have already done a start_fop. 1348 * 1349 * Errors are returned by the nfs4_error_t parameter. 
1350 */ 1351 1352 static void 1353 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1354 int flags, cred_t *cr, nfs4_error_t *ep) 1355 { 1356 COMPOUND4args_clnt args; 1357 COMPOUND4res_clnt res; 1358 int doqueue = 1; 1359 nfs_argop4 *argop; 1360 nfs_resop4 *resop; 1361 nfs4_ga_res_t *garp; 1362 int num_argops; 1363 lookup4_param_t lookuparg; 1364 nfs_fh4 *tmpfhp; 1365 nfs_fh4 *resfhp; 1366 bool_t needrecov = FALSE; 1367 nfs4_recov_state_t recov_state; 1368 int llndx; 1369 int nthcomp; 1370 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1371 1372 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1373 ASSERT(svp->sv_path != NULL); 1374 if (svp->sv_path[0] == '\0') { 1375 nfs_rw_exit(&svp->sv_lock); 1376 nfs4_error_init(ep, EINVAL); 1377 return; 1378 } 1379 nfs_rw_exit(&svp->sv_lock); 1380 1381 recov_state.rs_flags = 0; 1382 recov_state.rs_num_retry_despite_err = 0; 1383 recov_retry: 1384 nfs4_error_zinit(ep); 1385 1386 if (!recovery) { 1387 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1388 &recov_state, NULL); 1389 1390 /* 1391 * If recovery has been started and this request as 1392 * initiated by a mount, then we must wait for recovery 1393 * to finish before proceeding, otherwise, the error 1394 * cleanup would remove data structures needed by the 1395 * recovery thread. 1396 */ 1397 if (ep->error) { 1398 mutex_enter(&mi->mi_lock); 1399 if (mi->mi_flags & MI4_MOUNTING) { 1400 mi->mi_flags |= MI4_RECOV_FAIL; 1401 mi->mi_error = EIO; 1402 1403 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1404 "nfs4getfh_otw: waiting 4 recovery\n")); 1405 1406 while (mi->mi_flags & MI4_RECOV_ACTIV) 1407 cv_wait(&mi->mi_failover_cv, 1408 &mi->mi_lock); 1409 } 1410 mutex_exit(&mi->mi_lock); 1411 return; 1412 } 1413 1414 /* 1415 * If the client does not specify a specific flavor to use 1416 * and has not gotten a secinfo list from the server yet, 1417 * retrieve the secinfo list from the server and use a 1418 * flavor from the list to mount. 
1419 * 1420 * If fail to get the secinfo list from the server, then 1421 * try the default flavor. 1422 */ 1423 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1424 svp->sv_secinfo == NULL) { 1425 (void) nfs4_secinfo_path(mi, cr, FALSE); 1426 } 1427 } 1428 1429 if (recovery) 1430 args.ctag = TAG_REMAP_MOUNT; 1431 else 1432 args.ctag = TAG_MOUNT; 1433 1434 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1435 lookuparg.argsp = &args; 1436 lookuparg.resp = &res; 1437 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1438 lookuparg.trailer_len = 0; 1439 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1440 lookuparg.mi = mi; 1441 1442 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1443 ASSERT(svp->sv_path != NULL); 1444 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1445 nfs_rw_exit(&svp->sv_lock); 1446 1447 argop = args.array; 1448 num_argops = args.array_len; 1449 1450 /* choose public or root filehandle */ 1451 if (flags & NFS4_GETFH_PUBLIC) 1452 argop[0].argop = OP_PUTPUBFH; 1453 else 1454 argop[0].argop = OP_PUTROOTFH; 1455 1456 /* get fh */ 1457 argop[1].argop = OP_GETFH; 1458 1459 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1460 "nfs4getfh_otw: %s call, mi 0x%p", 1461 needrecov ? 
"recov" : "first", (void *)mi)); 1462 1463 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1464 1465 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1466 1467 if (needrecov) { 1468 bool_t abort; 1469 1470 if (recovery) { 1471 nfs4args_lookup_free(argop, num_argops); 1472 kmem_free(argop, 1473 lookuparg.arglen * sizeof (nfs_argop4)); 1474 if (!ep->error) 1475 (void) xdr_free(xdr_COMPOUND4res_clnt, 1476 (caddr_t)&res); 1477 return; 1478 } 1479 1480 NFS4_DEBUG(nfs4_client_recov_debug, 1481 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1482 1483 abort = nfs4_start_recovery(ep, mi, NULL, 1484 NULL, NULL, NULL, OP_GETFH, NULL); 1485 if (!ep->error) { 1486 ep->error = geterrno4(res.status); 1487 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1488 } 1489 nfs4args_lookup_free(argop, num_argops); 1490 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1491 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1492 /* have another go? */ 1493 if (abort == FALSE) 1494 goto recov_retry; 1495 return; 1496 } 1497 1498 /* 1499 * No recovery, but check if error is set. 1500 */ 1501 if (ep->error) { 1502 nfs4args_lookup_free(argop, num_argops); 1503 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1504 if (!recovery) 1505 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1506 needrecov); 1507 return; 1508 } 1509 1510 is_link_err: 1511 1512 /* for non-recovery errors */ 1513 if (res.status && res.status != NFS4ERR_SYMLINK) { 1514 if (!recovery) { 1515 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1516 needrecov); 1517 } 1518 nfs4args_lookup_free(argop, num_argops); 1519 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1520 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1521 return; 1522 } 1523 1524 /* 1525 * If any intermediate component in the path is a symbolic link, 1526 * resolve the symlink, then try mount again using the new path. 
1527 */ 1528 if (res.status == NFS4ERR_SYMLINK) { 1529 int where; 1530 1531 /* 1532 * This must be from OP_LOOKUP failure. The (cfh) for this 1533 * OP_LOOKUP is a symlink node. Found out where the 1534 * OP_GETFH is for the (cfh) that is a symlink node. 1535 * 1536 * Example: 1537 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1538 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1539 * 1540 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1541 * In this case, where = 7, nthcomp = 2. 1542 */ 1543 where = res.array_len - 2; 1544 ASSERT(where > 0); 1545 1546 resop = &res.array[where - 1]; 1547 ASSERT(resop->resop == OP_GETFH); 1548 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1549 nthcomp = res.array_len/3 - 1; 1550 1551 /* 1552 * Need to call nfs4_end_op before resolve_sympath to avoid 1553 * potential nfs4_start_op deadlock. 1554 */ 1555 if (!recovery) 1556 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1557 needrecov); 1558 1559 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1560 flags); 1561 1562 nfs4args_lookup_free(argop, num_argops); 1563 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1564 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1565 1566 if (ep->error) 1567 return; 1568 1569 goto recov_retry; 1570 } 1571 1572 /* getfh */ 1573 resop = &res.array[res.array_len - 2]; 1574 ASSERT(resop->resop == OP_GETFH); 1575 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1576 1577 /* getattr fsinfo res */ 1578 resop++; 1579 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1580 1581 *vtp = garp->n4g_va.va_type; 1582 1583 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1584 1585 mutex_enter(&mi->mi_lock); 1586 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1587 mi->mi_flags |= MI4_LINK; 1588 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1589 mi->mi_flags |= MI4_SYMLINK; 1590 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1591 mi->mi_flags |= MI4_ACL; 1592 mutex_exit(&mi->mi_lock); 
1593 1594 if (garp->n4g_ext_res->n4g_maxread == 0) 1595 mi->mi_tsize = 1596 MIN(MAXBSIZE, mi->mi_tsize); 1597 else 1598 mi->mi_tsize = 1599 MIN(garp->n4g_ext_res->n4g_maxread, 1600 mi->mi_tsize); 1601 1602 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1603 mi->mi_stsize = 1604 MIN(MAXBSIZE, mi->mi_stsize); 1605 else 1606 mi->mi_stsize = 1607 MIN(garp->n4g_ext_res->n4g_maxwrite, 1608 mi->mi_stsize); 1609 1610 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1611 mi->mi_maxfilesize = 1612 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1613 mi->mi_maxfilesize); 1614 1615 /* 1616 * If the final component is a a symbolic link, resolve the symlink, 1617 * then try mount again using the new path. 1618 * 1619 * Assume no symbolic link for root filesysm "/". 1620 */ 1621 if (*vtp == VLNK) { 1622 /* 1623 * nthcomp is the total result length minus 1624 * the 1st 2 OPs (PUTROOTFH, GETFH), 1625 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1626 * 1627 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1628 * LOOKUP 2nd-comp GETFH GETATTR 1629 * 1630 * (8 - 2)/3 = 2 1631 */ 1632 nthcomp = (res.array_len - 2)/3; 1633 1634 /* 1635 * Need to call nfs4_end_op before resolve_sympath to avoid 1636 * potential nfs4_start_op deadlock. See RFE 4777612. 1637 */ 1638 if (!recovery) 1639 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1640 needrecov); 1641 1642 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1643 flags); 1644 1645 nfs4args_lookup_free(argop, num_argops); 1646 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1647 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1648 1649 if (ep->error) 1650 return; 1651 1652 goto recov_retry; 1653 } 1654 1655 /* 1656 * We need to figure out where in the compound the getfh 1657 * for the parent directory is. If the object to be mounted is 1658 * the root, then there is no lookup at all: 1659 * PUTROOTFH, GETFH. 
1660 * If the object to be mounted is in the root, then the compound is: 1661 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1662 * In either of these cases, the index of the GETFH is 1. 1663 * If it is not at the root, then it's something like: 1664 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1665 * LOOKUP, GETFH, GETATTR 1666 * In this case, the index is llndx (last lookup index) - 2. 1667 */ 1668 if (llndx == -1 || llndx == 2) 1669 resop = &res.array[1]; 1670 else { 1671 ASSERT(llndx > 2); 1672 resop = &res.array[llndx-2]; 1673 } 1674 1675 ASSERT(resop->resop == OP_GETFH); 1676 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1677 1678 /* save the filehandles for the replica */ 1679 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1680 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1681 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1682 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1683 tmpfhp->nfs_fh4_len); 1684 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1685 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1686 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1687 1688 /* initialize fsid and supp_attrs for server fs */ 1689 svp->sv_fsid = garp->n4g_fsid; 1690 svp->sv_supp_attrs = 1691 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1692 1693 nfs_rw_exit(&svp->sv_lock); 1694 1695 nfs4args_lookup_free(argop, num_argops); 1696 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1697 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1698 if (!recovery) 1699 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1700 } 1701 1702 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1703 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1704 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1705 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1706 1707 /* 1708 * Remap the root filehandle for the given filesystem. 
1709 * 1710 * results returned via the nfs4_error_t parameter. 1711 */ 1712 void 1713 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1714 { 1715 struct servinfo4 *svp; 1716 vtype_t vtype; 1717 nfs_fh4 rootfh; 1718 int getfh_flags; 1719 char *orig_sv_path; 1720 int orig_sv_pathlen, num_retry; 1721 1722 mutex_enter(&mi->mi_lock); 1723 1724 remap_retry: 1725 svp = mi->mi_curr_serv; 1726 getfh_flags = 1727 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1728 getfh_flags |= 1729 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1730 mutex_exit(&mi->mi_lock); 1731 1732 /* 1733 * Just in case server path being mounted contains 1734 * symlinks and fails w/STALE, save the initial sv_path 1735 * so we can redrive the initial mount compound with the 1736 * initial sv_path -- not a symlink-expanded version. 1737 * 1738 * This could only happen if a symlink was expanded 1739 * and the expanded mount compound failed stale. Because 1740 * it could be the case that the symlink was removed at 1741 * the server (and replaced with another symlink/dir, 1742 * we need to use the initial sv_path when attempting 1743 * to re-lookup everything and recover. 1744 */ 1745 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1746 orig_sv_pathlen = svp->sv_pathlen; 1747 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1748 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1749 nfs_rw_exit(&svp->sv_lock); 1750 1751 num_retry = nfs4_max_mount_retry; 1752 1753 do { 1754 /* 1755 * Get the root fh from the server. Retry nfs4_max_mount_retry 1756 * (2) times if it fails with STALE since the recovery 1757 * infrastructure doesn't do STALE recovery for components 1758 * of the server path to the object being mounted. 1759 */ 1760 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1761 1762 if (ep->error == 0 && ep->stat == NFS4_OK) 1763 break; 1764 1765 /* 1766 * For some reason, the mount compound failed. 
Before 1767 * retrying, we need to restore the original sv_path 1768 * because it might have contained symlinks that were 1769 * expanded by nfsgetfh_otw before the failure occurred. 1770 * replace current sv_path with orig sv_path -- just in case 1771 * it changed due to embedded symlinks. 1772 */ 1773 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1774 if (orig_sv_pathlen != svp->sv_pathlen) { 1775 kmem_free(svp->sv_path, svp->sv_pathlen); 1776 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1777 svp->sv_pathlen = orig_sv_pathlen; 1778 } 1779 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1780 nfs_rw_exit(&svp->sv_lock); 1781 1782 } while (num_retry-- > 0); 1783 1784 kmem_free(orig_sv_path, orig_sv_pathlen); 1785 1786 if (ep->error != 0 || ep->stat != 0) { 1787 return; 1788 } 1789 1790 if (vtype != VNON && vtype != mi->mi_type) { 1791 /* shouldn't happen */ 1792 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1793 "nfs4_remap_root: server root vnode type (%d) doesn't " 1794 "match mount info (%d)", vtype, mi->mi_type); 1795 } 1796 1797 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1798 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1799 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1800 nfs_rw_exit(&svp->sv_lock); 1801 sfh4_update(mi->mi_rootfh, &rootfh); 1802 1803 /* 1804 * It's possible that recovery took place on the filesystem 1805 * and the server has been updated between the time we did 1806 * the nfs4getfh_otw and now. Re-drive the otw operation 1807 * to make sure we have a good fh. 
1808 */ 1809 mutex_enter(&mi->mi_lock); 1810 if (mi->mi_curr_serv != svp) 1811 goto remap_retry; 1812 1813 mutex_exit(&mi->mi_lock); 1814 } 1815 1816 static int 1817 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1818 int flags, cred_t *cr, zone_t *zone) 1819 { 1820 vnode_t *rtvp = NULL; 1821 mntinfo4_t *mi; 1822 dev_t nfs_dev; 1823 int error = 0; 1824 rnode4_t *rp; 1825 int i; 1826 struct vattr va; 1827 vtype_t vtype = VNON; 1828 vtype_t tmp_vtype = VNON; 1829 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1830 nfs4_oo_hash_bucket_t *bucketp; 1831 nfs_fh4 fh; 1832 char *droptext = ""; 1833 struct nfs_stats *nfsstatsp; 1834 nfs4_fname_t *mfname; 1835 nfs4_error_t e; 1836 char *orig_sv_path; 1837 int orig_sv_pathlen, num_retry, removed; 1838 cred_t *lcr = NULL, *tcr = cr; 1839 1840 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1841 ASSERT(nfsstatsp != NULL); 1842 1843 ASSERT(nfs_zone() == zone); 1844 ASSERT(crgetref(cr)); 1845 1846 /* 1847 * Create a mount record and link it to the vfs struct. 
1848 */ 1849 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1850 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1851 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1852 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1853 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1854 1855 if (!(flags & NFSMNT_SOFT)) 1856 mi->mi_flags |= MI4_HARD; 1857 if ((flags & NFSMNT_NOPRINT)) 1858 mi->mi_flags |= MI4_NOPRINT; 1859 if (flags & NFSMNT_INT) 1860 mi->mi_flags |= MI4_INT; 1861 if (flags & NFSMNT_PUBLIC) 1862 mi->mi_flags |= MI4_PUBLIC; 1863 mi->mi_retrans = NFS_RETRIES; 1864 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1865 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1866 mi->mi_timeo = nfs4_cots_timeo; 1867 else 1868 mi->mi_timeo = NFS_TIMEO; 1869 mi->mi_prog = NFS_PROGRAM; 1870 mi->mi_vers = NFS_V4; 1871 mi->mi_rfsnames = rfsnames_v4; 1872 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1873 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1874 mi->mi_servers = svp; 1875 mi->mi_curr_serv = svp; 1876 mi->mi_acregmin = SEC2HR(ACREGMIN); 1877 mi->mi_acregmax = SEC2HR(ACREGMAX); 1878 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1879 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1880 mi->mi_fh_expire_type = FH4_PERSISTENT; 1881 mi->mi_clientid_next = NULL; 1882 mi->mi_clientid_prev = NULL; 1883 mi->mi_grace_wait = 0; 1884 mi->mi_error = 0; 1885 mi->mi_srvsettime = 0; 1886 1887 mi->mi_count = 1; 1888 1889 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1890 mi->mi_stsize = mi->mi_tsize; 1891 1892 if (flags & NFSMNT_DIRECTIO) 1893 mi->mi_flags |= MI4_DIRECTIO; 1894 1895 mi->mi_flags |= MI4_MOUNTING; 1896 1897 /* 1898 * Make a vfs struct for nfs. We do this here instead of below 1899 * because rtvp needs a vfs before we can do a getattr on it. 
1900 * 1901 * Assign a unique device id to the mount 1902 */ 1903 mutex_enter(&nfs_minor_lock); 1904 do { 1905 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1906 nfs_dev = makedevice(nfs_major, nfs_minor); 1907 } while (vfs_devismounted(nfs_dev)); 1908 mutex_exit(&nfs_minor_lock); 1909 1910 vfsp->vfs_dev = nfs_dev; 1911 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1912 vfsp->vfs_data = (caddr_t)mi; 1913 vfsp->vfs_fstype = nfsfstyp; 1914 vfsp->vfs_bsize = nfs4_bsize; 1915 1916 /* 1917 * Initialize fields used to support async putpage operations. 1918 */ 1919 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1920 mi->mi_async_clusters[i] = nfs4_async_clusters; 1921 mi->mi_async_init_clusters = nfs4_async_clusters; 1922 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1923 mi->mi_max_threads = nfs4_max_threads; 1924 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1925 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1926 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1927 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1928 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1929 1930 mi->mi_vfsp = vfsp; 1931 zone_hold(mi->mi_zone = zone); 1932 nfs4_mi_zonelist_add(mi); 1933 1934 /* 1935 * Initialize the <open owner/cred> hash table. 1936 */ 1937 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1938 bucketp = &(mi->mi_oo_list[i]); 1939 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1940 list_create(&bucketp->b_oo_hash_list, 1941 sizeof (nfs4_open_owner_t), 1942 offsetof(nfs4_open_owner_t, oo_hash_node)); 1943 } 1944 1945 /* 1946 * Initialize the freed open owner list. 
1947 */ 1948 mi->mi_foo_num = 0; 1949 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1950 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1951 offsetof(nfs4_open_owner_t, oo_foo_node)); 1952 1953 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1954 offsetof(nfs4_lost_rqst_t, lr_node)); 1955 1956 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1957 offsetof(nfs4_bseqid_entry_t, bs_node)); 1958 1959 /* 1960 * Initialize the msg buffer. 1961 */ 1962 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1963 offsetof(nfs4_debug_msg_t, msg_node)); 1964 mi->mi_msg_count = 0; 1965 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1966 1967 /* 1968 * Initialize kstats 1969 */ 1970 nfs4_mnt_kstat_init(vfsp); 1971 1972 /* 1973 * Initialize the shared filehandle pool, and get the fname for 1974 * the filesystem root. 1975 */ 1976 sfh4_createtab(&mi->mi_filehandles); 1977 mi->mi_fname = fn_get(NULL, "."); 1978 1979 /* 1980 * Save server path we're attempting to mount. 1981 */ 1982 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1983 orig_sv_pathlen = svp_head->sv_pathlen; 1984 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1985 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1986 nfs_rw_exit(&svp->sv_lock); 1987 1988 /* 1989 * Make the GETFH call to get root fh for each replica. 1990 */ 1991 if (svp_head->sv_next) 1992 droptext = ", dropping replica"; 1993 1994 /* 1995 * If the uid is set then set the creds for secure mounts 1996 * by proxy processes such as automountd. 
1997 */ 1998 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1999 if (svp->sv_secdata->uid != 0) { 2000 lcr = crdup(cr); 2001 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 2002 tcr = lcr; 2003 } 2004 nfs_rw_exit(&svp->sv_lock); 2005 for (svp = svp_head; svp; svp = svp->sv_next) { 2006 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 2007 nfs_cmn_err(error, CE_WARN, 2008 VERS_MSG "Host %s is a duplicate%s", 2009 svp->sv_hostname, droptext); 2010 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2011 svp->sv_flags |= SV4_NOTINUSE; 2012 nfs_rw_exit(&svp->sv_lock); 2013 continue; 2014 } 2015 mi->mi_curr_serv = svp; 2016 2017 /* 2018 * Just in case server path being mounted contains 2019 * symlinks and fails w/STALE, save the initial sv_path 2020 * so we can redrive the initial mount compound with the 2021 * initial sv_path -- not a symlink-expanded version. 2022 * 2023 * This could only happen if a symlink was expanded 2024 * and the expanded mount compound failed stale. Because 2025 * it could be the case that the symlink was removed at 2026 * the server (and replaced with another symlink/dir, 2027 * we need to use the initial sv_path when attempting 2028 * to re-lookup everything and recover. 2029 * 2030 * Other mount errors should evenutally be handled here also 2031 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 2032 * failures will result in mount being redriven a few times. 2033 */ 2034 num_retry = nfs4_max_mount_retry; 2035 do { 2036 nfs4getfh_otw(mi, svp, &tmp_vtype, 2037 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 2038 NFS4_GETFH_NEEDSOP, tcr, &e); 2039 2040 if (e.error == 0 && e.stat == NFS4_OK) 2041 break; 2042 2043 /* 2044 * replace current sv_path with orig sv_path -- just in 2045 * case it changed due to embedded symlinks. 
2046 */ 2047 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2048 if (orig_sv_pathlen != svp->sv_pathlen) { 2049 kmem_free(svp->sv_path, svp->sv_pathlen); 2050 svp->sv_path = kmem_alloc(orig_sv_pathlen, 2051 KM_SLEEP); 2052 svp->sv_pathlen = orig_sv_pathlen; 2053 } 2054 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2055 nfs_rw_exit(&svp->sv_lock); 2056 2057 } while (num_retry-- > 0); 2058 2059 error = e.error ? e.error : geterrno4(e.stat); 2060 if (error) { 2061 nfs_cmn_err(error, CE_WARN, 2062 VERS_MSG "initial call to %s failed%s: %m", 2063 svp->sv_hostname, droptext); 2064 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2065 svp->sv_flags |= SV4_NOTINUSE; 2066 nfs_rw_exit(&svp->sv_lock); 2067 mi->mi_flags &= ~MI4_RECOV_FAIL; 2068 mi->mi_error = 0; 2069 continue; 2070 } 2071 2072 if (tmp_vtype == VBAD) { 2073 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2074 VERS_MSG "%s returned a bad file type for " 2075 "root%s", svp->sv_hostname, droptext); 2076 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2077 svp->sv_flags |= SV4_NOTINUSE; 2078 nfs_rw_exit(&svp->sv_lock); 2079 continue; 2080 } 2081 2082 if (vtype == VNON) { 2083 vtype = tmp_vtype; 2084 } else if (vtype != tmp_vtype) { 2085 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2086 VERS_MSG "%s returned a different file type " 2087 "for root%s", svp->sv_hostname, droptext); 2088 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2089 svp->sv_flags |= SV4_NOTINUSE; 2090 nfs_rw_exit(&svp->sv_lock); 2091 continue; 2092 } 2093 if (firstsvp == NULL) 2094 firstsvp = svp; 2095 } 2096 2097 kmem_free(orig_sv_path, orig_sv_pathlen); 2098 2099 if (firstsvp == NULL) { 2100 if (error == 0) 2101 error = ENOENT; 2102 goto bad; 2103 } 2104 2105 mi->mi_curr_serv = svp = firstsvp; 2106 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2107 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2108 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2109 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2110 mi->mi_rootfh = 
sfh4_get(&fh, mi); 2111 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2112 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2113 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2114 nfs_rw_exit(&svp->sv_lock); 2115 2116 /* 2117 * Make the root vnode without attributes. 2118 */ 2119 mfname = mi->mi_fname; 2120 fn_hold(mfname); 2121 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2122 &mfname, NULL, mi, cr, gethrtime()); 2123 rtvp->v_type = vtype; 2124 2125 mi->mi_curread = mi->mi_tsize; 2126 mi->mi_curwrite = mi->mi_stsize; 2127 2128 /* 2129 * Start the manager thread responsible for handling async worker 2130 * threads. 2131 */ 2132 MI4_HOLD(mi); 2133 VFS_HOLD(vfsp); /* add reference for thread */ 2134 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2135 vfsp, 0, minclsyspri); 2136 ASSERT(mi->mi_manager_thread != NULL); 2137 2138 /* 2139 * Create the thread that handles over-the-wire calls for 2140 * VOP_INACTIVE. 2141 * This needs to happen after the manager thread is created. 2142 */ 2143 MI4_HOLD(mi); 2144 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2145 mi, 0, minclsyspri); 2146 ASSERT(mi->mi_inactive_thread != NULL); 2147 2148 /* If we didn't get a type, get one now */ 2149 if (rtvp->v_type == VNON) { 2150 va.va_mask = AT_TYPE; 2151 error = nfs4getattr(rtvp, &va, tcr); 2152 if (error) 2153 goto bad; 2154 rtvp->v_type = va.va_type; 2155 } 2156 2157 mi->mi_type = rtvp->v_type; 2158 2159 mutex_enter(&mi->mi_lock); 2160 mi->mi_flags &= ~MI4_MOUNTING; 2161 mutex_exit(&mi->mi_lock); 2162 2163 *rtvpp = rtvp; 2164 if (lcr != NULL) 2165 crfree(lcr); 2166 2167 return (0); 2168 bad: 2169 /* 2170 * An error occurred somewhere, need to clean up... 2171 */ 2172 if (lcr != NULL) 2173 crfree(lcr); 2174 if (rtvp != NULL) { 2175 /* 2176 * We need to release our reference to the root vnode and 2177 * destroy the mntinfo4 struct that we just created. 
2178 */ 2179 rp = VTOR4(rtvp); 2180 if (rp->r_flags & R4HASHED) 2181 rp4_rmhash(rp); 2182 VN_RELE(rtvp); 2183 } 2184 nfs4_async_stop(vfsp); 2185 nfs4_async_manager_stop(vfsp); 2186 removed = nfs4_mi_zonelist_remove(mi); 2187 if (removed) 2188 zone_rele(mi->mi_zone); 2189 2190 /* 2191 * This releases the initial "hold" of the mi since it will never 2192 * be referenced by the vfsp. Also, when mount returns to vfs.c 2193 * with an error, the vfsp will be destroyed, not rele'd. 2194 */ 2195 MI4_RELE(mi); 2196 2197 *rtvpp = NULL; 2198 return (error); 2199 } 2200 2201 /* 2202 * vfs operations 2203 */ 2204 static int 2205 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2206 { 2207 mntinfo4_t *mi; 2208 ushort_t omax; 2209 int removed; 2210 2211 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2212 return (EPERM); 2213 2214 mi = VFTOMI4(vfsp); 2215 2216 if (flag & MS_FORCE) { 2217 vfsp->vfs_flag |= VFS_UNMOUNTED; 2218 if (nfs_zone() != mi->mi_zone) { 2219 /* 2220 * If the request is coming from the wrong zone, 2221 * we don't want to create any new threads, and 2222 * performance is not a concern. Do everything 2223 * inline. 2224 */ 2225 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2226 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2227 (void *)vfsp)); 2228 nfs4_free_mount(vfsp, cr); 2229 } else { 2230 /* 2231 * Free data structures asynchronously, to avoid 2232 * blocking the current thread (for performance 2233 * reasons only). 2234 */ 2235 async_free_mount(vfsp, cr); 2236 } 2237 return (0); 2238 } 2239 /* 2240 * Wait until all asynchronous putpage operations on 2241 * this file system are complete before flushing rnodes 2242 * from the cache. 2243 */ 2244 omax = mi->mi_max_threads; 2245 if (nfs4_async_stop_sig(vfsp)) { 2246 2247 return (EINTR); 2248 } 2249 r4flush(vfsp, cr); 2250 /* 2251 * If there are any active vnodes on this file system, 2252 * then the file system is busy and can't be umounted. 
2253 */ 2254 if (check_rtable4(vfsp)) { 2255 mutex_enter(&mi->mi_async_lock); 2256 mi->mi_max_threads = omax; 2257 mutex_exit(&mi->mi_async_lock); 2258 return (EBUSY); 2259 } 2260 /* 2261 * The unmount can't fail from now on, and there are no active 2262 * files that could require over-the-wire calls to the server, 2263 * so stop the async manager and the inactive thread. 2264 */ 2265 nfs4_async_manager_stop(vfsp); 2266 /* 2267 * Destroy all rnodes belonging to this file system from the 2268 * rnode hash queues and purge any resources allocated to 2269 * them. 2270 */ 2271 destroy_rtable4(vfsp, cr); 2272 vfsp->vfs_flag |= VFS_UNMOUNTED; 2273 2274 nfs4_remove_mi_from_server(mi, NULL); 2275 removed = nfs4_mi_zonelist_remove(mi); 2276 if (removed) 2277 zone_rele(mi->mi_zone); 2278 2279 return (0); 2280 } 2281 2282 /* 2283 * find root of nfs 2284 */ 2285 static int 2286 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2287 { 2288 mntinfo4_t *mi; 2289 vnode_t *vp; 2290 nfs4_fname_t *mfname; 2291 servinfo4_t *svp; 2292 2293 mi = VFTOMI4(vfsp); 2294 2295 if (nfs_zone() != mi->mi_zone) 2296 return (EPERM); 2297 2298 svp = mi->mi_curr_serv; 2299 if (svp) { 2300 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2301 if (svp->sv_flags & SV4_ROOT_STALE) { 2302 nfs_rw_exit(&svp->sv_lock); 2303 2304 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2305 if (svp->sv_flags & SV4_ROOT_STALE) { 2306 svp->sv_flags &= ~SV4_ROOT_STALE; 2307 nfs_rw_exit(&svp->sv_lock); 2308 return (ENOENT); 2309 } 2310 nfs_rw_exit(&svp->sv_lock); 2311 } else 2312 nfs_rw_exit(&svp->sv_lock); 2313 } 2314 2315 mfname = mi->mi_fname; 2316 fn_hold(mfname); 2317 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2318 VFTOMI4(vfsp), CRED(), gethrtime()); 2319 2320 if (VTOR4(vp)->r_flags & R4STALE) { 2321 VN_RELE(vp); 2322 return (ENOENT); 2323 } 2324 2325 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2326 2327 vp->v_type = mi->mi_type; 2328 2329 *vpp = vp; 2330 2331 return (0); 2332 } 2333 
2334 static int 2335 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2336 { 2337 int error; 2338 nfs4_ga_res_t gar; 2339 nfs4_ga_ext_res_t ger; 2340 2341 gar.n4g_ext_res = &ger; 2342 2343 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2344 NFS4_STATFS_ATTR_MASK, cr)) 2345 return (error); 2346 2347 *sbp = gar.n4g_ext_res->n4g_sb; 2348 2349 return (0); 2350 } 2351 2352 /* 2353 * Get file system statistics. 2354 */ 2355 static int 2356 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2357 { 2358 int error; 2359 vnode_t *vp; 2360 cred_t *cr; 2361 2362 error = nfs4_root(vfsp, &vp); 2363 if (error) 2364 return (error); 2365 2366 cr = CRED(); 2367 2368 error = nfs4_statfs_otw(vp, sbp, cr); 2369 if (!error) { 2370 (void) strncpy(sbp->f_basetype, 2371 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2372 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2373 } else { 2374 nfs4_purge_stale_fh(error, vp, cr); 2375 } 2376 2377 VN_RELE(vp); 2378 2379 return (error); 2380 } 2381 2382 static kmutex_t nfs4_syncbusy; 2383 2384 /* 2385 * Flush dirty nfs files for file system vfsp. 2386 * If vfsp == NULL, all nfs files are flushed. 2387 * 2388 * SYNC_CLOSE in flag is passed to us to 2389 * indicate that we are shutting down and or 2390 * rebooting. 2391 */ 2392 static int 2393 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2394 { 2395 /* 2396 * Cross-zone calls are OK here, since this translates to a 2397 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2398 */ 2399 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2400 r4flush(vfsp, cr); 2401 mutex_exit(&nfs4_syncbusy); 2402 } 2403 2404 /* 2405 * if SYNC_CLOSE is set then we know that 2406 * the system is rebooting, mark the mntinfo 2407 * for later examination. 
2408 */ 2409 if (vfsp && (flag & SYNC_CLOSE)) { 2410 mntinfo4_t *mi; 2411 2412 mi = VFTOMI4(vfsp); 2413 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2414 mutex_enter(&mi->mi_lock); 2415 mi->mi_flags |= MI4_SHUTDOWN; 2416 mutex_exit(&mi->mi_lock); 2417 } 2418 } 2419 return (0); 2420 } 2421 2422 /* 2423 * vget is difficult, if not impossible, to support in v4 because we don't 2424 * know the parent directory or name, which makes it impossible to create a 2425 * useful shadow vnode. And we need the shadow vnode for things like 2426 * OPEN. 2427 */ 2428 2429 /* ARGSUSED */ 2430 /* 2431 * XXX Check nfs4_vget_pseudo() for dependency. 2432 */ 2433 static int 2434 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2435 { 2436 return (EREMOTE); 2437 } 2438 2439 /* 2440 * nfs4_mountroot get called in the case where we are diskless booting. All 2441 * we need from here is the ability to get the server info and from there we 2442 * can simply call nfs4_rootvp. 2443 */ 2444 /* ARGSUSED */ 2445 static int 2446 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2447 { 2448 vnode_t *rtvp; 2449 char root_hostname[SYS_NMLN+1]; 2450 struct servinfo4 *svp; 2451 int error; 2452 int vfsflags; 2453 size_t size; 2454 char *root_path; 2455 struct pathname pn; 2456 char *name; 2457 cred_t *cr; 2458 mntinfo4_t *mi; 2459 struct nfs_args args; /* nfs mount arguments */ 2460 static char token[10]; 2461 nfs4_error_t n4e; 2462 2463 bzero(&args, sizeof (args)); 2464 2465 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2466 clkset(-1L); /* hack for now - until we get time svc? */ 2467 2468 if (why == ROOT_REMOUNT) { 2469 /* 2470 * Shouldn't happen. 2471 */ 2472 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2473 } 2474 2475 if (why == ROOT_UNMOUNT) { 2476 /* 2477 * Nothing to do for NFS. 
2478 */ 2479 return (0); 2480 } 2481 2482 /* 2483 * why == ROOT_INIT 2484 */ 2485 2486 name = token; 2487 *name = 0; 2488 (void) getfsname("root", name, sizeof (token)); 2489 2490 pn_alloc(&pn); 2491 root_path = pn.pn_path; 2492 2493 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2494 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2495 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2496 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2497 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2498 2499 /* 2500 * Get server address 2501 * Get the root path 2502 * Get server's transport 2503 * Get server's hostname 2504 * Get options 2505 */ 2506 args.addr = &svp->sv_addr; 2507 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2508 args.fh = (char *)&svp->sv_fhandle; 2509 args.knconf = svp->sv_knconf; 2510 args.hostname = root_hostname; 2511 vfsflags = 0; 2512 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2513 &args, &vfsflags)) { 2514 if (error == EPROTONOSUPPORT) 2515 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2516 "mount_root failed: server doesn't support NFS V4"); 2517 else 2518 nfs_cmn_err(error, CE_WARN, 2519 "nfs4_mountroot: mount_root failed: %m"); 2520 nfs_rw_exit(&svp->sv_lock); 2521 sv4_free(svp); 2522 pn_free(&pn); 2523 return (error); 2524 } 2525 nfs_rw_exit(&svp->sv_lock); 2526 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2527 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2528 (void) strcpy(svp->sv_hostname, root_hostname); 2529 2530 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2531 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2532 (void) strcpy(svp->sv_path, root_path); 2533 2534 /* 2535 * Force root partition to always be mounted with AUTH_UNIX for now 2536 */ 2537 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2538 svp->sv_secdata->secmod = AUTH_UNIX; 2539 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2540 
svp->sv_secdata->data = NULL; 2541 2542 cr = crgetcred(); 2543 rtvp = NULL; 2544 2545 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2546 2547 if (error) { 2548 crfree(cr); 2549 pn_free(&pn); 2550 goto errout; 2551 } 2552 2553 mi = VTOMI4(rtvp); 2554 2555 /* 2556 * Send client id to the server, if necessary 2557 */ 2558 nfs4_error_zinit(&n4e); 2559 nfs4setclientid(mi, cr, FALSE, &n4e); 2560 error = n4e.error; 2561 2562 crfree(cr); 2563 2564 if (error) { 2565 pn_free(&pn); 2566 goto errout; 2567 } 2568 2569 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2570 if (error) { 2571 nfs_cmn_err(error, CE_WARN, 2572 "nfs4_mountroot: invalid root mount options"); 2573 pn_free(&pn); 2574 goto errout; 2575 } 2576 2577 (void) vfs_lock_wait(vfsp); 2578 vfs_add(NULL, vfsp, vfsflags); 2579 vfs_unlock(vfsp); 2580 2581 size = strlen(svp->sv_hostname); 2582 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2583 rootfs.bo_name[size] = ':'; 2584 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2585 2586 pn_free(&pn); 2587 2588 errout: 2589 if (error) { 2590 sv4_free(svp); 2591 nfs4_async_stop(vfsp); 2592 nfs4_async_manager_stop(vfsp); 2593 } 2594 2595 if (rtvp != NULL) 2596 VN_RELE(rtvp); 2597 2598 return (error); 2599 } 2600 2601 /* 2602 * Initialization routine for VFS routines. 
Should only be called once 2603 */ 2604 int 2605 nfs4_vfsinit(void) 2606 { 2607 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2608 nfs4setclientid_init(); 2609 return (0); 2610 } 2611 2612 void 2613 nfs4_vfsfini(void) 2614 { 2615 nfs4setclientid_fini(); 2616 mutex_destroy(&nfs4_syncbusy); 2617 } 2618 2619 void 2620 nfs4_freevfs(vfs_t *vfsp) 2621 { 2622 mntinfo4_t *mi; 2623 2624 /* need to release the initial hold */ 2625 mi = VFTOMI4(vfsp); 2626 MI4_RELE(mi); 2627 } 2628 2629 /* 2630 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2631 */ 2632 struct nfs4_server nfs4_server_lst = 2633 { &nfs4_server_lst, &nfs4_server_lst }; 2634 2635 kmutex_t nfs4_server_lst_lock; 2636 2637 static void 2638 nfs4setclientid_init(void) 2639 { 2640 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2641 } 2642 2643 static void 2644 nfs4setclientid_fini(void) 2645 { 2646 mutex_destroy(&nfs4_server_lst_lock); 2647 } 2648 2649 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2650 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2651 2652 /* 2653 * Set the clientid for the server for "mi". No-op if the clientid is 2654 * already set. 2655 * 2656 * The recovery boolean should be set to TRUE if this function was called 2657 * by the recovery code, and FALSE otherwise. This is used to determine 2658 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2659 * for adding a mntinfo4_t to a nfs4_server_t. 2660 * 2661 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2662 * 'n4ep->error' is set to geterrno4(n4ep->stat). 
2663 */ 2664 void 2665 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2666 { 2667 struct nfs4_server *np; 2668 struct servinfo4 *svp = mi->mi_curr_serv; 2669 nfs4_recov_state_t recov_state; 2670 int num_retries = 0; 2671 bool_t retry; 2672 cred_t *lcr = NULL; 2673 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2674 time_t lease_time = 0; 2675 2676 recov_state.rs_flags = 0; 2677 recov_state.rs_num_retry_despite_err = 0; 2678 ASSERT(n4ep != NULL); 2679 2680 recov_retry: 2681 retry = FALSE; 2682 nfs4_error_zinit(n4ep); 2683 if (!recovery) 2684 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2685 2686 mutex_enter(&nfs4_server_lst_lock); 2687 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2688 mutex_exit(&nfs4_server_lst_lock); 2689 if (!np) { 2690 struct nfs4_server *tnp; 2691 np = new_nfs4_server(svp, cr); 2692 mutex_enter(&np->s_lock); 2693 2694 mutex_enter(&nfs4_server_lst_lock); 2695 tnp = servinfo4_to_nfs4_server(svp); 2696 if (tnp) { 2697 /* 2698 * another thread snuck in and put server on list. 2699 * since we aren't adding it to the nfs4_server_list 2700 * we need to set the ref count to 0 and destroy it. 
2701 */ 2702 np->s_refcnt = 0; 2703 destroy_nfs4_server(np); 2704 np = tnp; 2705 } else { 2706 /* 2707 * do not give list a reference until everything 2708 * succeeds 2709 */ 2710 insque(np, &nfs4_server_lst); 2711 } 2712 mutex_exit(&nfs4_server_lst_lock); 2713 } 2714 ASSERT(MUTEX_HELD(&np->s_lock)); 2715 /* 2716 * If we find the server already has N4S_CLIENTID_SET, then 2717 * just return, we've already done SETCLIENTID to that server 2718 */ 2719 if (np->s_flags & N4S_CLIENTID_SET) { 2720 /* add mi to np's mntinfo4_list */ 2721 nfs4_add_mi_to_server(np, mi); 2722 if (!recovery) 2723 nfs_rw_exit(&mi->mi_recovlock); 2724 mutex_exit(&np->s_lock); 2725 nfs4_server_rele(np); 2726 return; 2727 } 2728 mutex_exit(&np->s_lock); 2729 2730 2731 /* 2732 * Drop the mi_recovlock since nfs4_start_op will 2733 * acquire it again for us. 2734 */ 2735 if (!recovery) { 2736 nfs_rw_exit(&mi->mi_recovlock); 2737 2738 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2739 if (n4ep->error) { 2740 nfs4_server_rele(np); 2741 return; 2742 } 2743 } 2744 2745 mutex_enter(&np->s_lock); 2746 while (np->s_flags & N4S_CLIENTID_PEND) { 2747 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2748 mutex_exit(&np->s_lock); 2749 nfs4_server_rele(np); 2750 if (!recovery) 2751 nfs4_end_op(mi, NULL, NULL, &recov_state, 2752 recovery); 2753 n4ep->error = EINTR; 2754 return; 2755 } 2756 } 2757 2758 if (np->s_flags & N4S_CLIENTID_SET) { 2759 /* XXX copied/pasted from above */ 2760 /* add mi to np's mntinfo4_list */ 2761 nfs4_add_mi_to_server(np, mi); 2762 mutex_exit(&np->s_lock); 2763 nfs4_server_rele(np); 2764 if (!recovery) 2765 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2766 return; 2767 } 2768 2769 /* 2770 * Reset the N4S_CB_PINGED flag. This is used to 2771 * indicate if we have received a CB_NULL from the 2772 * server. Also we reset the waiter flag. 
2773 */ 2774 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2775 /* any failure must now clear this flag */ 2776 np->s_flags |= N4S_CLIENTID_PEND; 2777 mutex_exit(&np->s_lock); 2778 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2779 2780 if (n4ep->error == EACCES) { 2781 /* 2782 * If the uid is set then set the creds for secure mounts 2783 * by proxy processes such as automountd. 2784 */ 2785 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2786 if (svp->sv_secdata->uid != 0) { 2787 lcr = crdup(cr); 2788 (void) crsetugid(lcr, svp->sv_secdata->uid, 2789 crgetgid(cr)); 2790 } 2791 nfs_rw_exit(&svp->sv_lock); 2792 2793 if (lcr != NULL) { 2794 mutex_enter(&np->s_lock); 2795 crfree(np->s_cred); 2796 np->s_cred = lcr; 2797 mutex_exit(&np->s_lock); 2798 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2799 &retry_inuse); 2800 } 2801 } 2802 mutex_enter(&np->s_lock); 2803 lease_time = np->s_lease_time; 2804 np->s_flags &= ~N4S_CLIENTID_PEND; 2805 mutex_exit(&np->s_lock); 2806 2807 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2808 /* 2809 * Start recovery if failover is a possibility. If 2810 * invoked by the recovery thread itself, then just 2811 * return and let it handle the failover first. NB: 2812 * recovery is not allowed if the mount is in progress 2813 * since the infrastructure is not sufficiently setup 2814 * to allow it. Just return the error (after suitable 2815 * retries). 2816 */ 2817 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2818 (void) nfs4_start_recovery(n4ep, mi, NULL, 2819 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2820 /* 2821 * Don't retry here, just return and let 2822 * recovery take over. 2823 */ 2824 if (recovery) 2825 retry = FALSE; 2826 } else if (nfs4_rpc_retry_error(n4ep->error) || 2827 n4ep->stat == NFS4ERR_RESOURCE || 2828 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2829 2830 retry = TRUE; 2831 /* 2832 * Always retry if in recovery or once had 2833 * contact with the server (but now it's 2834 * overloaded). 
2835 */ 2836 if (recovery == TRUE || 2837 n4ep->error == ETIMEDOUT || 2838 n4ep->error == ECONNRESET) 2839 num_retries = 0; 2840 } else if (retry_inuse && n4ep->error == 0 && 2841 n4ep->stat == NFS4ERR_CLID_INUSE) { 2842 retry = TRUE; 2843 num_retries = 0; 2844 } 2845 } else { 2846 /* 2847 * Since everything succeeded give the list a reference count if 2848 * it hasn't been given one by add_new_nfs4_server() or if this 2849 * is not a recovery situation in which case it is already on 2850 * the list. 2851 */ 2852 mutex_enter(&np->s_lock); 2853 if ((np->s_flags & N4S_INSERTED) == 0) { 2854 np->s_refcnt++; 2855 np->s_flags |= N4S_INSERTED; 2856 } 2857 mutex_exit(&np->s_lock); 2858 } 2859 2860 if (!recovery) 2861 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2862 2863 2864 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2865 if (retry_inuse) { 2866 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2867 retry_inuse = 0; 2868 } else 2869 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2870 2871 nfs4_server_rele(np); 2872 goto recov_retry; 2873 } 2874 2875 2876 if (n4ep->error == 0) 2877 n4ep->error = geterrno4(n4ep->stat); 2878 2879 /* broadcast before release in case no other threads are waiting */ 2880 cv_broadcast(&np->s_clientid_pend); 2881 nfs4_server_rele(np); 2882 } 2883 2884 int nfs4setclientid_otw_debug = 0; 2885 2886 /* 2887 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2888 * but nothing else; the calling function must be designed to handle those 2889 * other errors. 
2890 */ 2891 static void 2892 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2893 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2894 { 2895 COMPOUND4args_clnt args; 2896 COMPOUND4res_clnt res; 2897 nfs_argop4 argop[3]; 2898 SETCLIENTID4args *s_args; 2899 SETCLIENTID4resok *s_resok; 2900 int doqueue = 1; 2901 nfs4_ga_res_t *garp = NULL; 2902 timespec_t prop_time, after_time; 2903 verifier4 verf; 2904 clientid4 tmp_clientid; 2905 2906 ASSERT(!MUTEX_HELD(&np->s_lock)); 2907 2908 args.ctag = TAG_SETCLIENTID; 2909 2910 args.array = argop; 2911 args.array_len = 3; 2912 2913 /* PUTROOTFH */ 2914 argop[0].argop = OP_PUTROOTFH; 2915 2916 /* GETATTR */ 2917 argop[1].argop = OP_GETATTR; 2918 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2919 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2920 2921 /* SETCLIENTID */ 2922 argop[2].argop = OP_SETCLIENTID; 2923 2924 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2925 2926 mutex_enter(&np->s_lock); 2927 2928 s_args->client.verifier = np->clidtosend.verifier; 2929 s_args->client.id_len = np->clidtosend.id_len; 2930 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2931 s_args->client.id_val = np->clidtosend.id_val; 2932 2933 /* 2934 * Callback needs to happen on non-RDMA transport 2935 * Check if we have saved the original knetconfig 2936 * if so, use that instead. 2937 */ 2938 if (svp->sv_origknconf != NULL) 2939 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2940 else 2941 nfs4_cb_args(np, svp->sv_knconf, s_args); 2942 2943 mutex_exit(&np->s_lock); 2944 2945 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2946 2947 if (ep->error) 2948 return; 2949 2950 /* getattr lease_time res */ 2951 if (res.array_len >= 2) { 2952 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2953 2954 #ifndef _LP64 2955 /* 2956 * The 32 bit client cannot handle a lease time greater than 2957 * (INT32_MAX/1000000). 
This is due to the use of the 2958 * lease_time in calls to drv_usectohz() in 2959 * nfs4_renew_lease_thread(). The problem is that 2960 * drv_usectohz() takes a time_t (which is just a long = 4 2961 * bytes) as its parameter. The lease_time is multiplied by 2962 * 1000000 to convert seconds to usecs for the parameter. If 2963 * a number bigger than (INT32_MAX/1000000) is used then we 2964 * overflow on the 32bit client. 2965 */ 2966 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2967 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2968 } 2969 #endif 2970 2971 mutex_enter(&np->s_lock); 2972 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2973 2974 /* 2975 * Keep track of the lease period for the mi's 2976 * mi_msg_list. We need an appropiate time 2977 * bound to associate past facts with a current 2978 * event. The lease period is perfect for this. 2979 */ 2980 mutex_enter(&mi->mi_msg_list_lock); 2981 mi->mi_lease_period = np->s_lease_time; 2982 mutex_exit(&mi->mi_msg_list_lock); 2983 mutex_exit(&np->s_lock); 2984 } 2985 2986 2987 if (res.status == NFS4ERR_CLID_INUSE) { 2988 clientaddr4 *clid_inuse; 2989 2990 if (!(*retry_inusep)) { 2991 clid_inuse = &res.array->nfs_resop4_u. 2992 opsetclientid.SETCLIENTID4res_u.client_using; 2993 2994 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2995 "NFS4 mount (SETCLIENTID failed)." 2996 " nfs4_client_id.id is in" 2997 "use already by: r_netid<%s> r_addr<%s>", 2998 clid_inuse->r_netid, clid_inuse->r_addr); 2999 } 3000 3001 /* 3002 * XXX - The client should be more robust in its 3003 * handling of clientid in use errors (regen another 3004 * clientid and try again?) 3005 */ 3006 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3007 return; 3008 } 3009 3010 if (res.status) { 3011 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3012 return; 3013 } 3014 3015 s_resok = &res.array[2].nfs_resop4_u. 
3016 opsetclientid.SETCLIENTID4res_u.resok4; 3017 3018 tmp_clientid = s_resok->clientid; 3019 3020 verf = s_resok->setclientid_confirm; 3021 3022 #ifdef DEBUG 3023 if (nfs4setclientid_otw_debug) { 3024 union { 3025 clientid4 clientid; 3026 int foo[2]; 3027 } cid; 3028 3029 cid.clientid = s_resok->clientid; 3030 3031 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3032 "nfs4setclientid_otw: OK, clientid = %x,%x, " 3033 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 3034 } 3035 #endif 3036 3037 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3038 3039 /* Confirm the client id and get the lease_time attribute */ 3040 3041 args.ctag = TAG_SETCLIENTID_CF; 3042 3043 args.array = argop; 3044 args.array_len = 1; 3045 3046 argop[0].argop = OP_SETCLIENTID_CONFIRM; 3047 3048 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 3049 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 3050 3051 /* used to figure out RTT for np */ 3052 gethrestime(&prop_time); 3053 3054 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 3055 "start time: %ld sec %ld nsec", prop_time.tv_sec, 3056 prop_time.tv_nsec)); 3057 3058 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3059 3060 gethrestime(&after_time); 3061 mutex_enter(&np->s_lock); 3062 np->propagation_delay.tv_sec = 3063 MAX(1, after_time.tv_sec - prop_time.tv_sec); 3064 mutex_exit(&np->s_lock); 3065 3066 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 3067 "finish time: %ld sec ", after_time.tv_sec)); 3068 3069 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 3070 "propagation delay set to %ld sec", 3071 np->propagation_delay.tv_sec)); 3072 3073 if (ep->error) 3074 return; 3075 3076 if (res.status == NFS4ERR_CLID_INUSE) { 3077 clientaddr4 *clid_inuse; 3078 3079 if (!(*retry_inusep)) { 3080 clid_inuse = &res.array->nfs_resop4_u. 
3081 opsetclientid.SETCLIENTID4res_u.client_using; 3082 3083 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3084 "SETCLIENTID_CONFIRM failed. " 3085 "nfs4_client_id.id is in use already by: " 3086 "r_netid<%s> r_addr<%s>", 3087 clid_inuse->r_netid, clid_inuse->r_addr); 3088 } 3089 3090 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3091 return; 3092 } 3093 3094 if (res.status) { 3095 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3096 return; 3097 } 3098 3099 mutex_enter(&np->s_lock); 3100 np->clientid = tmp_clientid; 3101 np->s_flags |= N4S_CLIENTID_SET; 3102 3103 /* Add mi to np's mntinfo4 list */ 3104 nfs4_add_mi_to_server(np, mi); 3105 3106 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 3107 /* 3108 * Start lease management thread. 3109 * Keep trying until we succeed. 3110 */ 3111 3112 np->s_refcnt++; /* pass reference to thread */ 3113 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 3114 minclsyspri); 3115 } 3116 mutex_exit(&np->s_lock); 3117 3118 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3119 } 3120 3121 /* 3122 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3123 * mi's clientid the same as sp's. 3124 * Assumes sp is locked down. 
3125 */ 3126 void 3127 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3128 { 3129 mntinfo4_t *tmi; 3130 int in_list = 0; 3131 3132 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3133 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3134 ASSERT(sp != &nfs4_server_lst); 3135 ASSERT(MUTEX_HELD(&sp->s_lock)); 3136 3137 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3138 "nfs4_add_mi_to_server: add mi %p to sp %p", 3139 (void*)mi, (void*)sp)); 3140 3141 for (tmi = sp->mntinfo4_list; 3142 tmi != NULL; 3143 tmi = tmi->mi_clientid_next) { 3144 if (tmi == mi) { 3145 NFS4_DEBUG(nfs4_client_lease_debug, 3146 (CE_NOTE, 3147 "nfs4_add_mi_to_server: mi in list")); 3148 in_list = 1; 3149 } 3150 } 3151 3152 /* 3153 * First put a hold on the mntinfo4's vfsp so that references via 3154 * mntinfo4_list will be valid. 3155 */ 3156 if (!in_list) 3157 VFS_HOLD(mi->mi_vfsp); 3158 3159 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3160 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3161 3162 if (!in_list) { 3163 if (sp->mntinfo4_list) 3164 sp->mntinfo4_list->mi_clientid_prev = mi; 3165 mi->mi_clientid_next = sp->mntinfo4_list; 3166 sp->mntinfo4_list = mi; 3167 mi->mi_srvsettime = gethrestime_sec(); 3168 } 3169 3170 /* set mi's clientid to that of sp's for later matching */ 3171 mi->mi_clientid = sp->clientid; 3172 3173 /* 3174 * Update the clientid for any other mi's belonging to sp. This 3175 * must be done here while we hold sp->s_lock, so that 3176 * find_nfs4_server() continues to work. 3177 */ 3178 3179 for (tmi = sp->mntinfo4_list; 3180 tmi != NULL; 3181 tmi = tmi->mi_clientid_next) { 3182 if (tmi != mi) { 3183 tmi->mi_clientid = sp->clientid; 3184 } 3185 } 3186 } 3187 3188 /* 3189 * Remove the mi from sp's mntinfo4_list and release its reference. 3190 * Exception: if mi still has open files, flag it for later removal (when 3191 * all the files are closed). 
3192 * 3193 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3194 * thread to exit. 3195 */ 3196 static void 3197 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3198 { 3199 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3200 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3201 (void*)mi, (void*)sp)); 3202 3203 ASSERT(sp != NULL); 3204 ASSERT(MUTEX_HELD(&sp->s_lock)); 3205 ASSERT(mi->mi_open_files >= 0); 3206 3207 /* 3208 * First make sure this mntinfo4 can be taken off of the list, 3209 * ie: it doesn't have any open files remaining. 3210 */ 3211 if (mi->mi_open_files > 0) { 3212 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3213 "nfs4_remove_mi_from_server_nolock: don't " 3214 "remove mi since it still has files open")); 3215 3216 mutex_enter(&mi->mi_lock); 3217 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3218 mutex_exit(&mi->mi_lock); 3219 return; 3220 } 3221 3222 VFS_HOLD(mi->mi_vfsp); 3223 remove_mi(sp, mi); 3224 VFS_RELE(mi->mi_vfsp); 3225 3226 if (sp->mntinfo4_list == NULL) { 3227 /* last fs unmounted, kill the thread */ 3228 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3229 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3230 nfs4_mark_srv_dead(sp); 3231 } 3232 } 3233 3234 /* 3235 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3236 */ 3237 static void 3238 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3239 { 3240 ASSERT(MUTEX_HELD(&sp->s_lock)); 3241 3242 /* 3243 * We release a reference, and the caller must still have a 3244 * reference. 3245 */ 3246 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3247 3248 if (mi->mi_clientid_prev) { 3249 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3250 } else { 3251 /* This is the first mi in sp's mntinfo4_list */ 3252 /* 3253 * Make sure the first mntinfo4 in the list is the actual 3254 * mntinfo4 passed in. 
3255 */ 3256 ASSERT(sp->mntinfo4_list == mi); 3257 3258 sp->mntinfo4_list = mi->mi_clientid_next; 3259 } 3260 if (mi->mi_clientid_next) 3261 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3262 3263 /* Now mark the mntinfo4's links as being removed */ 3264 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3265 3266 VFS_RELE(mi->mi_vfsp); 3267 } 3268 3269 /* 3270 * Free all the entries in sp's mntinfo4_list. 3271 */ 3272 static void 3273 remove_all_mi(nfs4_server_t *sp) 3274 { 3275 mntinfo4_t *mi; 3276 3277 ASSERT(MUTEX_HELD(&sp->s_lock)); 3278 3279 while (sp->mntinfo4_list != NULL) { 3280 mi = sp->mntinfo4_list; 3281 /* 3282 * Grab a reference in case there is only one left (which 3283 * remove_mi() frees). 3284 */ 3285 VFS_HOLD(mi->mi_vfsp); 3286 remove_mi(sp, mi); 3287 VFS_RELE(mi->mi_vfsp); 3288 } 3289 } 3290 3291 /* 3292 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3293 * 3294 * This version can be called with a null nfs4_server_t arg, 3295 * and will either find the right one and handle locking, or 3296 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3297 */ 3298 void 3299 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3300 { 3301 nfs4_server_t *sp; 3302 3303 if (esp == NULL) { 3304 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3305 sp = find_nfs4_server_all(mi, 1); 3306 } else 3307 sp = esp; 3308 3309 if (sp != NULL) 3310 nfs4_remove_mi_from_server_nolock(mi, sp); 3311 3312 /* 3313 * If we had a valid esp as input, the calling function will be 3314 * responsible for unlocking the esp nfs4_server. 3315 */ 3316 if (esp == NULL) { 3317 if (sp != NULL) 3318 mutex_exit(&sp->s_lock); 3319 nfs_rw_exit(&mi->mi_recovlock); 3320 if (sp != NULL) 3321 nfs4_server_rele(sp); 3322 } 3323 } 3324 3325 /* 3326 * Return TRUE if the given server has any non-unmounted filesystems. 
3327 */ 3328 3329 bool_t 3330 nfs4_fs_active(nfs4_server_t *sp) 3331 { 3332 mntinfo4_t *mi; 3333 3334 ASSERT(MUTEX_HELD(&sp->s_lock)); 3335 3336 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3337 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3338 return (TRUE); 3339 } 3340 3341 return (FALSE); 3342 } 3343 3344 /* 3345 * Mark sp as finished and notify any waiters. 3346 */ 3347 3348 void 3349 nfs4_mark_srv_dead(nfs4_server_t *sp) 3350 { 3351 ASSERT(MUTEX_HELD(&sp->s_lock)); 3352 3353 sp->s_thread_exit = NFS4_THREAD_EXIT; 3354 cv_broadcast(&sp->cv_thread_exit); 3355 } 3356 3357 /* 3358 * Create a new nfs4_server_t structure. 3359 * Returns new node unlocked and not in list, but with a reference count of 3360 * 1. 3361 */ 3362 struct nfs4_server * 3363 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3364 { 3365 struct nfs4_server *np; 3366 timespec_t tt; 3367 union { 3368 struct { 3369 uint32_t sec; 3370 uint32_t subsec; 3371 } un_curtime; 3372 verifier4 un_verifier; 3373 } nfs4clientid_verifier; 3374 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3375 int len; 3376 3377 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3378 np->saddr.len = svp->sv_addr.len; 3379 np->saddr.maxlen = svp->sv_addr.maxlen; 3380 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3381 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3382 np->s_refcnt = 1; 3383 3384 /* 3385 * Build the nfs_client_id4 for this server mount. Ensure 3386 * the verifier is useful and that the identification is 3387 * somehow based on the server's address for the case of 3388 * multi-homed servers. 3389 */ 3390 nfs4clientid_verifier.un_verifier = 0; 3391 gethrestime(&tt); 3392 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3393 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3394 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3395 3396 /* 3397 * calculate the length of the opaque identifier. 
Subtract 2 3398 * for the "%s" and add the traditional +1 for null 3399 * termination. 3400 */ 3401 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3402 np->clidtosend.id_len = len + np->saddr.maxlen; 3403 3404 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3405 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3406 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3407 3408 np->s_flags = 0; 3409 np->mntinfo4_list = NULL; 3410 /* save cred for issuing rfs4calls inside the renew thread */ 3411 crhold(cr); 3412 np->s_cred = cr; 3413 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3414 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3415 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3416 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3417 offsetof(rnode4_t, r_deleg_link)); 3418 np->s_thread_exit = 0; 3419 np->state_ref_count = 0; 3420 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3421 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3422 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3423 np->s_otw_call_count = 0; 3424 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3425 np->zoneid = getzoneid(); 3426 np->zone_globals = nfs4_get_callback_globals(); 3427 ASSERT(np->zone_globals != NULL); 3428 return (np); 3429 } 3430 3431 /* 3432 * Create a new nfs4_server_t structure and add it to the list. 3433 * Returns new node locked; reference must eventually be freed. 
3434 */ 3435 static struct nfs4_server * 3436 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3437 { 3438 nfs4_server_t *sp; 3439 3440 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3441 sp = new_nfs4_server(svp, cr); 3442 mutex_enter(&sp->s_lock); 3443 insque(sp, &nfs4_server_lst); 3444 sp->s_refcnt++; /* list gets a reference */ 3445 sp->s_flags |= N4S_INSERTED; 3446 sp->clientid = 0; 3447 return (sp); 3448 } 3449 3450 int nfs4_server_t_debug = 0; 3451 3452 #ifdef lint 3453 extern void 3454 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3455 #endif 3456 3457 #ifndef lint 3458 #ifdef DEBUG 3459 void 3460 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3461 { 3462 int hash16(void *p, int len); 3463 nfs4_server_t *np; 3464 3465 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3466 "dumping nfs4_server_t list in %s", txt)); 3467 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3468 "mi 0x%p, want clientid %llx, addr %d/%04X", 3469 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3470 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3471 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3472 np = np->forw) { 3473 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3474 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3475 np, (longlong_t)np->clientid, np->saddr.len, 3476 hash16((void *)np->saddr.buf, np->saddr.len), 3477 np->state_ref_count)); 3478 if (np->saddr.len == srv_p->sv_addr.len && 3479 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3480 np->saddr.len) == 0) 3481 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3482 " - address matches")); 3483 if (np->clientid == clientid || np->clientid == 0) 3484 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3485 " - clientid matches")); 3486 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3487 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3488 " - thread not exiting")); 3489 } 3490 delay(hz); 3491 } 3492 #endif 3493 #endif 3494 3495 3496 /* 3497 * Move a mntinfo4_t from one server list to another. 
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 *
 * mi  - the mount being moved.
 * old - servinfo4 whose address identifies the nfs4_server_t mi is leaving.
 * new - servinfo4 whose address identifies the nfs4_server_t mi is joining;
 *       a node is created (with kcred) if none exists on the list.
 *
 * The returned node is unlocked; nfs4_server_lst_lock is taken and dropped
 * internally.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(nfs_zone() == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	/*
	 * Single pass over the list looking for both the old node (op) and
	 * the new node (np).  Each is locked as it is found, which is what
	 * gives the "list order" locking described above.  Nodes from other
	 * zones and nodes flagged NFS4_THREAD_EXIT are never selected.
	 *
	 * NOTE(review): if one list entry matched both 'old' and 'new',
	 * its s_lock would be entered twice here -- presumably callers
	 * never pass the same address for both; confirm.
	 */
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			/* reference on op; dropped by nfs4_server_rele below */
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			/* np's reference is taken later, once op is known */
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		/* this is the reference handed back to the caller */
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		/* returns the new node locked, per add_new_nfs4_server */
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void*)mi, (void*)old, (void*)new,
	    (void*)op, (void*)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	/*
	 * Transfer mi's open-file count from op to np.  mi_open_files is
	 * zeroed across the removal from op and restored afterwards.
	 * NOTE(review): presumably this keeps
	 * nfs4_remove_mi_from_server_nolock() from adjusting op's
	 * state_ref_count a second time -- confirm against that routine.
	 */
	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	/*
	 * Drop op's lock before releasing the reference taken in the loop
	 * (nfs4_server_rele acquires s_lock itself), then unlock np.
	 */
	mutex_exit(&op->s_lock);
	nfs4_server_rele(op);
	mutex_exit(&np->s_lock);

	return (np);
}

/*
 * Need to have the nfs4_server_lst_lock.
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
3595 */ 3596 nfs4_server_t * 3597 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3598 { 3599 nfs4_server_t *np; 3600 zoneid_t zoneid = nfs_zoneid(); 3601 3602 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3603 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3604 if (np->zoneid == zoneid && 3605 np->saddr.len == srv_p->sv_addr.len && 3606 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3607 np->saddr.len) == 0 && 3608 np->s_thread_exit != NFS4_THREAD_EXIT) { 3609 mutex_enter(&np->s_lock); 3610 np->s_refcnt++; 3611 return (np); 3612 } 3613 } 3614 return (NULL); 3615 } 3616 3617 /* 3618 * Search the nfs4_server_lst to find a match based on clientid and 3619 * addr. 3620 * Locks the nfs4_server down if it is found and returns a reference that 3621 * must eventually be freed. 3622 * 3623 * Returns NULL it no match is found. This means one of two things: either 3624 * mi is in the process of being mounted, or mi has been unmounted. 3625 * 3626 * The caller should be holding mi->mi_recovlock, and it should continue to 3627 * hold the lock until done with the returned nfs4_server_t. Once 3628 * mi->mi_recovlock is released, there is no guarantee that the returned 3629 * mi->nfs4_server_t will continue to correspond to mi. 3630 */ 3631 nfs4_server_t * 3632 find_nfs4_server(mntinfo4_t *mi) 3633 { 3634 return (find_nfs4_server_all(mi, 0)); 3635 } 3636 3637 /* 3638 * Same as above, but takes an "all" parameter which can be 3639 * set to 1 if the caller wishes to find nfs4_server_t's which 3640 * have been marked for termination by the exit of the renew 3641 * thread. This should only be used by operations which are 3642 * cleaning up and will not cause an OTW op. 
3643 */ 3644 nfs4_server_t * 3645 find_nfs4_server_all(mntinfo4_t *mi, int all) 3646 { 3647 nfs4_server_t *np; 3648 servinfo4_t *svp; 3649 zoneid_t zoneid = mi->mi_zone->zone_id; 3650 3651 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3652 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3653 /* 3654 * This can be called from nfs4_unmount() which can be called from the 3655 * global zone, hence it's legal for the global zone to muck with 3656 * another zone's server list, as long as it doesn't try to contact 3657 * them. 3658 */ 3659 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3660 nfs_global_client_only != 0); 3661 3662 /* 3663 * The nfs4_server_lst_lock global lock is held when we get a new 3664 * clientid (via SETCLIENTID OTW). Holding this global lock and 3665 * mi_recovlock (READER is fine) ensures that the nfs4_server 3666 * and this mntinfo4 can't get out of sync, so the following search is 3667 * always valid. 3668 */ 3669 mutex_enter(&nfs4_server_lst_lock); 3670 #ifdef DEBUG 3671 if (nfs4_server_t_debug) { 3672 /* mi->mi_clientid is unprotected, ok for debug output */ 3673 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3674 mi->mi_curr_serv); 3675 } 3676 #endif 3677 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3678 mutex_enter(&np->s_lock); 3679 svp = mi->mi_curr_serv; 3680 3681 if (np->zoneid == zoneid && 3682 np->clientid == mi->mi_clientid && 3683 np->saddr.len == svp->sv_addr.len && 3684 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3685 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3686 mutex_exit(&nfs4_server_lst_lock); 3687 np->s_refcnt++; 3688 return (np); 3689 } 3690 mutex_exit(&np->s_lock); 3691 } 3692 mutex_exit(&nfs4_server_lst_lock); 3693 3694 return (NULL); 3695 } 3696 3697 /* 3698 * Release the reference to sp and destroy it if that's the last one. 
3699 */ 3700 3701 void 3702 nfs4_server_rele(nfs4_server_t *sp) 3703 { 3704 mutex_enter(&sp->s_lock); 3705 ASSERT(sp->s_refcnt > 0); 3706 sp->s_refcnt--; 3707 if (sp->s_refcnt > 0) { 3708 mutex_exit(&sp->s_lock); 3709 return; 3710 } 3711 mutex_exit(&sp->s_lock); 3712 3713 mutex_enter(&nfs4_server_lst_lock); 3714 mutex_enter(&sp->s_lock); 3715 if (sp->s_refcnt > 0) { 3716 mutex_exit(&sp->s_lock); 3717 mutex_exit(&nfs4_server_lst_lock); 3718 return; 3719 } 3720 remque(sp); 3721 sp->forw = sp->back = NULL; 3722 mutex_exit(&nfs4_server_lst_lock); 3723 destroy_nfs4_server(sp); 3724 } 3725 3726 static void 3727 destroy_nfs4_server(nfs4_server_t *sp) 3728 { 3729 ASSERT(MUTEX_HELD(&sp->s_lock)); 3730 ASSERT(sp->s_refcnt == 0); 3731 ASSERT(sp->s_otw_call_count == 0); 3732 3733 remove_all_mi(sp); 3734 3735 crfree(sp->s_cred); 3736 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3737 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3738 mutex_exit(&sp->s_lock); 3739 3740 /* destroy the nfs4_server */ 3741 nfs4callback_destroy(sp); 3742 list_destroy(&sp->s_deleg_list); 3743 mutex_destroy(&sp->s_lock); 3744 cv_destroy(&sp->cv_thread_exit); 3745 cv_destroy(&sp->s_cv_otw_count); 3746 cv_destroy(&sp->s_clientid_pend); 3747 cv_destroy(&sp->wait_cb_null); 3748 nfs_rw_destroy(&sp->s_recovlock); 3749 kmem_free(sp, sizeof (*sp)); 3750 } 3751 3752 /* 3753 * Lock sp, but only if it's still active (in the list and hasn't been 3754 * flagged as exiting) or 'all' is non-zero. 3755 * Returns TRUE if sp got locked and adds a reference to sp. 
 */
bool_t
nfs4_server_vlock(nfs4_server_t *sp, int all)
{
	nfs4_server_t *np;

	/*
	 * sp is only compared by address against list members until it has
	 * been found on the list; nothing is read through it before then.
	 */
	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT ||
		    all != 0)) {
			/* returns with s_lock held and one more reference */
			mutex_enter(&np->s_lock);
			np->s_refcnt++;
			mutex_exit(&nfs4_server_lst_lock);
			return (TRUE);
		}
	}
	mutex_exit(&nfs4_server_lst_lock);
	return (FALSE);
}

/*
 * Fork off a thread to free the data structures for a mount.
 *
 * Holds are taken here on vfsp, on its mntinfo4 and on cr; they are
 * released by nfs4_free_mount_thread() once the work is done.
 */

static void
async_free_mount(vfs_t *vfsp, cred_t *cr)
{
	freemountargs_t *args;
	args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
	args->fm_vfsp = vfsp;
	VFS_HOLD(vfsp);
	MI4_HOLD(VFTOMI4(vfsp));
	args->fm_cr = cr;
	crhold(cr);
	/* args is freed by the new thread */
	(void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
	    minclsyspri);
}

/*
 * Thread entry point created by async_free_mount(): runs
 * nfs4_free_mount(), drops the holds taken by async_free_mount(), frees
 * args and exits.
 */
static void
nfs4_free_mount_thread(freemountargs_t *args)
{
	mntinfo4_t *mi;
	nfs4_free_mount(args->fm_vfsp, args->fm_cr);
	/* fetch mi before the vfs hold is dropped below */
	mi = VFTOMI4(args->fm_vfsp);
	crfree(args->fm_cr);
	VFS_RELE(args->fm_vfsp);
	MI4_RELE(mi);
	kmem_free(args, sizeof (freemountargs_t));
	zthread_exit();
	/* NOTREACHED */
}

/*
 * Thread to free the data structures for a given filesystem.
 *
 * Waits for outstanding OTW calls and recovery to drain, re-purges the
 * dnlc, stops the async manager thread, destroys the rnode table, unlinks
 * mi from its nfs4_server_t and finally removes mi from the zone list.
 */
static void
nfs4_free_mount(vfs_t *vfsp, cred_t *cr)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);
	nfs4_server_t *sp;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	boolean_t async_thread;
	int removed;

	/*
	 * We need to participate in the CPR framework if this is a kernel
	 * thread.
	 */
	async_thread = (curproc == nfs_zone()->zone_zsched);
	if (async_thread) {
		mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
		CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
		    "nfsv4AsyncUnmount");
	}

	/*
	 * We need to wait for all outstanding OTW calls
	 * and recovery to finish before we remove the mi
	 * from the nfs4_server_t, as current pending
	 * calls might still need this linkage (in order
	 * to find a nfs4_server_t from a mntinfo4_t).
	 */
	/* find_nfs4_server() requires mi_recovlock; result comes back locked */
	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
	sp = find_nfs4_server(mi);
	nfs_rw_exit(&mi->mi_recovlock);

	if (sp) {
		/*
		 * Sleep on s_cv_otw_count (under sp->s_lock) until no OTW
		 * calls remain.  When running as the async thread, each
		 * wait is bracketed by CALLB_CPR_SAFE_BEGIN/END.
		 */
		while (sp->s_otw_call_count != 0) {
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_BEGIN(&cpr_info);
				mutex_exit(&cpr_lock);
			}
			cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
				mutex_exit(&cpr_lock);
			}
		}
		/* unlock, then drop the reference from find_nfs4_server() */
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		sp = NULL;
	}


	/* Same wait pattern, this time for recovery on mi to finish. */
	mutex_enter(&mi->mi_lock);
	while (mi->mi_in_recovery != 0) {
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			mutex_exit(&cpr_lock);
		}
		cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
			mutex_exit(&cpr_lock);
		}
	}
	mutex_exit(&mi->mi_lock);

	/*
	 * The original purge of the dnlc via 'dounmount'
	 * doesn't guarantee that another dnlc entry was not
	 * added while we waited for all outstanding OTW
	 * and recovery calls to finish.  So re-purge the
	 * dnlc now.
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/*
	 * We need to explicitly stop the manager thread; the async worker
	 * threads can timeout and exit on their own.
	 */
	mutex_enter(&mi->mi_async_lock);
	mi->mi_max_threads = 0;
	cv_broadcast(&mi->mi_async_work_cv);
	mutex_exit(&mi->mi_async_lock);
	if (mi->mi_manager_thread)
		nfs4_async_manager_stop(vfsp);

	destroy_rtable4(vfsp, cr);

	nfs4_remove_mi_from_server(mi, NULL);

	if (async_thread) {
		mutex_enter(&cpr_lock);
		CALLB_CPR_EXIT(&cpr_info);	/* drops cpr_lock */
		mutex_destroy(&cpr_lock);
	}

	/* drop the zone hold only if mi was actually on the zone list */
	removed = nfs4_mi_zonelist_remove(mi);
	if (removed)
		zone_rele(mi->mi_zone);
}