/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All Rights Reserved
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/time.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/dnlc.h>
#include <sys/list.h>
#include <sys/mntent.h>
#include <sys/tsol/label.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <sys/fs/autofs.h>


/*
 * Arguments passed to thread to free data structures from forced unmount.
 */

typedef struct {
	vfs_t	*fm_vfsp;
	cred_t	*fm_cr;
} freemountargs_t;

static void	async_free_mount(vfs_t *, cred_t *);
static void	nfs4_free_mount(vfs_t *, cred_t *);
static void	nfs4_free_mount_thread(freemountargs_t *);
static int	nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);

/*
 * From rpcsec module (common/rpcsec).
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v4_tmpl in nfs_stats.c
 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access", "close", "commit", "create",
	"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
	"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
	"read", "readdir", "readlink", "remove", "rename", "renew",
	"restorefh", "savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};

/*
 * nfs4_max_mount_retry is the number of times the client will redrive
 * a mount compound before giving up and returning failure.  The intent
 * is to redrive mount compounds which fail NFS4ERR_STALE so that
 * if a component of the server path being mounted goes stale, it can
 * "recover" by redriving the mount compound (LOOKUP ops).  This recovery
 * code is needed outside of the recovery framework because mount is a
 * special case.  The client doesn't create vnodes/rnodes for components
 * of the server path being mounted.  The recovery code recovers real
 * client objects, not STALE FHs which map to components of the server
 * path being mounted.
 *
 * We could just fail the mount on the first time, but that would
 * instantly trigger failover (from nfs4_mount), and the client should
 * try to re-lookup the STALE FH before doing failover.  The easiest
 * way to "re-lookup" is to simply redrive the mount compound.
 */
static int nfs4_max_mount_retry = 2;

/*
 * nfs4 vfs operations.
 */
static int	nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	nfs4_unmount(vfs_t *, int, cred_t *);
static int	nfs4_root(vfs_t *, vnode_t **);
static int	nfs4_statvfs(vfs_t *, struct statvfs64 *);
static int	nfs4_sync(vfs_t *, short, cred_t *);
static int	nfs4_vget(vfs_t *, vnode_t **, fid_t *);
static int	nfs4_mountroot(vfs_t *, whymountroot_t);
static void	nfs4_freevfs(vfs_t *);

static int	nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
		    int, cred_t *, zone_t *);

vfsops_t	*nfs4_vfsops;

int nfs4_vfsinit(void);
void nfs4_vfsfini(void);
static void nfs4setclientid_init(void);
static void nfs4setclientid_fini(void);
static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
		struct nfs4_server *, nfs4_error_t *, int *);
static void	destroy_nfs4_server(nfs4_server_t *);
static void	remove_mi(nfs4_server_t *, mntinfo4_t *);

/*
 * Initialize the vfs structure
 */

static int nfs4fstyp;


/*
 * Debug variable to check for rdma based
 * transport startup and cleanup.  Controlled
 * through /etc/system. Off by default.
 */
extern int rdma_debug;

int
nfs4init(int fstyp, char *name)
{
	static const fs_operation_def_t nfs4_vfsops_template[] = {
		VFSNAME_MOUNT, nfs4_mount,
		VFSNAME_UNMOUNT, nfs4_unmount,
		VFSNAME_ROOT, nfs4_root,
		VFSNAME_STATVFS, nfs4_statvfs,
		VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync,
		VFSNAME_VGET, nfs4_vget,
		VFSNAME_MOUNTROOT, nfs4_mountroot,
		VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs,
		NULL, NULL
	};
	int error;

	error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
	if (error != 0) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vfs ops template");
		return (error);
	}

	error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
	if (error != 0) {
		(void) vfs_freevfsops_by_type(fstyp);
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs4init: bad vnode ops template");
		return (error);
	}

	nfs4fstyp = fstyp;

	(void) nfs4_vfsinit();

	(void) nfs4_init_dot_entries();

	return (0);
}

void
nfs4fini(void)
{
	(void) nfs4_destroy_dot_entries();
	nfs4_vfsfini();
}

/*
 * Create a new sec_data structure to store AUTH_DH related data:
 * netname, syncaddr, knetconfig.  There is no AUTH_F_RPCTIMESYNC
 * flag set for NFS V4 since we avoid contacting the rpcbind
 * daemon and use the IP time service (IPPORT_TIMESERVER) instead.
 *
 * sec_data can be freed by sec_clnt_freeinfo().
 */
struct sec_data *
create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
		struct knetconfig *knconf) {
	struct sec_data *secdata;
	dh_k4_clntdata_t *data;
	char *pf, *p;

	if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
		return (NULL);

	secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
	secdata->flags = 0;

	data = kmem_alloc(sizeof (*data), KM_SLEEP);

	data->syncaddr.maxlen = syncaddr->maxlen;
	data->syncaddr.len = syncaddr->len;
	data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
	bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);

	/*
	 * duplicate the knconf information for the
	 * new opaque data.
	 */
	data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
	*data->knconf = *knconf;
	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
	bcopy(knconf->knc_proto, p, KNC_STRSIZE);
	data->knconf->knc_protofmly = pf;
	data->knconf->knc_proto = p;

	/* move server netname to the sec_data structure */
	data->netname = kmem_alloc(nlen, KM_SLEEP);
	bcopy(netname, data->netname, nlen);
	data->netnamelen = (int)nlen;

	secdata->secmod = AUTH_DH;
	secdata->rpcflavor = AUTH_DH;
	secdata->data = (caddr_t)data;

	return (secdata);
}

static int
nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
{
	servinfo4_t *si;

	/*
	 * Iterate over the servinfo4 list to make sure
	 * we do not have a duplicate.
	 * Skip any servinfo4 that has been marked "NOT IN USE".
	 */
	for (si = svp_head; si; si = si->sv_next) {
		(void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
		if (si->sv_flags & SV4_NOTINUSE) {
			nfs_rw_exit(&si->sv_lock);
			continue;
		}
		nfs_rw_exit(&si->sv_lock);
		if (si == svp)
			continue;
		if (si->sv_addr.len == svp->sv_addr.len &&
		    strcmp(si->sv_knconf->knc_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0 &&
		    bcmp(si->sv_addr.buf, svp->sv_addr.buf,
		    si->sv_addr.len) == 0) {
			/* it's a duplicate */
			return (1);
		}
	}
	/* it's not a duplicate */
	return (0);
}

void
nfs4_free_args(struct nfs_args *nargs)
{
	if (nargs->knconf) {
		if (nargs->knconf->knc_protofmly)
			kmem_free(nargs->knconf->knc_protofmly,
			    KNC_STRSIZE);
		if (nargs->knconf->knc_proto)
			kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
		kmem_free(nargs->knconf, sizeof (*nargs->knconf));
		nargs->knconf = NULL;
	}

	if (nargs->fh) {
		kmem_free(nargs->fh, strlen(nargs->fh) + 1);
		nargs->fh = NULL;
	}

	if (nargs->hostname) {
		kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
		nargs->hostname = NULL;
	}

	if (nargs->addr) {
		if (nargs->addr->buf) {
			ASSERT(nargs->addr->len);
			kmem_free(nargs->addr->buf, nargs->addr->len);
		}
		kmem_free(nargs->addr, sizeof (struct netbuf));
		nargs->addr = NULL;
	}

	if (nargs->syncaddr) {
		ASSERT(nargs->syncaddr->len);
		if (nargs->syncaddr->buf) {
			ASSERT(nargs->syncaddr->len);
			kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
		}
		kmem_free(nargs->syncaddr, sizeof (struct netbuf));
		nargs->syncaddr = NULL;
	}

	if (nargs->netname) {
		kmem_free(nargs->netname, strlen(nargs->netname) + 1);
		nargs->netname = NULL;
	}

	if (nargs->nfs_ext_u.nfs_extA.secdata) {
		sec_clnt_freeinfo(
		    nargs->nfs_ext_u.nfs_extA.secdata);
		nargs->nfs_ext_u.nfs_extA.secdata = NULL;
	}
}


int
nfs4_copyin(char *data, int datalen, struct nfs_args *nargs)
{

	int error;
	size_t hlen;			/* length of hostname */
	size_t nlen;			/* length of netname */
	char netname[MAXNETNAMELEN+1];	/* server's netname */
	struct netbuf addr;		/* server's address */
	struct netbuf syncaddr;		/* AUTH_DES time sync addr */
	struct knetconfig *knconf;	/* transport structure */
	struct sec_data *secdata = NULL;	/* security data */
	STRUCT_DECL(nfs_args, args);	/* nfs mount arguments */
	STRUCT_DECL(knetconfig, knconf_tmp);
	STRUCT_DECL(netbuf, addr_tmp);
	int flags;
	char *p, *pf;
	struct pathname pn;
	char *userbufptr;


	bzero(nargs, sizeof (*nargs));

	STRUCT_INIT(args, get_udatamodel());
	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
	if (copyin(data, STRUCT_BUF(args), MIN(datalen,
	    STRUCT_SIZE(args))))
		return (EFAULT);

	nargs->wsize = STRUCT_FGET(args, wsize);
	nargs->rsize = STRUCT_FGET(args, rsize);
	nargs->timeo = STRUCT_FGET(args, timeo);
	nargs->retrans = STRUCT_FGET(args, retrans);
	nargs->acregmin = STRUCT_FGET(args, acregmin);
	nargs->acregmax = STRUCT_FGET(args, acregmax);
	nargs->acdirmin = STRUCT_FGET(args, acdirmin);
	nargs->acdirmax = STRUCT_FGET(args, acdirmax);

	flags = STRUCT_FGET(args, flags);
	nargs->flags = flags;

	addr.buf = NULL;
	syncaddr.buf = NULL;


	/*
	 * Allocate space for a knetconfig structure and its
	 * strings and copy in from user-land.
	 */
	knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
	STRUCT_INIT(knconf_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
	    STRUCT_SIZE(knconf_tmp))) {
		kmem_free(knconf, sizeof (*knconf));
		return (EFAULT);
	}

	knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
	knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
	knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
	if (get_udatamodel() != DATAMODEL_LP64) {
		knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
	} else {
		knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
	}

	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
	error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		kmem_free(knconf, sizeof (*knconf));
		return (error);
	}

	error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
	if (error) {
		kmem_free(pf, KNC_STRSIZE);
		kmem_free(p, KNC_STRSIZE);
		kmem_free(knconf, sizeof (*knconf));
		return (error);
	}


	knconf->knc_protofmly = pf;
	knconf->knc_proto = p;

	nargs->knconf = knconf;

	/*
	 * Get server address
	 */
	STRUCT_INIT(addr_tmp, get_udatamodel());
	if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
	    STRUCT_SIZE(addr_tmp))) {
		error = EFAULT;
		goto errout;
	}

	nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	userbufptr = STRUCT_FGETP(addr_tmp, buf);
	addr.len = STRUCT_FGET(addr_tmp, len);
	addr.buf = kmem_alloc(addr.len, KM_SLEEP);
	addr.maxlen = addr.len;
	if (copyin(userbufptr, addr.buf, addr.len)) {
		kmem_free(addr.buf, addr.len);
		error = EFAULT;
		goto errout;
	}
	bcopy(&addr, nargs->addr, sizeof (struct netbuf));

	/*
	 * Get the root fhandle
	 */
	error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);
	if (error)
		goto errout;

	/* Volatile fh: keep server paths, so use actual-size strings */
	nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
	bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen);
	nargs->fh[pn.pn_pathlen] = '\0';
	pn_free(&pn);


	/*
	 * Get server's hostname
	 */
	if (flags & NFSMNT_HOSTNAME) {
		error = copyinstr(STRUCT_FGETP(args, hostname),
		    netname, sizeof (netname), &hlen);
		if (error)
			goto errout;
		nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
		(void) strcpy(nargs->hostname, netname);

	} else {
		nargs->hostname = NULL;
	}


	/*
	 * If there are syncaddr and netname data, load them in.  This is
	 * to support data needed for NFSV4 when AUTH_DH is the negotiated
	 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
	 */
	netname[0] = '\0';
	if (flags & NFSMNT_SECURE) {

		/* get syncaddr */
		STRUCT_INIT(addr_tmp, get_udatamodel());
		if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
		    STRUCT_SIZE(addr_tmp))) {
			error = EINVAL;
			goto errout;
		}
		userbufptr = STRUCT_FGETP(addr_tmp, buf);
		syncaddr.len = STRUCT_FGET(addr_tmp, len);
		syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
		syncaddr.maxlen = syncaddr.len;
		if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
			kmem_free(syncaddr.buf, syncaddr.len);
			error = EFAULT;
			goto errout;
		}

		nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
		bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));

		/* get server's netname */
		if (copyinstr(STRUCT_FGETP(args, netname), netname,
		    sizeof (netname), &nlen)) {
			error = EFAULT;
			goto errout;
		}

		netname[nlen] = '\0';
		nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
		(void) strcpy(nargs->netname, netname);
	}

	/*
	 * Get the extension data which has the security data structure.
	 * This includes data for AUTH_SYS as well.
	 */
	if (flags & NFSMNT_NEWARGS) {
		nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
		if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
		    nargs->nfs_args_ext == NFS_ARGS_EXTB) {
			/*
			 * Indicates the application is using the new
			 * sec_data structure to pass in the security
			 * data.
			 */
			if (STRUCT_FGETP(args,
			    nfs_ext_u.nfs_extA.secdata) != NULL) {
				error = sec_clnt_loadinfo(
				    (struct sec_data *)STRUCT_FGETP(args,
				    nfs_ext_u.nfs_extA.secdata),
				    &secdata, get_udatamodel());
			}
			nargs->nfs_ext_u.nfs_extA.secdata = secdata;
		}
	}

	if (error)
		goto errout;

	/*
	 * Failover support:
	 *
	 * We may have a linked list of nfs_args structures,
	 * which means the user is looking for failover.  If
	 * the mount is either not "read-only" or "soft",
	 * we want to bail out with EINVAL.
	 */
	if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
		nargs->nfs_ext_u.nfs_extB.next =
		    STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);

errout:
	if (error)
		nfs4_free_args(nargs);

	return (error);
}


/*
 * nfs mount vfsop
 * Set up mount info record and attach it to vfs struct.
 */
static int
nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char *data = uap->dataptr;
	int error;
	vnode_t *rtvp;			/* the server's root */
	mntinfo4_t *mi;			/* mount info, pointed at by vfs */
	struct knetconfig *rdma_knconf;	/* rdma transport structure */
	rnode4_t *rp;
	struct servinfo4 *svp;		/* nfs server info */
	struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
	struct servinfo4 *svp_head;	/* first nfs server info */
	struct servinfo4 *svp_2ndlast;	/* 2nd last in server info list */
	struct sec_data *secdata;	/* security data */
	struct nfs_args *args = NULL;
	int flags, addr_type, removed;
	zone_t *zone = nfs_zone();
	nfs4_error_t n4e;
	zone_t *mntzone = NULL;

	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
		return (EPERM);
	if (mvp->v_type != VDIR)
		return (ENOTDIR);
	/*
	 * get arguments
	 *
	 * nfs_args is now versioned and is extensible, so
	 * uap->datalen might be different from sizeof (args)
	 * in a compatible situation.
	 */
more:
	if (!(uap->flags & MS_SYSSPACE)) {
		if (args == NULL)
			args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
		else
			nfs4_free_args(args);
		error = nfs4_copyin(data, uap->datalen, args);
		if (error) {
			if (args) {
				kmem_free(args, sizeof (*args));
			}
			return (error);
		}
	} else {
		args = (struct nfs_args *)data;
	}


	flags = args->flags;

	/*
	 * If the request changes the locking type, disallow the remount,
	 * because it's questionable whether we can transfer the
	 * locking state correctly.
	 */
	if (uap->flags & MS_REMOUNT) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		if ((mi = VFTOMI4(vfsp)) != NULL) {
			uint_t new_mi_llock;
			uint_t old_mi_llock;
			new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
			old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
			if (old_mi_llock != new_mi_llock)
				return (EBUSY);
		}
		return (0);
	}

	mutex_enter(&mvp->v_lock);
	if (!(uap->flags & MS_OVERLAY) &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/* make sure things are zeroed for errout: */
	rtvp = NULL;
	mi = NULL;
	secdata = NULL;

	/*
	 * A valid knetconfig structure is required.
	 */

	if (!(flags & NFSMNT_KNCONF) ||
	    args->knconf == NULL || args->knconf->knc_protofmly == NULL ||
	    args->knconf->knc_proto == NULL ||
	    (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		return (EINVAL);
	}

	if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
	    (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
		if (!(uap->flags & MS_SYSSPACE)) {
			nfs4_free_args(args);
			kmem_free(args, sizeof (*args));
		}
		return (EINVAL);
	}


	/*
	 * Allocate a servinfo4 struct.
	 */
	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
	if (svp_tail) {
		svp_2ndlast = svp_tail;
		svp_tail->sv_next = svp;
	} else {
		svp_head = svp;
		svp_2ndlast = svp;
	}

	svp_tail = svp;
	svp->sv_knconf = args->knconf;
	args->knconf = NULL;


	/*
	 * Get server address
	 */

	if (args->addr == NULL || args->addr->buf == NULL) {
		error = EINVAL;
		goto errout;
	}

	svp->sv_addr.maxlen = args->addr->maxlen;
	svp->sv_addr.len = args->addr->len;
	svp->sv_addr.buf = args->addr->buf;
	args->addr->buf = NULL;


	/*
	 * Get the root fhandle
	 */
	if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) {
		error = EINVAL;
		goto errout;
	}

	svp->sv_path = args->fh;
	svp->sv_pathlen = strlen(args->fh) + 1;
	args->fh = NULL;

	/*
	 * Get server's hostname
	 */
	if (flags & NFSMNT_HOSTNAME) {
		if (args->hostname == NULL || (strlen(args->hostname) >
		    MAXNETNAMELEN)) {
			error = EINVAL;
			goto errout;
		}
		svp->sv_hostnamelen = strlen(args->hostname) + 1;
		svp->sv_hostname = args->hostname;
		args->hostname = NULL;
	} else {
		char *p = "unknown-host";
		svp->sv_hostnamelen = strlen(p) + 1;
		svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
		(void) strcpy(svp->sv_hostname, p);
	}

	/*
	 * RDMA MOUNT SUPPORT FOR NFS v4.
	 * Establish whether it is possible to use RDMA; if so, overload the
	 * knconf with the rdma-specific knconf and free the original knconf.
	 */
	if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
		/*
		 * Determine the addr type for RDMA, IPv4 or v6.
		 */
		if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
			addr_type = AF_INET;
		else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
			addr_type = AF_INET6;

		if (rdma_reachable(addr_type, &svp->sv_addr,
		    &rdma_knconf) == 0) {
			/*
			 * If successful, hijack the original knconf and
			 * replace it with the new one, depending on the flags.
			 */
			svp->sv_origknconf = svp->sv_knconf;
			svp->sv_knconf = rdma_knconf;
		} else {
			if (flags & NFSMNT_TRYRDMA) {
#ifdef	DEBUG
				if (rdma_debug)
					zcmn_err(getzoneid(), CE_WARN,
					    "no RDMA onboard, revert\n");
#endif
			}

			if (flags & NFSMNT_DORDMA) {
				/*
				 * If proto=rdma is specified and no RDMA
				 * path to this server is available then
				 * ditch this server.
				 * This is not included in the mountable
				 * server list or the replica list.
				 * Check if more servers are specified;
				 * Failover case, otherwise bail out of mount.
				 */
				if (args->nfs_args_ext ==
				    NFS_ARGS_EXTB &&
				    args->nfs_ext_u.nfs_extB.next
				    != NULL) {
					data = (char *)
					    args->nfs_ext_u.nfs_extB.next;
					if (uap->flags & MS_RDONLY &&
					    !(flags & NFSMNT_SOFT)) {
						if (svp_head->sv_next == NULL) {
							svp_tail = NULL;
							svp_2ndlast = NULL;
							sv4_free(svp_head);
							goto more;
						} else {
							svp_tail = svp_2ndlast;
							svp_2ndlast->sv_next =
							    NULL;
							sv4_free(svp);
							goto more;
						}
					}
				} else {
					/*
					 * This is the last server specified
					 * in the nfs_args list passed down
					 * and it's not rdma capable.
					 */
					if (svp_head->sv_next == NULL) {
						/*
						 * Is this the only one
						 */
						error = EINVAL;
#ifdef	DEBUG
						if (rdma_debug)
							zcmn_err(getzoneid(),
							    CE_WARN,
							    "No RDMA srv");
#endif
						goto errout;
					} else {
						/*
						 * There is a list; some
						 * servers specified before
						 * this one passed all
						 * requirements.
						 */
						svp_tail = svp_2ndlast;
						svp_2ndlast->sv_next = NULL;
						sv4_free(svp);
						goto proceed;
					}
				}
			}
		}
	}

	/*
	 * If there are syncaddr and netname data, load them in.  This is
	 * to support data needed for NFSV4 when AUTH_DH is the negotiated
	 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
	 */
	if (args->flags & NFSMNT_SECURE) {
		svp->sv_dhsec = create_authdh_data(args->netname,
		    strlen(args->netname),
		    args->syncaddr, svp->sv_knconf);
	}

	/*
	 * Get the extension data which has the security data structure.
	 * This includes data for AUTH_SYS as well.
	 */
	if (flags & NFSMNT_NEWARGS) {
		switch (args->nfs_args_ext) {
		case NFS_ARGS_EXTA:
		case NFS_ARGS_EXTB:
			/*
			 * Indicates the application is using the new
			 * sec_data structure to pass in the security
			 * data.
			 */
			secdata = args->nfs_ext_u.nfs_extA.secdata;
			if (secdata == NULL) {
				error = EINVAL;
			} else if (uap->flags & MS_SYSSPACE) {
				/*
				 * Need to validate the flavor here if
				 * sysspace; userspace was already
				 * validated in the nfs_copyin function.
				 */
				switch (secdata->rpcflavor) {
				case AUTH_NONE:
				case AUTH_UNIX:
				case AUTH_LOOPBACK:
				case AUTH_DES:
				case RPCSEC_GSS:
					break;
				default:
					error = EINVAL;
					goto errout;
				}
			}
			args->nfs_ext_u.nfs_extA.secdata = NULL;
			break;

		default:
			error = EINVAL;
			break;
		}

	} else if (flags & NFSMNT_SECURE) {
		/*
		 * NFSMNT_SECURE is deprecated but we keep it
		 * to support rogue user-generated applications
		 * that may use this undocumented interface to do
		 * AUTH_DH security.
		 */
		secdata = create_authdh_data(args->netname,
		    strlen(args->netname), args->syncaddr, svp->sv_knconf);

	} else {
		secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	}

	svp->sv_secdata = secdata;

	/*
	 * The user does not explicitly specify a flavor, and a
	 * user-defined default flavor is passed down.
	 */
	if (flags & NFSMNT_SECDEFAULT) {
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
		svp->sv_flags |= SV4_TRYSECDEFAULT;
		nfs_rw_exit(&svp->sv_lock);
	}

	/*
	 * Failover support:
	 *
	 * We may have a linked list of nfs_args structures,
	 * which means the user is looking for failover.  If
	 * the mount is either not "read-only" or "soft",
	 * we want to bail out with EINVAL.
	 */
	if (args->nfs_args_ext == NFS_ARGS_EXTB &&
	    args->nfs_ext_u.nfs_extB.next != NULL) {
		if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
			data = (char *)args->nfs_ext_u.nfs_extB.next;
			goto more;
		}
		error = EINVAL;
		goto errout;
	}

	/*
	 * Determine the zone we're being mounted into.
	 */
	zone_hold(mntzone = zone);		/* start with this assumption */
	if (getzoneid() == GLOBAL_ZONEID) {
		zone_rele(mntzone);
		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
		ASSERT(mntzone != NULL);
		if (mntzone != zone) {
			error = EBUSY;
			goto errout;
		}
	}

	if (is_system_labeled()) {
		error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
		    svp->sv_knconf, cr);

		if (error > 0)
			goto errout;

		if (error == -1) {
			/* change mount to read-only to prevent write-down */
			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
		}
	}

	/*
	 * Stop the mount from going any further if the zone is going away.
	 */
	if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
		error = EBUSY;
		goto errout;
	}

	/*
	 * Get root vnode.
	 */
proceed:
	error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);

	if (error) {
		/* if nfs4rootvp failed, it will free svp_head */
		svp_head = NULL;
		goto errout;
	}

	mi = VTOMI4(rtvp);

	/*
	 * Send client id to the server, if necessary
	 */
	nfs4_error_zinit(&n4e);
	nfs4setclientid(mi, cr, FALSE, &n4e);
	error = n4e.error;

	if (error)
		goto errout;

	/*
	 * Set option fields in the mount info record
	 */

	if (svp_head->sv_next) {
		mutex_enter(&mi->mi_lock);
		mi->mi_flags |= MI4_LLOCK;
		mutex_exit(&mi->mi_lock);
	}
	error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args);

errout:
	if (error) {
		if (rtvp != NULL) {
			rp = VTOR4(rtvp);
			if (rp->r_flags & R4HASHED)
				rp4_rmhash(rp);
		}
		if (mi != NULL) {
			nfs4_async_stop(vfsp);
			nfs4_async_manager_stop(vfsp);
			nfs4_remove_mi_from_server(mi, NULL);
			if (rtvp != NULL)
				VN_RELE(rtvp);
			if (mntzone != NULL)
				zone_rele(mntzone);
			/* need to remove it from the zone */
			removed = nfs4_mi_zonelist_remove(mi);
			if (removed)
				zone_rele(mi->mi_zone);
			MI4_RELE(mi);
			if (!(uap->flags & MS_SYSSPACE) && args) {
				nfs4_free_args(args);
				kmem_free(args, sizeof (*args));
			}
			return (error);
		}
		if (svp_head)
			sv4_free(svp_head);
	}

	if (!(uap->flags & MS_SYSSPACE) && args) {
		nfs4_free_args(args);
		kmem_free(args, sizeof (*args));
	}
	if (rtvp != NULL)
		VN_RELE(rtvp);

	if (mntzone != NULL)
		zone_rele(mntzone);

	return (error);
}

#ifdef	DEBUG
#define	VERS_MSG	"NFS4 server "
#else
#define	VERS_MSG	"NFS server "
#endif

#define	READ_MSG	\
	VERS_MSG "%s returned 0 for read transfer size"
#define	WRITE_MSG	\
	VERS_MSG "%s returned 0 for write transfer size"
#define	SIZE_MSG	\
	VERS_MSG "%s returned 0 for maximum file size"

/*
 * Get the symbolic link text from the server for a given filehandle
 * of that symlink.
 *
 * (get symlink text) PUTFH READLINK
 */
static int
getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
    int flags)

{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	nfs_argop4 argop[2];
	nfs_resop4 *resop;
	READLINK4res *lr_res;
	uint_t len;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	nfs4_sharedfh_t *sfh;
	nfs4_error_t e;
	int num_retry = nfs4_max_mount_retry;
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	sfh = sfh4_get(fh, mi);
	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	nfs4_error_zinit(&e);

	args.array_len = 2;
	args.array = argop;
	args.ctag = TAG_GET_SYMLINK;

	if (! recovery) {
		e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
		if (e.error) {
			sfh4_rele(&sfh);
			return (e.error);
		}
	}

	/* 0. putfh symlink fh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. readlink */
	argop[1].argop = OP_READLINK;

	doqueue = 1;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);

	if (needrecov && !recovery && num_retry-- > 0) {

		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
		    "getlinktext_otw: initiating recovery\n"));

		if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
		    OP_READLINK, NULL) == FALSE) {
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
			if (!e.error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			goto recov_retry;
		}
	}

	/*
	 * If non-NFS4 pcol error and/or we weren't able to recover.
	 */
	if (e.error != 0) {
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	if (res.status) {
		e.error = geterrno4(res.status);
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		if (! recovery)
			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
		sfh4_rele(&sfh);
		return (e.error);
	}

	/* res.status == NFS4_OK */
	ASSERT(res.status == NFS4_OK);

	resop = &res.array[1];	/* readlink res */
	lr_res = &resop->nfs_resop4_u.opreadlink;

	/* treat symlink name as data */
	*linktextp = utf8_to_str(&lr_res->link, &len, NULL);

	if (! recovery)
		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
	sfh4_rele(&sfh);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	return (0);
}

/*
 * Skip over consecutive slashes and "/./" in a pathname.
 */
void
pathname_skipslashdot(struct pathname *pnp)
{
	char *c1, *c2;

	while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {

		c1 = pnp->pn_path + 1;
		c2 = pnp->pn_path + 2;

		if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
			pnp->pn_path = pnp->pn_path + 2;	/* skip "/." */
			pnp->pn_pathlen = pnp->pn_pathlen - 2;
		} else {
			pnp->pn_path++;
			pnp->pn_pathlen--;
		}
	}
}

/*
 * Resolve a symbolic link path. The symlink is in the nth component of
 * svp->sv_path and has an nfs4 file handle "fh".
 * Upon return, the sv_path will point to the new path that has the nth
 * component resolved to its symlink text.
 */
int
resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
    cred_t *cr, int flags)
{
	char *oldpath;
	char *symlink, *newpath;
	struct pathname oldpn, newpn;
	char component[MAXNAMELEN];
	int i, addlen, error = 0;
	int oldpathlen;

	/* Get the symbolic link text over the wire. */
	error = getlinktext_otw(mi, fh, &symlink, cr, flags);

	if (error || symlink == NULL || strlen(symlink) == 0)
		return (error);

	/*
	 * Compose the new pathname.
	 * Note:
	 *    - only the nth component is resolved for the pathname.
	 *    - pathname.pn_pathlen does not count the ending null byte.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	oldpath = svp->sv_path;
	oldpathlen = svp->sv_pathlen;
	if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
		nfs_rw_exit(&svp->sv_lock);
		kmem_free(symlink, strlen(symlink) + 1);
		return (error);
	}
	nfs_rw_exit(&svp->sv_lock);
	pn_alloc(&newpn);

	/*
	 * Skip over previous components from the oldpath so that the
	 * oldpn.pn_path will point to the symlink component.  Skip
	 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
	 * pn_getcomponent can get the component.
	 */
	for (i = 1; i < nth; i++) {
		pathname_skipslashdot(&oldpn);
		error = pn_getcomponent(&oldpn, component);
		if (error)
			goto out;
	}

	/*
	 * Copy the old path up to the component right before the symlink
	 * if the symlink is not an absolute path.
	 */
	if (symlink[0] != '/') {
		addlen = oldpn.pn_path - oldpn.pn_buf;
		bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
		newpn.pn_path += addlen;
		newpn.pn_buf[newpn.pn_pathlen] = '/';
		newpn.pn_pathlen++;
		newpn.pn_path++;
	}

	/* copy the resolved symbolic link text */
	addlen = strlen(symlink);
	if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
		error = ENAMETOOLONG;
		goto out;
	}
	bcopy(symlink, newpn.pn_path, addlen);
	newpn.pn_pathlen += addlen;
	newpn.pn_path += addlen;

	/*
	 * Check if there is any remaining path after the symlink component.
	 * First, skip the symlink component.
	 */
	pathname_skipslashdot(&oldpn);
	if (error = pn_getcomponent(&oldpn, component))
		goto out;

	addlen = pn_pathleft(&oldpn); /* includes counting the slash */

	/*
	 * Copy the remaining path to the new pathname if there is any.
	 */
	if (addlen > 0) {
		if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
			error = ENAMETOOLONG;
			goto out;
		}
		bcopy(oldpn.pn_path, newpn.pn_path, addlen);
		newpn.pn_pathlen += addlen;
	}
	newpn.pn_buf[newpn.pn_pathlen] = '\0';

	/* get the newpath and store it in the servinfo4_t */
	newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
	bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
	newpath[newpn.pn_pathlen] = '\0';

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	svp->sv_path = newpath;
	svp->sv_pathlen = strlen(newpath) + 1;
	nfs_rw_exit(&svp->sv_lock);

	kmem_free(oldpath, oldpathlen);
out:
	kmem_free(symlink, strlen(symlink) + 1);
	pn_free(&newpn);
	pn_free(&oldpn);

	return (error);
}

/*
 * Get the root filehandle for the given filesystem and server, and update
 * svp.
 *
 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
 * to coordinate with recovery.  Otherwise, the caller is assumed to be
 * the recovery thread or have already done a start_fop.
 *
 * Errors are returned by the nfs4_error_t parameter.
 */

static void
nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
    int flags, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue = 1;
	nfs_argop4 *argop;
	nfs_resop4 *resop;
	nfs4_ga_res_t *garp;
	int num_argops;
	lookup4_param_t lookuparg;
	nfs_fh4 *tmpfhp;
	nfs_fh4 *resfhp;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	int llndx;
	int nthcomp;
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	if (svp->sv_path[0] == '\0') {
		nfs_rw_exit(&svp->sv_lock);
		nfs4_error_init(ep, EINVAL);
		return;
	}
	nfs_rw_exit(&svp->sv_lock);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;
recov_retry:
	nfs4_error_zinit(ep);

	if (!recovery) {
		ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
		    &recov_state, NULL);

		/*
		 * If recovery has been started and this request was
		 * initiated by a mount, then we must wait for recovery
		 * to finish before proceeding, otherwise, the error
		 * cleanup would remove data structures needed by the
		 * recovery thread.
		 */
		if (ep->error) {
			mutex_enter(&mi->mi_lock);
			if (mi->mi_flags & MI4_MOUNTING) {
				mi->mi_flags |= MI4_RECOV_FAIL;
				mi->mi_error = EIO;

				NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
				    "nfs4getfh_otw: waiting 4 recovery\n"));

				while (mi->mi_flags & MI4_RECOV_ACTIV)
					cv_wait(&mi->mi_failover_cv,
					    &mi->mi_lock);
			}
			mutex_exit(&mi->mi_lock);
			return;
		}

		/*
		 * If the client does not specify a specific flavor to use
		 * and has not gotten a secinfo list from the server yet,
		 * retrieve the secinfo list from the server and use a
		 * flavor from the list to mount.
		 *
		 * If we fail to get the secinfo list from the server, then
		 * try the default flavor.
		 */
		if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
		    svp->sv_secinfo == NULL) {
			(void) nfs4_secinfo_path(mi, cr, FALSE);
		}
	}

	if (recovery)
		args.ctag = TAG_REMAP_MOUNT;
	else
		args.ctag = TAG_MOUNT;

	lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
	lookuparg.argsp = &args;
	lookuparg.resp = &res;
	lookuparg.header_len = 2;	/* Putrootfh, getfh */
	lookuparg.trailer_len = 0;
	lookuparg.ga_bits = FATTR4_FSINFO_MASK;
	lookuparg.mi = mi;

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
	nfs_rw_exit(&svp->sv_lock);

	argop = args.array;
	num_argops = args.array_len;

	/* choose public or root filehandle */
	if (flags & NFS4_GETFH_PUBLIC)
		argop[0].argop = OP_PUTPUBFH;
	else
		argop[0].argop = OP_PUTROOTFH;

	/* get fh */
	argop[1].argop = OP_GETFH;

	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
	    "nfs4getfh_otw: %s call, mi 0x%p",
	    needrecov ? "recov" : "first", (void *)mi));

	rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);

	needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);

	if (needrecov) {
		bool_t abort;

		if (recovery) {
			nfs4args_lookup_free(argop, num_argops);
			kmem_free(argop,
			    lookuparg.arglen * sizeof (nfs_argop4));
			if (!ep->error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			return;
		}

		NFS4_DEBUG(nfs4_client_recov_debug,
		    (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));

		abort = nfs4_start_recovery(ep, mi, NULL,
		    NULL, NULL, NULL, OP_GETFH, NULL);
		if (!ep->error) {
			ep->error = geterrno4(res.status);
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
		/* have another go? */
		if (abort == FALSE)
			goto recov_retry;
		return;
	}

	/*
	 * No recovery, but check if error is set.
	 */
	if (ep->error) {
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		return;
	}

is_link_err:

	/* for non-recovery errors */
	if (res.status && res.status != NFS4ERR_SYMLINK) {
		if (!recovery) {
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	/*
	 * If any intermediate component in the path is a symbolic link,
	 * resolve the symlink, then try mount again using the new path.
	 */
	if (res.status == NFS4ERR_SYMLINK) {
		int where;

		/*
		 * This must be from OP_LOOKUP failure.  The (cfh) for this
		 * OP_LOOKUP is a symlink node.  Find out where the
		 * OP_GETFH is for the (cfh) that is a symlink node.
		 *
		 * Example:
		 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
		 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
		 *
		 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
		 * In this case, where = 7, nthcomp = 2.
		 */
		where = res.array_len - 2;
		ASSERT(where > 0);

		resop = &res.array[where - 1];
		ASSERT(resop->resop == OP_GETFH);
		tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
		nthcomp = res.array_len/3 - 1;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr,
		    flags);

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		goto recov_retry;
	}

	/* getfh */
	resop = &res.array[res.array_len - 2];
	ASSERT(resop->resop == OP_GETFH);
	resfhp = &resop->nfs_resop4_u.opgetfh.object;

	/* getattr fsinfo res */
	resop++;
	garp = &resop->nfs_resop4_u.opgetattr.ga_res;

	*vtp = garp->n4g_va.va_type;

	mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;

	mutex_enter(&mi->mi_lock);
	if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
		mi->mi_flags |= MI4_LINK;
	if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
		mi->mi_flags |= MI4_SYMLINK;
	if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
		mi->mi_flags |= MI4_ACL;
	mutex_exit(&mi->mi_lock);

	if (garp->n4g_ext_res->n4g_maxread == 0)
		mi->mi_tsize =
		    MIN(MAXBSIZE, mi->mi_tsize);
	else
		mi->mi_tsize =
		    MIN(garp->n4g_ext_res->n4g_maxread,
		    mi->mi_tsize);

	if (garp->n4g_ext_res->n4g_maxwrite == 0)
		mi->mi_stsize =
		    MIN(MAXBSIZE, mi->mi_stsize);
	else
		mi->mi_stsize =
		    MIN(garp->n4g_ext_res->n4g_maxwrite,
		    mi->mi_stsize);

	if (garp->n4g_ext_res->n4g_maxfilesize != 0)
		mi->mi_maxfilesize =
		    MIN(garp->n4g_ext_res->n4g_maxfilesize,
		    mi->mi_maxfilesize);

	/*
	 * If the final component is a symbolic link, resolve the symlink,
	 * then try mount again using the new path.
	 *
	 * Assume no symbolic link for root filesystem "/".
	 */
	if (*vtp == VLNK) {
		/*
		 * nthcomp is the total result length minus
		 * the 1st 2 OPs (PUTROOTFH, GETFH),
		 * then divided by 3 (LOOKUP,GETFH,GETATTR)
		 *
		 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
		 *	LOOKUP 2nd-comp GETFH GETATTR
		 *
		 * (8 - 2)/3 = 2
		 */
		nthcomp = (res.array_len - 2)/3;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock.  See RFE 4777612.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
		    flags);

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		goto recov_retry;
	}

	/*
	 * We need to figure out where in the compound the getfh
	 * for the parent directory is.  If the object to be mounted is
	 * the root, then there is no lookup at all:
	 * PUTROOTFH, GETFH.
	 * If the object to be mounted is in the root, then the compound is:
	 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
	 * In either of these cases, the index of the GETFH is 1.
	 * If it is not at the root, then it's something like:
	 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
	 * LOOKUP, GETFH, GETATTR
	 * In this case, the index is llndx (last lookup index) - 2.
	 */
	if (llndx == -1 || llndx == 2)
		resop = &res.array[1];
	else {
		ASSERT(llndx > 2);
		resop = &res.array[llndx-2];
	}

	ASSERT(resop->resop == OP_GETFH);
	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;

	/* save the filehandles for the replica */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE);
	svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len;
	bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf,
	    tmpfhp->nfs_fh4_len);
	ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE);
	svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len;
	bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len);

	/* initialize fsid and supp_attrs for server fs */
	svp->sv_fsid = garp->n4g_fsid;
	svp->sv_supp_attrs =
	    garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK;

	nfs_rw_exit(&svp->sv_lock);

	nfs4args_lookup_free(argop, num_argops);
	kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	if (!recovery)
		nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
}

static ushort_t nfs4_max_threads = 8;	/* max number of active async threads */
static uint_t nfs4_bsize = 32 * 1024;	/* client `block' size */
static uint_t nfs4_async_clusters = 1;	/* # of reqs from each async queue */
static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;

/*
 * Remap the root filehandle for the given filesystem.
 *
 * results returned via the nfs4_error_t parameter.
 */
void
nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags)
{
	struct servinfo4 *svp;
	vtype_t vtype;
	nfs_fh4 rootfh;
	int getfh_flags;
	char *orig_sv_path;
	int orig_sv_pathlen, num_retry;

	mutex_enter(&mi->mi_lock);

remap_retry:
	svp = mi->mi_curr_serv;
	getfh_flags =
	    (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0;
	getfh_flags |=
	    (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0;
	mutex_exit(&mi->mi_lock);

	/*
	 * Just in case the server path being mounted contains
	 * symlinks and fails w/STALE, save the initial sv_path
	 * so we can redrive the initial mount compound with the
	 * initial sv_path -- not a symlink-expanded version.
	 *
	 * This could only happen if a symlink was expanded
	 * and the expanded mount compound failed stale.  Because
	 * it could be the case that the symlink was removed at
	 * the server (and replaced with another symlink/dir),
	 * we need to use the initial sv_path when attempting
	 * to re-lookup everything and recover.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	orig_sv_pathlen = svp->sv_pathlen;
	orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP);
	bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen);
	nfs_rw_exit(&svp->sv_lock);

	num_retry = nfs4_max_mount_retry;

	do {
		/*
		 * Get the root fh from the server.  Retry nfs4_max_mount_retry
		 * (2) times if it fails with STALE since the recovery
		 * infrastructure doesn't do STALE recovery for components
		 * of the server path to the object being mounted.
		 */
		nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep);

		if (ep->error == 0 && ep->stat == NFS4_OK)
			break;

		/*
		 * For some reason, the mount compound failed.
		 * Before retrying, we need to restore the original sv_path
		 * because it might have contained symlinks that were
		 * expanded by nfs4getfh_otw before the failure occurred.
		 * Replace the current sv_path with the orig sv_path -- just
		 * in case it changed due to embedded symlinks.
		 */
		(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
		if (orig_sv_pathlen != svp->sv_pathlen) {
			kmem_free(svp->sv_path, svp->sv_pathlen);
			svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP);
			svp->sv_pathlen = orig_sv_pathlen;
		}
		bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen);
		nfs_rw_exit(&svp->sv_lock);

	} while (num_retry-- > 0);

	kmem_free(orig_sv_path, orig_sv_pathlen);

	if (ep->error != 0 || ep->stat != 0) {
		return;
	}

	if (vtype != VNON && vtype != mi->mi_type) {
		/* shouldn't happen */
		zcmn_err(mi->mi_zone->zone_id, CE_WARN,
		    "nfs4_remap_root: server root vnode type (%d) doesn't "
		    "match mount info (%d)", vtype, mi->mi_type);
	}

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
	rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len;
	nfs_rw_exit(&svp->sv_lock);
	sfh4_update(mi->mi_rootfh, &rootfh);

	/*
	 * It's possible that recovery took place on the filesystem
	 * and the server has been updated between the time we did
	 * the nfs4getfh_otw and now.  Re-drive the otw operation
	 * to make sure we have a good fh.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_curr_serv != svp)
		goto remap_retry;

	mutex_exit(&mi->mi_lock);
}

static int
nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head,
    int flags, cred_t *cr, zone_t *zone)
{
	vnode_t *rtvp = NULL;
	mntinfo4_t *mi;
	dev_t nfs_dev;
	int error = 0;
	rnode4_t *rp;
	int i;
	struct vattr va;
	vtype_t vtype = VNON;
	vtype_t tmp_vtype = VNON;
	struct servinfo4 *firstsvp = NULL, *svp = svp_head;
	nfs4_oo_hash_bucket_t *bucketp;
	nfs_fh4 fh;
	char *droptext = "";
	struct nfs_stats *nfsstatsp;
	nfs4_fname_t *mfname;
	nfs4_error_t e;
	char *orig_sv_path;
	int orig_sv_pathlen, num_retry, removed;
	cred_t *lcr = NULL, *tcr = cr;

	nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
	ASSERT(nfsstatsp != NULL);

	ASSERT(nfs_zone() == zone);
	ASSERT(crgetref(cr));

	/*
	 * Create a mount record and link it to the vfs struct.
	 */
	mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
	mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL);
	nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL);
	nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL);

	if (!(flags & NFSMNT_SOFT))
		mi->mi_flags |= MI4_HARD;
	if ((flags & NFSMNT_NOPRINT))
		mi->mi_flags |= MI4_NOPRINT;
	if (flags & NFSMNT_INT)
		mi->mi_flags |= MI4_INT;
	if (flags & NFSMNT_PUBLIC)
		mi->mi_flags |= MI4_PUBLIC;
	mi->mi_retrans = NFS_RETRIES;
	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
	    svp->sv_knconf->knc_semantics == NC_TPI_COTS)
		mi->mi_timeo = nfs4_cots_timeo;
	else
		mi->mi_timeo = NFS_TIMEO;
	mi->mi_prog = NFS_PROGRAM;
	mi->mi_vers = NFS_V4;
	mi->mi_rfsnames = rfsnames_v4;
	mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr;
	cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
	mi->mi_servers = svp;
	mi->mi_curr_serv = svp;
	mi->mi_acregmin = SEC2HR(ACREGMIN);
	mi->mi_acregmax = SEC2HR(ACREGMAX);
	mi->mi_acdirmin = SEC2HR(ACDIRMIN);
	mi->mi_acdirmax = SEC2HR(ACDIRMAX);
	mi->mi_fh_expire_type = FH4_PERSISTENT;
	mi->mi_clientid_next = NULL;
	mi->mi_clientid_prev = NULL;
	mi->mi_grace_wait = 0;
	mi->mi_error = 0;
	mi->mi_srvsettime = 0;

	mi->mi_count = 1;

	mi->mi_tsize = nfs4_tsize(svp->sv_knconf);
	mi->mi_stsize = mi->mi_tsize;

	if (flags & NFSMNT_DIRECTIO)
		mi->mi_flags |= MI4_DIRECTIO;

	mi->mi_flags |= MI4_MOUNTING;

	/*
	 * Make a vfs struct for nfs.  We do this here instead of below
	 * because rtvp needs a vfs before we can do a getattr on it.
	 *
	 * Assign a unique device id to the mount
	 */
	mutex_enter(&nfs_minor_lock);
	do {
		nfs_minor = (nfs_minor + 1) & MAXMIN32;
		nfs_dev = makedevice(nfs_major, nfs_minor);
	} while (vfs_devismounted(nfs_dev));
	mutex_exit(&nfs_minor_lock);

	vfsp->vfs_dev = nfs_dev;
	vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp);
	vfsp->vfs_data = (caddr_t)mi;
	vfsp->vfs_fstype = nfsfstyp;
	vfsp->vfs_bsize = nfs4_bsize;

	/*
	 * Initialize fields used to support async putpage operations.
	 */
	for (i = 0; i < NFS4_ASYNC_TYPES; i++)
		mi->mi_async_clusters[i] = nfs4_async_clusters;
	mi->mi_async_init_clusters = nfs4_async_clusters;
	mi->mi_async_curr = &mi->mi_async_reqs[0];
	mi->mi_max_threads = nfs4_max_threads;
	mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL);

	mi->mi_vfsp = vfsp;
	zone_hold(mi->mi_zone = zone);
	nfs4_mi_zonelist_add(mi);

	/*
	 * Initialize the <open owner/cred> hash table.
	 */
	for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
		bucketp = &(mi->mi_oo_list[i]);
		mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL);
		list_create(&bucketp->b_oo_hash_list,
		    sizeof (nfs4_open_owner_t),
		    offsetof(nfs4_open_owner_t, oo_hash_node));
	}

	/*
	 * Initialize the freed open owner list.
	 */
	mi->mi_foo_num = 0;
	mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS;
	list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t),
	    offsetof(nfs4_open_owner_t, oo_foo_node));

	list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t),
	    offsetof(nfs4_lost_rqst_t, lr_node));

	list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t),
	    offsetof(nfs4_bseqid_entry_t, bs_node));

	/*
	 * Initialize the msg buffer.
	 */
	list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t),
	    offsetof(nfs4_debug_msg_t, msg_node));
	mi->mi_msg_count = 0;
	mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Initialize kstats
	 */
	nfs4_mnt_kstat_init(vfsp);

	/*
	 * Initialize the shared filehandle pool, and get the fname for
	 * the filesystem root.
	 */
	sfh4_createtab(&mi->mi_filehandles);
	mi->mi_fname = fn_get(NULL, ".");

	/*
	 * Save server path we're attempting to mount.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	orig_sv_pathlen = svp_head->sv_pathlen;
	orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP);
	bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen);
	nfs_rw_exit(&svp->sv_lock);

	/*
	 * Make the GETFH call to get root fh for each replica.
	 */
	if (svp_head->sv_next)
		droptext = ", dropping replica";

	/*
	 * If the uid is set then set the creds for secure mounts
	 * by proxy processes such as automountd.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	if (svp->sv_secdata->uid != 0) {
		lcr = crdup(cr);
		(void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
		tcr = lcr;
	}
	nfs_rw_exit(&svp->sv_lock);
	for (svp = svp_head; svp; svp = svp->sv_next) {
		if (nfs4_chkdup_servinfo4(svp_head, svp)) {
			nfs_cmn_err(error, CE_WARN,
			    VERS_MSG "Host %s is a duplicate%s",
			    svp->sv_hostname, droptext);
			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
			svp->sv_flags |= SV4_NOTINUSE;
			nfs_rw_exit(&svp->sv_lock);
			continue;
		}
		mi->mi_curr_serv = svp;

		/*
		 * Just in case the server path being mounted contains
		 * symlinks and fails w/STALE, save the initial sv_path
		 * so we can redrive the initial mount compound with the
		 * initial sv_path -- not a symlink-expanded version.
		 *
		 * This could only happen if a symlink was expanded
		 * and the expanded mount compound failed stale.  Because
		 * it could be the case that the symlink was removed at
		 * the server (and replaced with another symlink/dir),
		 * we need to use the initial sv_path when attempting
		 * to re-lookup everything and recover.
		 *
		 * Other mount errors should eventually be handled here also
		 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE).  For now, all mount
		 * failures will result in the mount being redriven a few
		 * times.
		 */
		num_retry = nfs4_max_mount_retry;
		do {
			nfs4getfh_otw(mi, svp, &tmp_vtype,
			    ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) |
			    NFS4_GETFH_NEEDSOP, tcr, &e);

			if (e.error == 0 && e.stat == NFS4_OK)
				break;

			/*
			 * replace current sv_path with orig sv_path -- just in
2047 */ 2048 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2049 if (orig_sv_pathlen != svp->sv_pathlen) { 2050 kmem_free(svp->sv_path, svp->sv_pathlen); 2051 svp->sv_path = kmem_alloc(orig_sv_pathlen, 2052 KM_SLEEP); 2053 svp->sv_pathlen = orig_sv_pathlen; 2054 } 2055 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2056 nfs_rw_exit(&svp->sv_lock); 2057 2058 } while (num_retry-- > 0); 2059 2060 error = e.error ? e.error : geterrno4(e.stat); 2061 if (error) { 2062 nfs_cmn_err(error, CE_WARN, 2063 VERS_MSG "initial call to %s failed%s: %m", 2064 svp->sv_hostname, droptext); 2065 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2066 svp->sv_flags |= SV4_NOTINUSE; 2067 nfs_rw_exit(&svp->sv_lock); 2068 mi->mi_flags &= ~MI4_RECOV_FAIL; 2069 mi->mi_error = 0; 2070 continue; 2071 } 2072 2073 if (tmp_vtype == VBAD) { 2074 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2075 VERS_MSG "%s returned a bad file type for " 2076 "root%s", svp->sv_hostname, droptext); 2077 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2078 svp->sv_flags |= SV4_NOTINUSE; 2079 nfs_rw_exit(&svp->sv_lock); 2080 continue; 2081 } 2082 2083 if (vtype == VNON) { 2084 vtype = tmp_vtype; 2085 } else if (vtype != tmp_vtype) { 2086 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2087 VERS_MSG "%s returned a different file type " 2088 "for root%s", svp->sv_hostname, droptext); 2089 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2090 svp->sv_flags |= SV4_NOTINUSE; 2091 nfs_rw_exit(&svp->sv_lock); 2092 continue; 2093 } 2094 if (firstsvp == NULL) 2095 firstsvp = svp; 2096 } 2097 2098 kmem_free(orig_sv_path, orig_sv_pathlen); 2099 2100 if (firstsvp == NULL) { 2101 if (error == 0) 2102 error = ENOENT; 2103 goto bad; 2104 } 2105 2106 mi->mi_curr_serv = svp = firstsvp; 2107 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2108 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2109 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2110 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2111 mi->mi_rootfh = sfh4_get(&fh, mi); 2112 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2113 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2114 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2115 nfs_rw_exit(&svp->sv_lock); 2116 2117 /* 2118 * Make the root vnode without attributes. 2119 */ 2120 mfname = mi->mi_fname; 2121 fn_hold(mfname); 2122 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2123 &mfname, NULL, mi, cr, gethrtime()); 2124 rtvp->v_type = vtype; 2125 2126 mi->mi_curread = mi->mi_tsize; 2127 mi->mi_curwrite = mi->mi_stsize; 2128 2129 /* 2130 * Start the manager thread responsible for handling async worker 2131 * threads. 2132 */ 2133 MI4_HOLD(mi); 2134 VFS_HOLD(vfsp); /* add reference for thread */ 2135 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2136 vfsp, 0, minclsyspri); 2137 ASSERT(mi->mi_manager_thread != NULL); 2138 2139 /* 2140 * Create the thread that handles over-the-wire calls for 2141 * VOP_INACTIVE. 2142 * This needs to happen after the manager thread is created. 
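 * Like the manager thread above, it is created with its own hold on
 * the mntinfo4 (the MI4_HOLD just below).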
2143 */ 2144 MI4_HOLD(mi); 2145 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2146 mi, 0, minclsyspri); 2147 ASSERT(mi->mi_inactive_thread != NULL); 2148 2149 /* If we didn't get a type, get one now */ 2150 if (rtvp->v_type == VNON) { 2151 va.va_mask = AT_TYPE; 2152 error = nfs4getattr(rtvp, &va, tcr); 2153 if (error) 2154 goto bad; 2155 rtvp->v_type = va.va_type; 2156 } 2157 2158 mi->mi_type = rtvp->v_type; 2159 2160 mutex_enter(&mi->mi_lock); 2161 mi->mi_flags &= ~MI4_MOUNTING; 2162 mutex_exit(&mi->mi_lock); 2163 2164 *rtvpp = rtvp; 2165 if (lcr != NULL) 2166 crfree(lcr); 2167 2168 return (0); 2169 bad: 2170 /* 2171 * An error occurred somewhere, need to clean up... 2172 */ 2173 if (lcr != NULL) 2174 crfree(lcr); 2175 if (rtvp != NULL) { 2176 /* 2177 * We need to release our reference to the root vnode and 2178 * destroy the mntinfo4 struct that we just created. 2179 */ 2180 rp = VTOR4(rtvp); 2181 if (rp->r_flags & R4HASHED) 2182 rp4_rmhash(rp); 2183 VN_RELE(rtvp); 2184 } 2185 nfs4_async_stop(vfsp); 2186 nfs4_async_manager_stop(vfsp); 2187 removed = nfs4_mi_zonelist_remove(mi); 2188 if (removed) 2189 zone_rele(mi->mi_zone); 2190 2191 /* 2192 * This releases the initial "hold" of the mi since it will never 2193 * be referenced by the vfsp. Also, when mount returns to vfs.c 2194 * with an error, the vfsp will be destroyed, not rele'd. 2195 */ 2196 MI4_RELE(mi); 2197 2198 *rtvpp = NULL; 2199 return (error); 2200 } 2201 2202 /* 2203 * vfs operations 2204 */ 2205 static int 2206 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2207 { 2208 mntinfo4_t *mi; 2209 ushort_t omax; 2210 int removed; 2211 2212 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2213 return (EPERM); 2214 2215 mi = VFTOMI4(vfsp); 2216 2217 if (flag & MS_FORCE) { 2218 vfsp->vfs_flag |= VFS_UNMOUNTED; 2219 if (nfs_zone() != mi->mi_zone) { 2220 /* 2221 * If the request is coming from the wrong zone, 2222 * we don't want to create any new threads, and 2223 * performance is not a concern. Do everything 2224 * inline. 2225 */ 2226 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2227 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2228 (void *)vfsp)); 2229 nfs4_free_mount(vfsp, cr); 2230 } else { 2231 /* 2232 * Free data structures asynchronously, to avoid 2233 * blocking the current thread (for performance 2234 * reasons only). 2235 */ 2236 async_free_mount(vfsp, cr); 2237 } 2238 return (0); 2239 } 2240 /* 2241 * Wait until all asynchronous putpage operations on 2242 * this file system are complete before flushing rnodes 2243 * from the cache. 2244 */ 2245 omax = mi->mi_max_threads; 2246 if (nfs4_async_stop_sig(vfsp)) { 2247 2248 return (EINTR); 2249 } 2250 r4flush(vfsp, cr); 2251 /* 2252 * If there are any active vnodes on this file system, 2253 * then the file system is busy and can't be umounted. 2254 */ 2255 if (check_rtable4(vfsp)) { 2256 mutex_enter(&mi->mi_async_lock); 2257 mi->mi_max_threads = omax; 2258 mutex_exit(&mi->mi_async_lock); 2259 return (EBUSY); 2260 } 2261 /* 2262 * The unmount can't fail from now on, and there are no active 2263 * files that could require over-the-wire calls to the server, 2264 * so stop the async manager and the inactive thread. 2265 */ 2266 nfs4_async_manager_stop(vfsp); 2267 /* 2268 * Destroy all rnodes belonging to this file system from the 2269 * rnode hash queues and purge any resources allocated to 2270 * them. 
2271 */ 2272 destroy_rtable4(vfsp, cr); 2273 vfsp->vfs_flag |= VFS_UNMOUNTED; 2274 2275 nfs4_remove_mi_from_server(mi, NULL); 2276 removed = nfs4_mi_zonelist_remove(mi); 2277 if (removed) 2278 zone_rele(mi->mi_zone); 2279 2280 return (0); 2281 } 2282 2283 /* 2284 * find root of nfs 2285 */ 2286 static int 2287 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2288 { 2289 mntinfo4_t *mi; 2290 vnode_t *vp; 2291 nfs4_fname_t *mfname; 2292 servinfo4_t *svp; 2293 2294 mi = VFTOMI4(vfsp); 2295 2296 if (nfs_zone() != mi->mi_zone) 2297 return (EPERM); 2298 2299 svp = mi->mi_curr_serv; 2300 if (svp) { 2301 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2302 if (svp->sv_flags & SV4_ROOT_STALE) { 2303 nfs_rw_exit(&svp->sv_lock); 2304 2305 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2306 if (svp->sv_flags & SV4_ROOT_STALE) { 2307 svp->sv_flags &= ~SV4_ROOT_STALE; 2308 nfs_rw_exit(&svp->sv_lock); 2309 return (ENOENT); 2310 } 2311 nfs_rw_exit(&svp->sv_lock); 2312 } else 2313 nfs_rw_exit(&svp->sv_lock); 2314 } 2315 2316 mfname = mi->mi_fname; 2317 fn_hold(mfname); 2318 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2319 VFTOMI4(vfsp), CRED(), gethrtime()); 2320 2321 if (VTOR4(vp)->r_flags & R4STALE) { 2322 VN_RELE(vp); 2323 return (ENOENT); 2324 } 2325 2326 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2327 2328 vp->v_type = mi->mi_type; 2329 2330 *vpp = vp; 2331 2332 return (0); 2333 } 2334 2335 static int 2336 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2337 { 2338 int error; 2339 nfs4_ga_res_t gar; 2340 nfs4_ga_ext_res_t ger; 2341 2342 gar.n4g_ext_res = &ger; 2343 2344 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2345 NFS4_STATFS_ATTR_MASK, cr)) 2346 return (error); 2347 2348 *sbp = gar.n4g_ext_res->n4g_sb; 2349 2350 return (0); 2351 } 2352 2353 /* 2354 * Get file system statistics. 2355 */ 2356 static int 2357 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2358 { 2359 int error; 2360 vnode_t *vp; 2361 cred_t *cr; 2362 2363 error = nfs4_root(vfsp, &vp); 2364 if (error) 2365 return (error); 2366 2367 cr = CRED(); 2368 2369 error = nfs4_statfs_otw(vp, sbp, cr); 2370 if (!error) { 2371 (void) strncpy(sbp->f_basetype, 2372 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2373 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2374 } else { 2375 nfs4_purge_stale_fh(error, vp, cr); 2376 } 2377 2378 VN_RELE(vp); 2379 2380 return (error); 2381 } 2382 2383 static kmutex_t nfs4_syncbusy; 2384 2385 /* 2386 * Flush dirty nfs files for file system vfsp. 2387 * If vfsp == NULL, all nfs files are flushed. 2388 * 2389 * SYNC_CLOSE in flag is passed to us to 2390 * indicate that we are shutting down and or 2391 * rebooting. 2392 */ 2393 static int 2394 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2395 { 2396 /* 2397 * Cross-zone calls are OK here, since this translates to a 2398 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2399 */ 2400 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2401 r4flush(vfsp, cr); 2402 mutex_exit(&nfs4_syncbusy); 2403 } 2404 2405 /* 2406 * if SYNC_CLOSE is set then we know that 2407 * the system is rebooting, mark the mntinfo 2408 * for later examination. 
2409 */ 2410 if (vfsp && (flag & SYNC_CLOSE)) { 2411 mntinfo4_t *mi; 2412 2413 mi = VFTOMI4(vfsp); 2414 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2415 mutex_enter(&mi->mi_lock); 2416 mi->mi_flags |= MI4_SHUTDOWN; 2417 mutex_exit(&mi->mi_lock); 2418 } 2419 } 2420 return (0); 2421 } 2422 2423 /* 2424 * vget is difficult, if not impossible, to support in v4 because we don't 2425 * know the parent directory or name, which makes it impossible to create a 2426 * useful shadow vnode. And we need the shadow vnode for things like 2427 * OPEN. 2428 */ 2429 2430 /* ARGSUSED */ 2431 /* 2432 * XXX Check nfs4_vget_pseudo() for dependency. 2433 */ 2434 static int 2435 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2436 { 2437 return (EREMOTE); 2438 } 2439 2440 /* 2441 * nfs4_mountroot get called in the case where we are diskless booting. All 2442 * we need from here is the ability to get the server info and from there we 2443 * can simply call nfs4_rootvp. 2444 */ 2445 /* ARGSUSED */ 2446 static int 2447 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2448 { 2449 vnode_t *rtvp; 2450 char root_hostname[SYS_NMLN+1]; 2451 struct servinfo4 *svp; 2452 int error; 2453 int vfsflags; 2454 size_t size; 2455 char *root_path; 2456 struct pathname pn; 2457 char *name; 2458 cred_t *cr; 2459 mntinfo4_t *mi; 2460 struct nfs_args args; /* nfs mount arguments */ 2461 static char token[10]; 2462 nfs4_error_t n4e; 2463 2464 bzero(&args, sizeof (args)); 2465 2466 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2467 clkset(-1L); /* hack for now - until we get time svc? */ 2468 2469 if (why == ROOT_REMOUNT) { 2470 /* 2471 * Shouldn't happen. 2472 */ 2473 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2474 } 2475 2476 if (why == ROOT_UNMOUNT) { 2477 /* 2478 * Nothing to do for NFS. 2479 */ 2480 return (0); 2481 } 2482 2483 /* 2484 * why == ROOT_INIT 2485 */ 2486 2487 name = token; 2488 *name = 0; 2489 (void) getfsname("root", name, sizeof (token)); 2490 2491 pn_alloc(&pn); 2492 root_path = pn.pn_path; 2493 2494 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2495 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2496 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2497 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2498 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2499 2500 /* 2501 * Get server address 2502 * Get the root path 2503 * Get server's transport 2504 * Get server's hostname 2505 * Get options 2506 */ 2507 args.addr = &svp->sv_addr; 2508 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2509 args.fh = (char *)&svp->sv_fhandle; 2510 args.knconf = svp->sv_knconf; 2511 args.hostname = root_hostname; 2512 vfsflags = 0; 2513 if (error = mount_root(*name ? 
name : "root", root_path, NFS_V4, 2514 &args, &vfsflags)) { 2515 if (error == EPROTONOSUPPORT) 2516 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2517 "mount_root failed: server doesn't support NFS V4"); 2518 else 2519 nfs_cmn_err(error, CE_WARN, 2520 "nfs4_mountroot: mount_root failed: %m"); 2521 nfs_rw_exit(&svp->sv_lock); 2522 sv4_free(svp); 2523 pn_free(&pn); 2524 return (error); 2525 } 2526 nfs_rw_exit(&svp->sv_lock); 2527 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2528 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2529 (void) strcpy(svp->sv_hostname, root_hostname); 2530 2531 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2532 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2533 (void) strcpy(svp->sv_path, root_path); 2534 2535 /* 2536 * Force root partition to always be mounted with AUTH_UNIX for now 2537 */ 2538 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2539 svp->sv_secdata->secmod = AUTH_UNIX; 2540 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2541 svp->sv_secdata->data = NULL; 2542 2543 cr = crgetcred(); 2544 rtvp = NULL; 2545 2546 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2547 2548 if (error) { 2549 crfree(cr); 2550 pn_free(&pn); 2551 goto errout; 2552 } 2553 2554 mi = VTOMI4(rtvp); 2555 2556 /* 2557 * Send client id to the server, if necessary 2558 */ 2559 nfs4_error_zinit(&n4e); 2560 nfs4setclientid(mi, cr, FALSE, &n4e); 2561 error = n4e.error; 2562 2563 crfree(cr); 2564 2565 if (error) { 2566 pn_free(&pn); 2567 goto errout; 2568 } 2569 2570 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2571 if (error) { 2572 nfs_cmn_err(error, CE_WARN, 2573 "nfs4_mountroot: invalid root mount options"); 2574 pn_free(&pn); 2575 goto errout; 2576 } 2577 2578 (void) vfs_lock_wait(vfsp); 2579 vfs_add(NULL, vfsp, vfsflags); 2580 vfs_unlock(vfsp); 2581 2582 size = strlen(svp->sv_hostname); 2583 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2584 rootfs.bo_name[size] = ':'; 2585 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2586 2587 pn_free(&pn); 2588 2589 errout: 2590 if (error) { 2591 sv4_free(svp); 2592 nfs4_async_stop(vfsp); 2593 nfs4_async_manager_stop(vfsp); 2594 } 2595 2596 if (rtvp != NULL) 2597 VN_RELE(rtvp); 2598 2599 return (error); 2600 } 2601 2602 /* 2603 * Initialization routine for VFS routines. Should only be called once 2604 */ 2605 int 2606 nfs4_vfsinit(void) 2607 { 2608 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2609 nfs4setclientid_init(); 2610 return (0); 2611 } 2612 2613 void 2614 nfs4_vfsfini(void) 2615 { 2616 nfs4setclientid_fini(); 2617 mutex_destroy(&nfs4_syncbusy); 2618 } 2619 2620 void 2621 nfs4_freevfs(vfs_t *vfsp) 2622 { 2623 mntinfo4_t *mi; 2624 2625 /* need to release the initial hold */ 2626 mi = VFTOMI4(vfsp); 2627 MI4_RELE(mi); 2628 } 2629 2630 /* 2631 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2632 */ 2633 struct nfs4_server nfs4_server_lst = 2634 { &nfs4_server_lst, &nfs4_server_lst }; 2635 2636 kmutex_t nfs4_server_lst_lock; 2637 2638 static void 2639 nfs4setclientid_init(void) 2640 { 2641 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2642 } 2643 2644 static void 2645 nfs4setclientid_fini(void) 2646 { 2647 mutex_destroy(&nfs4_server_lst_lock); 2648 } 2649 2650 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2651 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2652 2653 /* 2654 * Set the clientid for the server for "mi". No-op if the clientid is 2655 * already set. 
2656 * 2657 * The recovery boolean should be set to TRUE if this function was called 2658 * by the recovery code, and FALSE otherwise. This is used to determine 2659 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2660 * for adding a mntinfo4_t to a nfs4_server_t. 2661 * 2662 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2663 * 'n4ep->error' is set to geterrno4(n4ep->stat). 2664 */ 2665 void 2666 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2667 { 2668 struct nfs4_server *np; 2669 struct servinfo4 *svp = mi->mi_curr_serv; 2670 nfs4_recov_state_t recov_state; 2671 int num_retries = 0; 2672 bool_t retry; 2673 cred_t *lcr = NULL; 2674 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2675 time_t lease_time = 0; 2676 2677 recov_state.rs_flags = 0; 2678 recov_state.rs_num_retry_despite_err = 0; 2679 ASSERT(n4ep != NULL); 2680 2681 recov_retry: 2682 retry = FALSE; 2683 nfs4_error_zinit(n4ep); 2684 if (!recovery) 2685 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2686 2687 mutex_enter(&nfs4_server_lst_lock); 2688 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2689 mutex_exit(&nfs4_server_lst_lock); 2690 if (!np) { 2691 struct nfs4_server *tnp; 2692 np = new_nfs4_server(svp, cr); 2693 mutex_enter(&np->s_lock); 2694 2695 mutex_enter(&nfs4_server_lst_lock); 2696 tnp = servinfo4_to_nfs4_server(svp); 2697 if (tnp) { 2698 /* 2699 * another thread snuck in and put server on list. 2700 * since we aren't adding it to the nfs4_server_list 2701 * we need to set the ref count to 0 and destroy it. 2702 */ 2703 np->s_refcnt = 0; 2704 destroy_nfs4_server(np); 2705 np = tnp; 2706 } else { 2707 /* 2708 * do not give list a reference until everything 2709 * succeeds 2710 */ 2711 insque(np, &nfs4_server_lst); 2712 } 2713 mutex_exit(&nfs4_server_lst_lock); 2714 } 2715 ASSERT(MUTEX_HELD(&np->s_lock)); 2716 /* 2717 * If we find the server already has N4S_CLIENTID_SET, then 2718 * just return, we've already done SETCLIENTID to that server 2719 */ 2720 if (np->s_flags & N4S_CLIENTID_SET) { 2721 /* add mi to np's mntinfo4_list */ 2722 nfs4_add_mi_to_server(np, mi); 2723 if (!recovery) 2724 nfs_rw_exit(&mi->mi_recovlock); 2725 mutex_exit(&np->s_lock); 2726 nfs4_server_rele(np); 2727 return; 2728 } 2729 mutex_exit(&np->s_lock); 2730 2731 2732 /* 2733 * Drop the mi_recovlock since nfs4_start_op will 2734 * acquire it again for us. 2735 */ 2736 if (!recovery) { 2737 nfs_rw_exit(&mi->mi_recovlock); 2738 2739 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2740 if (n4ep->error) { 2741 nfs4_server_rele(np); 2742 return; 2743 } 2744 } 2745 2746 mutex_enter(&np->s_lock); 2747 while (np->s_flags & N4S_CLIENTID_PEND) { 2748 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2749 mutex_exit(&np->s_lock); 2750 nfs4_server_rele(np); 2751 if (!recovery) 2752 nfs4_end_op(mi, NULL, NULL, &recov_state, 2753 recovery); 2754 n4ep->error = EINTR; 2755 return; 2756 } 2757 } 2758 2759 if (np->s_flags & N4S_CLIENTID_SET) { 2760 /* XXX copied/pasted from above */ 2761 /* add mi to np's mntinfo4_list */ 2762 nfs4_add_mi_to_server(np, mi); 2763 mutex_exit(&np->s_lock); 2764 nfs4_server_rele(np); 2765 if (!recovery) 2766 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2767 return; 2768 } 2769 2770 /* 2771 * Reset the N4S_CB_PINGED flag. This is used to 2772 * indicate if we have received a CB_NULL from the 2773 * server. Also we reset the waiter flag. 
2774 */
2775 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);
2776 /* any failure must now clear this flag */
2777 np->s_flags |= N4S_CLIENTID_PEND;
2778 mutex_exit(&np->s_lock);
2779 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);
2780
2781 if (n4ep->error == EACCES) {
2782 /*
2783 * If the uid is set then set the creds for secure mounts
2784 * by proxy processes such as automountd.
2785 */
2786 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2787 if (svp->sv_secdata->uid != 0) {
2788 lcr = crdup(cr);
2789 (void) crsetugid(lcr, svp->sv_secdata->uid,
2790 crgetgid(cr));
2791 }
2792 nfs_rw_exit(&svp->sv_lock);
2793
2794 if (lcr != NULL) {
2795 mutex_enter(&np->s_lock);
2796 crfree(np->s_cred);
2797 np->s_cred = lcr;
2798 mutex_exit(&np->s_lock);
2799 nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
2800 &retry_inuse);
2801 }
2802 }
2803 mutex_enter(&np->s_lock);
2804 lease_time = np->s_lease_time;
2805 np->s_flags &= ~N4S_CLIENTID_PEND;
2806 mutex_exit(&np->s_lock);
2807
2808 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
2809 /*
2810 * Start recovery if failover is a possibility. If
2811 * invoked by the recovery thread itself, then just
2812 * return and let it handle the failover first. NB:
2813 * recovery is not allowed if the mount is in progress
2814 * since the infrastructure is not sufficiently set up
2815 * to allow it. Just return the error (after suitable
2816 * retries).
2817 */
2818 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
2819 (void) nfs4_start_recovery(n4ep, mi, NULL,
2820 NULL, NULL, NULL, OP_SETCLIENTID, NULL);
2821 /*
2822 * Don't retry here, just return and let
2823 * recovery take over.
2824 */
2825 if (recovery)
2826 retry = FALSE;
2827 } else if (nfs4_rpc_retry_error(n4ep->error) ||
2828 n4ep->stat == NFS4ERR_RESOURCE ||
2829 n4ep->stat == NFS4ERR_STALE_CLIENTID) {
2830
2831 retry = TRUE;
2832 /*
2833 * Always retry if in recovery or once had
2834 * contact with the server (but now it's
2835 * overloaded).
2836 */
2837 if (recovery == TRUE ||
2838 n4ep->error == ETIMEDOUT ||
2839 n4ep->error == ECONNRESET)
2840 num_retries = 0;
2841 } else if (retry_inuse && n4ep->error == 0 &&
2842 n4ep->stat == NFS4ERR_CLID_INUSE) {
2843 retry = TRUE;
2844 num_retries = 0;
2845 }
2846 } else {
2847 /*
2848 * Since everything succeeded, give the list a reference count
2849 * if it hasn't already been given one by add_new_nfs4_server()
2850 * or if this is not a recovery situation (in which case it is
2851 * already on the list).
2852 */
2853 mutex_enter(&np->s_lock);
2854 if ((np->s_flags & N4S_INSERTED) == 0) {
2855 np->s_refcnt++;
2856 np->s_flags |= N4S_INSERTED;
2857 }
2858 mutex_exit(&np->s_lock);
2859 }
2860
2861 if (!recovery)
2862 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
2863
2864
2865 if (retry && num_retries++ < nfs4_num_sclid_retries) {
2866 if (retry_inuse) {
2867 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
2868 retry_inuse = 0;
2869 } else
2870 delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
2871
2872 nfs4_server_rele(np);
2873 goto recov_retry;
2874 }
2875
2876
2877 if (n4ep->error == 0)
2878 n4ep->error = geterrno4(n4ep->stat);
2879
2880 /* broadcast before release in case no other threads are waiting */
2881 cv_broadcast(&np->s_clientid_pend);
2882 nfs4_server_rele(np);
2883 }
2884
2885 int nfs4setclientid_otw_debug = 0;
2886
2887 /*
2888 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
2889 * but nothing else; the calling function must be designed to handle those
2890 * other errors.
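 * The first compound sent is PUTROOTFH + GETATTR(lease_time) +
 * SETCLIENTID; if that succeeds, a separate SETCLIENTID_CONFIRM
 * compound follows.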
2891 */
2892 static void
2893 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr,
2894 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep)
2895 {
2896 COMPOUND4args_clnt args;
2897 COMPOUND4res_clnt res;
2898 nfs_argop4 argop[3];
2899 SETCLIENTID4args *s_args;
2900 SETCLIENTID4resok *s_resok;
2901 int doqueue = 1;
2902 nfs4_ga_res_t *garp = NULL;
2903 timespec_t prop_time, after_time;
2904 verifier4 verf;
2905 clientid4 tmp_clientid;
2906
2907 ASSERT(!MUTEX_HELD(&np->s_lock));
2908
2909 args.ctag = TAG_SETCLIENTID;
2910
2911 args.array = argop;
2912 args.array_len = 3;
2913
2914 /* PUTROOTFH */
2915 argop[0].argop = OP_PUTROOTFH;
2916
2917 /* GETATTR */
2918 argop[1].argop = OP_GETATTR;
2919 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK;
2920 argop[1].nfs_argop4_u.opgetattr.mi = mi;
2921
2922 /* SETCLIENTID */
2923 argop[2].argop = OP_SETCLIENTID;
2924
2925 s_args = &argop[2].nfs_argop4_u.opsetclientid;
2926
2927 mutex_enter(&np->s_lock);
2928
2929 s_args->client.verifier = np->clidtosend.verifier;
2930 s_args->client.id_len = np->clidtosend.id_len;
2931 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT);
2932 s_args->client.id_val = np->clidtosend.id_val;
2933
2934 /*
2935 * The callback needs to happen on a non-RDMA transport.
2936 * Check if we have saved the original knetconfig;
2937 * if so, use that instead.
2938 */
2939 if (svp->sv_origknconf != NULL)
2940 nfs4_cb_args(np, svp->sv_origknconf, s_args);
2941 else
2942 nfs4_cb_args(np, svp->sv_knconf, s_args);
2943
2944 mutex_exit(&np->s_lock);
2945
2946 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
2947
2948 if (ep->error)
2949 return;
2950
2951 /* getattr lease_time res */
2952 if (res.array_len >= 2) {
2953 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
2954
2955 #ifndef _LP64
2956 /*
2957 * The 32 bit client cannot handle a lease time greater than
2958 * (INT32_MAX/1000000). This is due to the use of the
2959 * lease_time in calls to drv_usectohz() in
2960 * nfs4_renew_lease_thread(). The problem is that
2961 * drv_usectohz() takes a time_t (which is just a long = 4
2962 * bytes) as its parameter. The lease_time is multiplied by
2963 * 1000000 to convert seconds to usecs for the parameter. If
2964 * a number bigger than (INT32_MAX/1000000) is used then we
2965 * overflow on the 32bit client.
2966 */
2967 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) {
2968 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000;
2969 }
2970 #endif
2971
2972 mutex_enter(&np->s_lock);
2973 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime;
2974
2975 /*
2976 * Keep track of the lease period for the mi's
2977 * mi_msg_list. We need an appropriate time
2978 * bound to associate past facts with a current
2979 * event. The lease period is perfect for this.
2980 */
2981 mutex_enter(&mi->mi_msg_list_lock);
2982 mi->mi_lease_period = np->s_lease_time;
2983 mutex_exit(&mi->mi_msg_list_lock);
2984 mutex_exit(&np->s_lock);
2985 }
2986
2987
2988 if (res.status == NFS4ERR_CLID_INUSE) {
2989 clientaddr4 *clid_inuse;
2990
2991 if (!(*retry_inusep)) {
2992 clid_inuse = &res.array->nfs_resop4_u.
2993 opsetclientid.SETCLIENTID4res_u.client_using;
2994
2995 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
2996 "NFS4 mount (SETCLIENTID failed)."
2997 " nfs4_client_id.id is in" 2998 "use already by: r_netid<%s> r_addr<%s>", 2999 clid_inuse->r_netid, clid_inuse->r_addr); 3000 } 3001 3002 /* 3003 * XXX - The client should be more robust in its 3004 * handling of clientid in use errors (regen another 3005 * clientid and try again?) 3006 */ 3007 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3008 return; 3009 } 3010 3011 if (res.status) { 3012 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3013 return; 3014 } 3015 3016 s_resok = &res.array[2].nfs_resop4_u. 3017 opsetclientid.SETCLIENTID4res_u.resok4; 3018 3019 tmp_clientid = s_resok->clientid; 3020 3021 verf = s_resok->setclientid_confirm; 3022 3023 #ifdef DEBUG 3024 if (nfs4setclientid_otw_debug) { 3025 union { 3026 clientid4 clientid; 3027 int foo[2]; 3028 } cid; 3029 3030 cid.clientid = s_resok->clientid; 3031 3032 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3033 "nfs4setclientid_otw: OK, clientid = %x,%x, " 3034 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 3035 } 3036 #endif 3037 3038 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3039 3040 /* Confirm the client id and get the lease_time attribute */ 3041 3042 args.ctag = TAG_SETCLIENTID_CF; 3043 3044 args.array = argop; 3045 args.array_len = 1; 3046 3047 argop[0].argop = OP_SETCLIENTID_CONFIRM; 3048 3049 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 3050 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 3051 3052 /* used to figure out RTT for np */ 3053 gethrestime(&prop_time); 3054 3055 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 3056 "start time: %ld sec %ld nsec", prop_time.tv_sec, 3057 prop_time.tv_nsec)); 3058 3059 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3060 3061 gethrestime(&after_time); 3062 mutex_enter(&np->s_lock); 3063 np->propagation_delay.tv_sec = 3064 MAX(1, after_time.tv_sec - prop_time.tv_sec); 3065 mutex_exit(&np->s_lock); 3066 3067 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 3068 "finish time: %ld sec ", after_time.tv_sec)); 3069 3070 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 3071 "propagation delay set to %ld sec", 3072 np->propagation_delay.tv_sec)); 3073 3074 if (ep->error) 3075 return; 3076 3077 if (res.status == NFS4ERR_CLID_INUSE) { 3078 clientaddr4 *clid_inuse; 3079 3080 if (!(*retry_inusep)) { 3081 clid_inuse = &res.array->nfs_resop4_u. 3082 opsetclientid.SETCLIENTID4res_u.client_using; 3083 3084 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3085 "SETCLIENTID_CONFIRM failed. " 3086 "nfs4_client_id.id is in use already by: " 3087 "r_netid<%s> r_addr<%s>", 3088 clid_inuse->r_netid, clid_inuse->r_addr); 3089 } 3090 3091 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3092 return; 3093 } 3094 3095 if (res.status) { 3096 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3097 return; 3098 } 3099 3100 mutex_enter(&np->s_lock); 3101 np->clientid = tmp_clientid; 3102 np->s_flags |= N4S_CLIENTID_SET; 3103 3104 /* Add mi to np's mntinfo4 list */ 3105 nfs4_add_mi_to_server(np, mi); 3106 3107 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 3108 /* 3109 * Start lease management thread. 3110 * Keep trying until we succeed. 
3111 */ 3112 3113 np->s_refcnt++; /* pass reference to thread */ 3114 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 3115 minclsyspri); 3116 } 3117 mutex_exit(&np->s_lock); 3118 3119 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3120 } 3121 3122 /* 3123 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3124 * mi's clientid the same as sp's. 3125 * Assumes sp is locked down. 3126 */ 3127 void 3128 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3129 { 3130 mntinfo4_t *tmi; 3131 int in_list = 0; 3132 3133 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3134 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3135 ASSERT(sp != &nfs4_server_lst); 3136 ASSERT(MUTEX_HELD(&sp->s_lock)); 3137 3138 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3139 "nfs4_add_mi_to_server: add mi %p to sp %p", 3140 (void*)mi, (void*)sp)); 3141 3142 for (tmi = sp->mntinfo4_list; 3143 tmi != NULL; 3144 tmi = tmi->mi_clientid_next) { 3145 if (tmi == mi) { 3146 NFS4_DEBUG(nfs4_client_lease_debug, 3147 (CE_NOTE, 3148 "nfs4_add_mi_to_server: mi in list")); 3149 in_list = 1; 3150 } 3151 } 3152 3153 /* 3154 * First put a hold on the mntinfo4's vfsp so that references via 3155 * mntinfo4_list will be valid. 3156 */ 3157 if (!in_list) 3158 VFS_HOLD(mi->mi_vfsp); 3159 3160 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3161 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3162 3163 if (!in_list) { 3164 if (sp->mntinfo4_list) 3165 sp->mntinfo4_list->mi_clientid_prev = mi; 3166 mi->mi_clientid_next = sp->mntinfo4_list; 3167 sp->mntinfo4_list = mi; 3168 mi->mi_srvsettime = gethrestime_sec(); 3169 } 3170 3171 /* set mi's clientid to that of sp's for later matching */ 3172 mi->mi_clientid = sp->clientid; 3173 3174 /* 3175 * Update the clientid for any other mi's belonging to sp. This 3176 * must be done here while we hold sp->s_lock, so that 3177 * find_nfs4_server() continues to work. 3178 */ 3179 3180 for (tmi = sp->mntinfo4_list; 3181 tmi != NULL; 3182 tmi = tmi->mi_clientid_next) { 3183 if (tmi != mi) { 3184 tmi->mi_clientid = sp->clientid; 3185 } 3186 } 3187 } 3188 3189 /* 3190 * Remove the mi from sp's mntinfo4_list and release its reference. 3191 * Exception: if mi still has open files, flag it for later removal (when 3192 * all the files are closed). 3193 * 3194 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3195 * thread to exit. 3196 */ 3197 static void 3198 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3199 { 3200 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3201 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3202 (void*)mi, (void*)sp)); 3203 3204 ASSERT(sp != NULL); 3205 ASSERT(MUTEX_HELD(&sp->s_lock)); 3206 ASSERT(mi->mi_open_files >= 0); 3207 3208 /* 3209 * First make sure this mntinfo4 can be taken off of the list, 3210 * ie: it doesn't have any open files remaining. 
3211 */ 3212 if (mi->mi_open_files > 0) { 3213 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3214 "nfs4_remove_mi_from_server_nolock: don't " 3215 "remove mi since it still has files open")); 3216 3217 mutex_enter(&mi->mi_lock); 3218 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3219 mutex_exit(&mi->mi_lock); 3220 return; 3221 } 3222 3223 VFS_HOLD(mi->mi_vfsp); 3224 remove_mi(sp, mi); 3225 VFS_RELE(mi->mi_vfsp); 3226 3227 if (sp->mntinfo4_list == NULL) { 3228 /* last fs unmounted, kill the thread */ 3229 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3230 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3231 nfs4_mark_srv_dead(sp); 3232 } 3233 } 3234 3235 /* 3236 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3237 */ 3238 static void 3239 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3240 { 3241 ASSERT(MUTEX_HELD(&sp->s_lock)); 3242 3243 /* 3244 * We release a reference, and the caller must still have a 3245 * reference. 3246 */ 3247 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3248 3249 if (mi->mi_clientid_prev) { 3250 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3251 } else { 3252 /* This is the first mi in sp's mntinfo4_list */ 3253 /* 3254 * Make sure the first mntinfo4 in the list is the actual 3255 * mntinfo4 passed in. 3256 */ 3257 ASSERT(sp->mntinfo4_list == mi); 3258 3259 sp->mntinfo4_list = mi->mi_clientid_next; 3260 } 3261 if (mi->mi_clientid_next) 3262 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3263 3264 /* Now mark the mntinfo4's links as being removed */ 3265 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3266 3267 VFS_RELE(mi->mi_vfsp); 3268 } 3269 3270 /* 3271 * Free all the entries in sp's mntinfo4_list. 3272 */ 3273 static void 3274 remove_all_mi(nfs4_server_t *sp) 3275 { 3276 mntinfo4_t *mi; 3277 3278 ASSERT(MUTEX_HELD(&sp->s_lock)); 3279 3280 while (sp->mntinfo4_list != NULL) { 3281 mi = sp->mntinfo4_list; 3282 /* 3283 * Grab a reference in case there is only one left (which 3284 * remove_mi() frees). 3285 */ 3286 VFS_HOLD(mi->mi_vfsp); 3287 remove_mi(sp, mi); 3288 VFS_RELE(mi->mi_vfsp); 3289 } 3290 } 3291 3292 /* 3293 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3294 * 3295 * This version can be called with a null nfs4_server_t arg, 3296 * and will either find the right one and handle locking, or 3297 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3298 */ 3299 void 3300 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3301 { 3302 nfs4_server_t *sp; 3303 3304 if (esp == NULL) { 3305 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3306 sp = find_nfs4_server_all(mi, 1); 3307 } else 3308 sp = esp; 3309 3310 if (sp != NULL) 3311 nfs4_remove_mi_from_server_nolock(mi, sp); 3312 3313 /* 3314 * If we had a valid esp as input, the calling function will be 3315 * responsible for unlocking the esp nfs4_server. 3316 */ 3317 if (esp == NULL) { 3318 if (sp != NULL) 3319 mutex_exit(&sp->s_lock); 3320 nfs_rw_exit(&mi->mi_recovlock); 3321 if (sp != NULL) 3322 nfs4_server_rele(sp); 3323 } 3324 } 3325 3326 /* 3327 * Return TRUE if the given server has any non-unmounted filesystems. 3328 */ 3329 3330 bool_t 3331 nfs4_fs_active(nfs4_server_t *sp) 3332 { 3333 mntinfo4_t *mi; 3334 3335 ASSERT(MUTEX_HELD(&sp->s_lock)); 3336 3337 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3338 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3339 return (TRUE); 3340 } 3341 3342 return (FALSE); 3343 } 3344 3345 /* 3346 * Mark sp as finished and notify any waiters. 
3347 */ 3348 3349 void 3350 nfs4_mark_srv_dead(nfs4_server_t *sp) 3351 { 3352 ASSERT(MUTEX_HELD(&sp->s_lock)); 3353 3354 sp->s_thread_exit = NFS4_THREAD_EXIT; 3355 cv_broadcast(&sp->cv_thread_exit); 3356 } 3357 3358 /* 3359 * Create a new nfs4_server_t structure. 3360 * Returns new node unlocked and not in list, but with a reference count of 3361 * 1. 3362 */ 3363 struct nfs4_server * 3364 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3365 { 3366 struct nfs4_server *np; 3367 timespec_t tt; 3368 union { 3369 struct { 3370 uint32_t sec; 3371 uint32_t subsec; 3372 } un_curtime; 3373 verifier4 un_verifier; 3374 } nfs4clientid_verifier; 3375 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3376 int len; 3377 3378 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3379 np->saddr.len = svp->sv_addr.len; 3380 np->saddr.maxlen = svp->sv_addr.maxlen; 3381 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3382 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3383 np->s_refcnt = 1; 3384 3385 /* 3386 * Build the nfs_client_id4 for this server mount. Ensure 3387 * the verifier is useful and that the identification is 3388 * somehow based on the server's address for the case of 3389 * multi-homed servers. 3390 */ 3391 nfs4clientid_verifier.un_verifier = 0; 3392 gethrestime(&tt); 3393 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3394 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3395 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3396 3397 /* 3398 * calculate the length of the opaque identifier. Subtract 2 3399 * for the "%s" and add the traditional +1 for null 3400 * termination. 3401 */ 3402 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3403 np->clidtosend.id_len = len + np->saddr.maxlen; 3404 3405 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3406 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3407 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3408 3409 np->s_flags = 0; 3410 np->mntinfo4_list = NULL; 3411 /* save cred for issuing rfs4calls inside the renew thread */ 3412 crhold(cr); 3413 np->s_cred = cr; 3414 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3415 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3416 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3417 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3418 offsetof(rnode4_t, r_deleg_link)); 3419 np->s_thread_exit = 0; 3420 np->state_ref_count = 0; 3421 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3422 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3423 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3424 np->s_otw_call_count = 0; 3425 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3426 np->zoneid = getzoneid(); 3427 np->zone_globals = nfs4_get_callback_globals(); 3428 ASSERT(np->zone_globals != NULL); 3429 return (np); 3430 } 3431 3432 /* 3433 * Create a new nfs4_server_t structure and add it to the list. 3434 * Returns new node locked; reference must eventually be freed. 
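 * The list also holds its own reference: s_refcnt is bumped and
 * N4S_INSERTED is set below.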
3435 */ 3436 static struct nfs4_server * 3437 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3438 { 3439 nfs4_server_t *sp; 3440 3441 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3442 sp = new_nfs4_server(svp, cr); 3443 mutex_enter(&sp->s_lock); 3444 insque(sp, &nfs4_server_lst); 3445 sp->s_refcnt++; /* list gets a reference */ 3446 sp->s_flags |= N4S_INSERTED; 3447 sp->clientid = 0; 3448 return (sp); 3449 } 3450 3451 int nfs4_server_t_debug = 0; 3452 3453 #ifdef lint 3454 extern void 3455 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3456 #endif 3457 3458 #ifndef lint 3459 #ifdef DEBUG 3460 void 3461 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3462 { 3463 int hash16(void *p, int len); 3464 nfs4_server_t *np; 3465 3466 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3467 "dumping nfs4_server_t list in %s", txt)); 3468 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3469 "mi 0x%p, want clientid %llx, addr %d/%04X", 3470 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3471 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3472 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3473 np = np->forw) { 3474 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3475 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3476 np, (longlong_t)np->clientid, np->saddr.len, 3477 hash16((void *)np->saddr.buf, np->saddr.len), 3478 np->state_ref_count)); 3479 if (np->saddr.len == srv_p->sv_addr.len && 3480 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3481 np->saddr.len) == 0) 3482 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3483 " - address matches")); 3484 if (np->clientid == clientid || np->clientid == 0) 3485 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3486 " - clientid matches")); 3487 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3488 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3489 " - thread not exiting")); 3490 } 3491 delay(hz); 3492 } 3493 #endif 3494 #endif 3495 3496 3497 /* 3498 * Move a mntinfo4_t from one server list to another. 3499 * Locking of the two nfs4_server_t nodes will be done in list order. 3500 * 3501 * Returns NULL if the current nfs4_server_t for the filesystem could not 3502 * be found (e.g., due to forced unmount). Otherwise returns a reference 3503 * to the new nfs4_server_t, which must eventually be freed. 3504 */ 3505 nfs4_server_t * 3506 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3507 { 3508 nfs4_server_t *p, *op = NULL, *np = NULL; 3509 int num_open; 3510 zoneid_t zoneid = nfs_zoneid(); 3511 3512 ASSERT(nfs_zone() == mi->mi_zone); 3513 3514 mutex_enter(&nfs4_server_lst_lock); 3515 #ifdef DEBUG 3516 if (nfs4_server_t_debug) 3517 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3518 #endif 3519 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3520 if (p->zoneid != zoneid) 3521 continue; 3522 if (p->saddr.len == old->sv_addr.len && 3523 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3524 p->s_thread_exit != NFS4_THREAD_EXIT) { 3525 op = p; 3526 mutex_enter(&op->s_lock); 3527 op->s_refcnt++; 3528 } 3529 if (p->saddr.len == new->sv_addr.len && 3530 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3531 p->s_thread_exit != NFS4_THREAD_EXIT) { 3532 np = p; 3533 mutex_enter(&np->s_lock); 3534 } 3535 if (op != NULL && np != NULL) 3536 break; 3537 } 3538 if (op == NULL) { 3539 /* 3540 * Filesystem has been forcibly unmounted. Bail out. 
3541 */
3542 if (np != NULL)
3543 mutex_exit(&np->s_lock);
3544 mutex_exit(&nfs4_server_lst_lock);
3545 return (NULL);
3546 }
3547 if (np != NULL) {
3548 np->s_refcnt++;
3549 } else {
3550 #ifdef DEBUG
3551 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3552 "nfs4_move_mi: no target nfs4_server, will create."));
3553 #endif
3554 np = add_new_nfs4_server(new, kcred);
3555 }
3556 mutex_exit(&nfs4_server_lst_lock);
3557
3558 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3559 "nfs4_move_mi: for mi 0x%p, "
3560 "old servinfo4 0x%p, new servinfo4 0x%p, "
3561 "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
3562 (void*)mi, (void*)old, (void*)new,
3563 (void*)op, (void*)np));
3564 ASSERT(op != NULL && np != NULL);
3565
3566 /* discard any delegations */
3567 nfs4_deleg_discard(mi, op);
3568
3569 num_open = mi->mi_open_files;
3570 mi->mi_open_files = 0;
3571 op->state_ref_count -= num_open;
3572 ASSERT(op->state_ref_count >= 0);
3573 np->state_ref_count += num_open;
3574 nfs4_remove_mi_from_server_nolock(mi, op);
3575 mi->mi_open_files = num_open;
3576 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3577 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
3578 mi->mi_open_files, op->state_ref_count, np->state_ref_count));
3579
3580 nfs4_add_mi_to_server(np, mi);
3581
3582 mutex_exit(&op->s_lock);
3583 nfs4_server_rele(op);
3584 mutex_exit(&np->s_lock);
3585
3586 return (np);
3587 }
3588
3589 /*
3590 * The caller must hold nfs4_server_lst_lock.
3591 * Search the nfs4_server list to find a match on this servinfo4
3592 * based on its address.
3593 *
3594 * Returns NULL if no match is found. Otherwise returns a reference (which
3595 * must eventually be freed) to a locked nfs4_server.
3596 */
3597 nfs4_server_t *
3598 servinfo4_to_nfs4_server(servinfo4_t *srv_p)
3599 {
3600 nfs4_server_t *np;
3601 zoneid_t zoneid = nfs_zoneid();
3602
3603 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
3604 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
3605 if (np->zoneid == zoneid &&
3606 np->saddr.len == srv_p->sv_addr.len &&
3607 bcmp(np->saddr.buf, srv_p->sv_addr.buf,
3608 np->saddr.len) == 0 &&
3609 np->s_thread_exit != NFS4_THREAD_EXIT) {
3610 mutex_enter(&np->s_lock);
3611 np->s_refcnt++;
3612 return (np);
3613 }
3614 }
3615 return (NULL);
3616 }
3617
3618 /*
3619 * Search the nfs4_server_lst to find a match based on clientid and
3620 * addr.
3621 * Locks the nfs4_server down if it is found and returns a reference that
3622 * must eventually be freed.
3623 *
3624 * Returns NULL if no match is found. This means one of two things: either
3625 * mi is in the process of being mounted, or mi has been unmounted.
3626 *
3627 * The caller should be holding mi->mi_recovlock, and it should continue to
3628 * hold the lock until done with the returned nfs4_server_t. Once
3629 * mi->mi_recovlock is released, there is no guarantee that the returned
3630 * nfs4_server_t will continue to correspond to mi.
3631 */
3632 nfs4_server_t *
3633 find_nfs4_server(mntinfo4_t *mi)
3634 {
3635 return (find_nfs4_server_all(mi, 0));
3636 }
3637
3638 /*
3639 * Same as above, but takes an "all" parameter which can be
3640 * set to 1 if the caller wishes to find nfs4_server_t's which
3641 * have been marked for termination by the exit of the renew
3642 * thread. This should only be used by operations which are
3643 * cleaning up and will not cause an OTW op.
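 * As with find_nfs4_server(), a caller that gets a non-NULL result must
 * eventually drop the returned node's s_lock and call nfs4_server_rele()
 * on it.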
3644 */ 3645 nfs4_server_t * 3646 find_nfs4_server_all(mntinfo4_t *mi, int all) 3647 { 3648 nfs4_server_t *np; 3649 servinfo4_t *svp; 3650 zoneid_t zoneid = mi->mi_zone->zone_id; 3651 3652 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3653 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3654 /* 3655 * This can be called from nfs4_unmount() which can be called from the 3656 * global zone, hence it's legal for the global zone to muck with 3657 * another zone's server list, as long as it doesn't try to contact 3658 * them. 3659 */ 3660 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3661 nfs_global_client_only != 0); 3662 3663 /* 3664 * The nfs4_server_lst_lock global lock is held when we get a new 3665 * clientid (via SETCLIENTID OTW). Holding this global lock and 3666 * mi_recovlock (READER is fine) ensures that the nfs4_server 3667 * and this mntinfo4 can't get out of sync, so the following search is 3668 * always valid. 3669 */ 3670 mutex_enter(&nfs4_server_lst_lock); 3671 #ifdef DEBUG 3672 if (nfs4_server_t_debug) { 3673 /* mi->mi_clientid is unprotected, ok for debug output */ 3674 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3675 mi->mi_curr_serv); 3676 } 3677 #endif 3678 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3679 mutex_enter(&np->s_lock); 3680 svp = mi->mi_curr_serv; 3681 3682 if (np->zoneid == zoneid && 3683 np->clientid == mi->mi_clientid && 3684 np->saddr.len == svp->sv_addr.len && 3685 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3686 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3687 mutex_exit(&nfs4_server_lst_lock); 3688 np->s_refcnt++; 3689 return (np); 3690 } 3691 mutex_exit(&np->s_lock); 3692 } 3693 mutex_exit(&nfs4_server_lst_lock); 3694 3695 return (NULL); 3696 } 3697 3698 /* 3699 * Release the reference to sp and destroy it if that's the last one. 3700 */ 3701 3702 void 3703 nfs4_server_rele(nfs4_server_t *sp) 3704 { 3705 mutex_enter(&sp->s_lock); 3706 ASSERT(sp->s_refcnt > 0); 3707 sp->s_refcnt--; 3708 if (sp->s_refcnt > 0) { 3709 mutex_exit(&sp->s_lock); 3710 return; 3711 } 3712 mutex_exit(&sp->s_lock); 3713 3714 mutex_enter(&nfs4_server_lst_lock); 3715 mutex_enter(&sp->s_lock); 3716 if (sp->s_refcnt > 0) { 3717 mutex_exit(&sp->s_lock); 3718 mutex_exit(&nfs4_server_lst_lock); 3719 return; 3720 } 3721 remque(sp); 3722 sp->forw = sp->back = NULL; 3723 mutex_exit(&nfs4_server_lst_lock); 3724 destroy_nfs4_server(sp); 3725 } 3726 3727 static void 3728 destroy_nfs4_server(nfs4_server_t *sp) 3729 { 3730 ASSERT(MUTEX_HELD(&sp->s_lock)); 3731 ASSERT(sp->s_refcnt == 0); 3732 ASSERT(sp->s_otw_call_count == 0); 3733 3734 remove_all_mi(sp); 3735 3736 crfree(sp->s_cred); 3737 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3738 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3739 mutex_exit(&sp->s_lock); 3740 3741 /* destroy the nfs4_server */ 3742 nfs4callback_destroy(sp); 3743 list_destroy(&sp->s_deleg_list); 3744 mutex_destroy(&sp->s_lock); 3745 cv_destroy(&sp->cv_thread_exit); 3746 cv_destroy(&sp->s_cv_otw_count); 3747 cv_destroy(&sp->s_clientid_pend); 3748 cv_destroy(&sp->wait_cb_null); 3749 nfs_rw_destroy(&sp->s_recovlock); 3750 kmem_free(sp, sizeof (*sp)); 3751 } 3752 3753 /* 3754 * Lock sp, but only if it's still active (in the list and hasn't been 3755 * flagged as exiting) or 'all' is non-zero. 3756 * Returns TRUE if sp got locked and adds a reference to sp. 
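 * Returns FALSE, with no lock taken and no reference added, otherwise.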
3757 */ 3758 bool_t 3759 nfs4_server_vlock(nfs4_server_t *sp, int all) 3760 { 3761 nfs4_server_t *np; 3762 3763 mutex_enter(&nfs4_server_lst_lock); 3764 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3765 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3766 all != 0)) { 3767 mutex_enter(&np->s_lock); 3768 np->s_refcnt++; 3769 mutex_exit(&nfs4_server_lst_lock); 3770 return (TRUE); 3771 } 3772 } 3773 mutex_exit(&nfs4_server_lst_lock); 3774 return (FALSE); 3775 } 3776 3777 /* 3778 * Fork off a thread to free the data structures for a mount. 3779 */ 3780 3781 static void 3782 async_free_mount(vfs_t *vfsp, cred_t *cr) 3783 { 3784 freemountargs_t *args; 3785 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3786 args->fm_vfsp = vfsp; 3787 VFS_HOLD(vfsp); 3788 MI4_HOLD(VFTOMI4(vfsp)); 3789 args->fm_cr = cr; 3790 crhold(cr); 3791 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3792 minclsyspri); 3793 } 3794 3795 static void 3796 nfs4_free_mount_thread(freemountargs_t *args) 3797 { 3798 mntinfo4_t *mi; 3799 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3800 mi = VFTOMI4(args->fm_vfsp); 3801 crfree(args->fm_cr); 3802 VFS_RELE(args->fm_vfsp); 3803 MI4_RELE(mi); 3804 kmem_free(args, sizeof (freemountargs_t)); 3805 zthread_exit(); 3806 /* NOTREACHED */ 3807 } 3808 3809 /* 3810 * Thread to free the data structures for a given filesystem. 3811 */ 3812 static void 3813 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3814 { 3815 mntinfo4_t *mi = VFTOMI4(vfsp); 3816 nfs4_server_t *sp; 3817 callb_cpr_t cpr_info; 3818 kmutex_t cpr_lock; 3819 boolean_t async_thread; 3820 int removed; 3821 3822 /* 3823 * We need to participate in the CPR framework if this is a kernel 3824 * thread. 3825 */ 3826 async_thread = (curproc == nfs_zone()->zone_zsched); 3827 if (async_thread) { 3828 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3829 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3830 "nfsv4AsyncUnmount"); 3831 } 3832 3833 /* 3834 * We need to wait for all outstanding OTW calls 3835 * and recovery to finish before we remove the mi 3836 * from the nfs4_server_t, as current pending 3837 * calls might still need this linkage (in order 3838 * to find a nfs4_server_t from a mntinfo4_t). 3839 */ 3840 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3841 sp = find_nfs4_server(mi); 3842 nfs_rw_exit(&mi->mi_recovlock); 3843 3844 if (sp) { 3845 while (sp->s_otw_call_count != 0) { 3846 if (async_thread) { 3847 mutex_enter(&cpr_lock); 3848 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3849 mutex_exit(&cpr_lock); 3850 } 3851 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3852 if (async_thread) { 3853 mutex_enter(&cpr_lock); 3854 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3855 mutex_exit(&cpr_lock); 3856 } 3857 } 3858 mutex_exit(&sp->s_lock); 3859 nfs4_server_rele(sp); 3860 sp = NULL; 3861 } 3862 3863 3864 mutex_enter(&mi->mi_lock); 3865 while (mi->mi_in_recovery != 0) { 3866 if (async_thread) { 3867 mutex_enter(&cpr_lock); 3868 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3869 mutex_exit(&cpr_lock); 3870 } 3871 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3872 if (async_thread) { 3873 mutex_enter(&cpr_lock); 3874 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3875 mutex_exit(&cpr_lock); 3876 } 3877 } 3878 mutex_exit(&mi->mi_lock); 3879 3880 /* 3881 * The original purge of the dnlc via 'dounmount' 3882 * doesn't guarantee that another dnlc entry was not 3883 * added while we waitied for all outstanding OTW 3884 * and recovery calls to finish. So re-purge the 3885 * dnlc now. 
3886 */
3887 (void) dnlc_purge_vfsp(vfsp, 0);
3888
3889 /*
3890 * We need to explicitly stop the manager thread; the async worker
3891 * threads can time out and exit on their own.
3892 */
3893 mutex_enter(&mi->mi_async_lock);
3894 mi->mi_max_threads = 0;
3895 cv_broadcast(&mi->mi_async_work_cv);
3896 mutex_exit(&mi->mi_async_lock);
3897 if (mi->mi_manager_thread)
3898 nfs4_async_manager_stop(vfsp);
3899
3900 destroy_rtable4(vfsp, cr);
3901
3902 nfs4_remove_mi_from_server(mi, NULL);
3903
3904 if (async_thread) {
3905 mutex_enter(&cpr_lock);
3906 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */
3907 mutex_destroy(&cpr_lock);
3908 }
3909
3910 removed = nfs4_mi_zonelist_remove(mi);
3911 if (removed)
3912 zone_rele(mi->mi_zone);
3913 }
3914