1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/vnode.h> 40 #include <sys/pathname.h> 41 #include <sys/sysmacros.h> 42 #include <sys/kmem.h> 43 #include <sys/mkdev.h> 44 #include <sys/mount.h> 45 #include <sys/statvfs.h> 46 #include <sys/errno.h> 47 #include <sys/debug.h> 48 #include <sys/cmn_err.h> 49 #include <sys/utsname.h> 50 #include <sys/bootconf.h> 51 #include <sys/modctl.h> 52 #include <sys/acl.h> 53 #include <sys/flock.h> 54 #include <sys/time.h> 55 #include <sys/disp.h> 56 #include <sys/policy.h> 57 #include <sys/socket.h> 58 #include <sys/netconfig.h> 59 #include <sys/dnlc.h> 60 #include <sys/list.h> 61 #include <sys/mntent.h> 62 #include <sys/tsol/label.h> 63 64 #include <rpc/types.h> 65 #include <rpc/auth.h> 66 #include <rpc/rpcsec_gss.h> 67 #include <rpc/clnt.h> 68 69 #include <nfs/nfs.h> 70 #include <nfs/nfs_clnt.h> 71 #include <nfs/mount.h> 72 #include <nfs/nfs_acl.h> 73 74 #include <fs/fs_subr.h> 75 76 #include <nfs/nfs4.h> 77 #include <nfs/rnode4.h> 78 #include <nfs/nfs4_clnt.h> 79 #include <sys/fs/autofs.h> 80 81 82 /* 83 * Arguments passed to thread to free data structures from forced unmount. 84 */ 85 86 typedef struct { 87 vfs_t *fm_vfsp; 88 cred_t *fm_cr; 89 } freemountargs_t; 90 91 static void async_free_mount(vfs_t *, cred_t *); 92 static void nfs4_free_mount(vfs_t *, cred_t *); 93 static void nfs4_free_mount_thread(freemountargs_t *); 94 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 95 96 /* 97 * From rpcsec module (common/rpcsec). 
98 */ 99 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 100 extern void sec_clnt_freeinfo(struct sec_data *); 101 102 /* 103 * The order and contents of this structure must be kept in sync with that of 104 * rfsreqcnt_v4_tmpl in nfs_stats.c 105 */ 106 static char *rfsnames_v4[] = { 107 "null", "compound", "reserved", "access", "close", "commit", "create", 108 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 109 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 110 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 111 "read", "readdir", "readlink", "remove", "rename", "renew", 112 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 113 "setclientid_confirm", "verify", "write" 114 }; 115 116 /* 117 * nfs4_max_mount_retry is the number of times the client will redrive 118 * a mount compound before giving up and returning failure. The intent 119 * is to redrive mount compounds which fail NFS4ERR_STALE so that 120 * if a component of the server path being mounted goes stale, it can 121 * "recover" by redriving the mount compound (LOOKUP ops). This recovery 122 * code is needed outside of the recovery framework because mount is a 123 * special case. The client doesn't create vnodes/rnodes for components 124 * of the server path being mounted. The recovery code recovers real 125 * client objects, not STALE FHs which map to components of the server 126 * path being mounted. 127 * 128 * We could just fail the mount on the first time, but that would 129 * instantly trigger failover (from nfs4_mount), and the client should 130 * try to re-lookup the STALE FH before doing failover. The easiest 131 * way to "re-lookup" is to simply redrive the mount compound. 132 */ 133 static int nfs4_max_mount_retry = 2; 134 135 /* 136 * nfs4 vfs operations. 
137 */ 138 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 139 static int nfs4_unmount(vfs_t *, int, cred_t *); 140 static int nfs4_root(vfs_t *, vnode_t **); 141 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 142 static int nfs4_sync(vfs_t *, short, cred_t *); 143 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 144 static int nfs4_mountroot(vfs_t *, whymountroot_t); 145 static void nfs4_freevfs(vfs_t *); 146 147 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 148 int, cred_t *, zone_t *); 149 150 vfsops_t *nfs4_vfsops; 151 152 int nfs4_vfsinit(void); 153 void nfs4_vfsfini(void); 154 static void nfs4setclientid_init(void); 155 static void nfs4setclientid_fini(void); 156 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 157 struct nfs4_server *, nfs4_error_t *, int *); 158 static void destroy_nfs4_server(nfs4_server_t *); 159 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 160 161 /* 162 * Initialize the vfs structure 163 */ 164 165 static int nfs4fstyp; 166 167 168 /* 169 * Debug variable to check for rdma based 170 * transport startup and cleanup. Controlled 171 * through /etc/system. Off by default. 
172 */ 173 extern int rdma_debug; 174 175 int 176 nfs4init(int fstyp, char *name) 177 { 178 static const fs_operation_def_t nfs4_vfsops_template[] = { 179 VFSNAME_MOUNT, { .vfs_mount = nfs4_mount }, 180 VFSNAME_UNMOUNT, { .vfs_unmount = nfs4_unmount }, 181 VFSNAME_ROOT, { .vfs_root = nfs4_root }, 182 VFSNAME_STATVFS, { .vfs_statvfs = nfs4_statvfs }, 183 VFSNAME_SYNC, { .vfs_sync = nfs4_sync }, 184 VFSNAME_VGET, { .vfs_vget = nfs4_vget }, 185 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs4_mountroot }, 186 VFSNAME_FREEVFS, { .vfs_freevfs = nfs4_freevfs }, 187 NULL, NULL 188 }; 189 int error; 190 191 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 192 if (error != 0) { 193 zcmn_err(GLOBAL_ZONEID, CE_WARN, 194 "nfs4init: bad vfs ops template"); 195 return (error); 196 } 197 198 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 199 if (error != 0) { 200 (void) vfs_freevfsops_by_type(fstyp); 201 zcmn_err(GLOBAL_ZONEID, CE_WARN, 202 "nfs4init: bad vnode ops template"); 203 return (error); 204 } 205 206 nfs4fstyp = fstyp; 207 208 (void) nfs4_vfsinit(); 209 210 (void) nfs4_init_dot_entries(); 211 212 return (0); 213 } 214 215 void 216 nfs4fini(void) 217 { 218 (void) nfs4_destroy_dot_entries(); 219 nfs4_vfsfini(); 220 } 221 222 /* 223 * Create a new sec_data structure to store AUTH_DH related data: 224 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 225 * flag set for NFS V4 since we are avoiding to contact the rpcbind 226 * daemon and is using the IP time service (IPPORT_TIMESERVER). 227 * 228 * sec_data can be freed by sec_clnt_freeinfo(). 
229 */ 230 struct sec_data * 231 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 232 struct knetconfig *knconf) { 233 struct sec_data *secdata; 234 dh_k4_clntdata_t *data; 235 char *pf, *p; 236 237 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 238 return (NULL); 239 240 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 241 secdata->flags = 0; 242 243 data = kmem_alloc(sizeof (*data), KM_SLEEP); 244 245 data->syncaddr.maxlen = syncaddr->maxlen; 246 data->syncaddr.len = syncaddr->len; 247 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 248 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 249 250 /* 251 * duplicate the knconf information for the 252 * new opaque data. 253 */ 254 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 255 *data->knconf = *knconf; 256 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 257 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 258 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 259 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 260 data->knconf->knc_protofmly = pf; 261 data->knconf->knc_proto = p; 262 263 /* move server netname to the sec_data structure */ 264 data->netname = kmem_alloc(nlen, KM_SLEEP); 265 bcopy(netname, data->netname, nlen); 266 data->netnamelen = (int)nlen; 267 268 secdata->secmod = AUTH_DH; 269 secdata->rpcflavor = AUTH_DH; 270 secdata->data = (caddr_t)data; 271 272 return (secdata); 273 } 274 275 static int 276 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 277 { 278 servinfo4_t *si; 279 280 /* 281 * Iterate over the servinfo4 list to make sure 282 * we do not have a duplicate. 
Skip any servinfo4 283 * that has been marked "NOT IN USE" 284 */ 285 for (si = svp_head; si; si = si->sv_next) { 286 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 287 if (si->sv_flags & SV4_NOTINUSE) { 288 nfs_rw_exit(&si->sv_lock); 289 continue; 290 } 291 nfs_rw_exit(&si->sv_lock); 292 if (si == svp) 293 continue; 294 if (si->sv_addr.len == svp->sv_addr.len && 295 strcmp(si->sv_knconf->knc_protofmly, 296 svp->sv_knconf->knc_protofmly) == 0 && 297 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 298 si->sv_addr.len) == 0) { 299 /* it's a duplicate */ 300 return (1); 301 } 302 } 303 /* it's not a duplicate */ 304 return (0); 305 } 306 307 void 308 nfs4_free_args(struct nfs_args *nargs) 309 { 310 if (nargs->knconf) { 311 if (nargs->knconf->knc_protofmly) 312 kmem_free(nargs->knconf->knc_protofmly, 313 KNC_STRSIZE); 314 if (nargs->knconf->knc_proto) 315 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 316 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 317 nargs->knconf = NULL; 318 } 319 320 if (nargs->fh) { 321 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 322 nargs->fh = NULL; 323 } 324 325 if (nargs->hostname) { 326 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 327 nargs->hostname = NULL; 328 } 329 330 if (nargs->addr) { 331 if (nargs->addr->buf) { 332 ASSERT(nargs->addr->len); 333 kmem_free(nargs->addr->buf, nargs->addr->len); 334 } 335 kmem_free(nargs->addr, sizeof (struct netbuf)); 336 nargs->addr = NULL; 337 } 338 339 if (nargs->syncaddr) { 340 ASSERT(nargs->syncaddr->len); 341 if (nargs->syncaddr->buf) { 342 ASSERT(nargs->syncaddr->len); 343 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 344 } 345 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 346 nargs->syncaddr = NULL; 347 } 348 349 if (nargs->netname) { 350 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 351 nargs->netname = NULL; 352 } 353 354 if (nargs->nfs_ext_u.nfs_extA.secdata) { 355 sec_clnt_freeinfo( 356 nargs->nfs_ext_u.nfs_extA.secdata); 357 
nargs->nfs_ext_u.nfs_extA.secdata = NULL; 358 } 359 } 360 361 362 int 363 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs) 364 { 365 366 int error; 367 size_t hlen; /* length of hostname */ 368 size_t nlen; /* length of netname */ 369 char netname[MAXNETNAMELEN+1]; /* server's netname */ 370 struct netbuf addr; /* server's address */ 371 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 372 struct knetconfig *knconf; /* transport structure */ 373 struct sec_data *secdata = NULL; /* security data */ 374 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 375 STRUCT_DECL(knetconfig, knconf_tmp); 376 STRUCT_DECL(netbuf, addr_tmp); 377 int flags; 378 char *p, *pf; 379 struct pathname pn; 380 char *userbufptr; 381 382 383 bzero(nargs, sizeof (*nargs)); 384 385 STRUCT_INIT(args, get_udatamodel()); 386 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 387 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 388 STRUCT_SIZE(args)))) 389 return (EFAULT); 390 391 nargs->wsize = STRUCT_FGET(args, wsize); 392 nargs->rsize = STRUCT_FGET(args, rsize); 393 nargs->timeo = STRUCT_FGET(args, timeo); 394 nargs->retrans = STRUCT_FGET(args, retrans); 395 nargs->acregmin = STRUCT_FGET(args, acregmin); 396 nargs->acregmax = STRUCT_FGET(args, acregmax); 397 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 398 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 399 400 flags = STRUCT_FGET(args, flags); 401 nargs->flags = flags; 402 403 addr.buf = NULL; 404 syncaddr.buf = NULL; 405 406 407 /* 408 * Allocate space for a knetconfig structure and 409 * its strings and copy in from user-land. 
410 */ 411 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 412 STRUCT_INIT(knconf_tmp, get_udatamodel()); 413 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 414 STRUCT_SIZE(knconf_tmp))) { 415 kmem_free(knconf, sizeof (*knconf)); 416 return (EFAULT); 417 } 418 419 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 420 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 421 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 422 if (get_udatamodel() != DATAMODEL_LP64) { 423 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 424 } else { 425 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 426 } 427 428 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 429 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 430 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 431 if (error) { 432 kmem_free(pf, KNC_STRSIZE); 433 kmem_free(p, KNC_STRSIZE); 434 kmem_free(knconf, sizeof (*knconf)); 435 return (error); 436 } 437 438 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 439 if (error) { 440 kmem_free(pf, KNC_STRSIZE); 441 kmem_free(p, KNC_STRSIZE); 442 kmem_free(knconf, sizeof (*knconf)); 443 return (error); 444 } 445 446 447 knconf->knc_protofmly = pf; 448 knconf->knc_proto = p; 449 450 nargs->knconf = knconf; 451 452 /* 453 * Get server address 454 */ 455 STRUCT_INIT(addr_tmp, get_udatamodel()); 456 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 457 STRUCT_SIZE(addr_tmp))) { 458 error = EFAULT; 459 goto errout; 460 } 461 462 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 463 userbufptr = STRUCT_FGETP(addr_tmp, buf); 464 addr.len = STRUCT_FGET(addr_tmp, len); 465 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 466 addr.maxlen = addr.len; 467 if (copyin(userbufptr, addr.buf, addr.len)) { 468 kmem_free(addr.buf, addr.len); 469 error = EFAULT; 470 goto errout; 471 } 472 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 473 474 /* 475 * Get the root fhandle 476 */ 477 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 478 if (error) 479 goto errout; 480 481 /* Volatile fh: keep server paths, so use actual-size strings */ 482 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 483 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen); 484 nargs->fh[pn.pn_pathlen] = '\0'; 485 pn_free(&pn); 486 487 488 /* 489 * Get server's hostname 490 */ 491 if (flags & NFSMNT_HOSTNAME) { 492 error = copyinstr(STRUCT_FGETP(args, hostname), 493 netname, sizeof (netname), &hlen); 494 if (error) 495 goto errout; 496 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 497 (void) strcpy(nargs->hostname, netname); 498 499 } else { 500 nargs->hostname = NULL; 501 } 502 503 504 /* 505 * If there are syncaddr and netname data, load them in. This is 506 * to support data needed for NFSV4 when AUTH_DH is the negotiated 507 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 508 */ 509 netname[0] = '\0'; 510 if (flags & NFSMNT_SECURE) { 511 512 /* get syncaddr */ 513 STRUCT_INIT(addr_tmp, get_udatamodel()); 514 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 515 STRUCT_SIZE(addr_tmp))) { 516 error = EINVAL; 517 goto errout; 518 } 519 userbufptr = STRUCT_FGETP(addr_tmp, buf); 520 syncaddr.len = STRUCT_FGET(addr_tmp, len); 521 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 522 syncaddr.maxlen = syncaddr.len; 523 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 524 kmem_free(syncaddr.buf, syncaddr.len); 525 error = EFAULT; 526 goto errout; 527 } 528 529 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 530 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 531 532 /* get server's netname */ 533 if (copyinstr(STRUCT_FGETP(args, netname), netname, 534 sizeof (netname), &nlen)) { 535 error = EFAULT; 536 goto errout; 537 } 538 539 netname[nlen] = '\0'; 540 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 541 (void) strcpy(nargs->netname, netname); 542 } 543 544 /* 545 * Get the extention data which has the security data 
structure. 546 * This includes data for AUTH_SYS as well. 547 */ 548 if (flags & NFSMNT_NEWARGS) { 549 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 550 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 551 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 552 /* 553 * Indicating the application is using the new 554 * sec_data structure to pass in the security 555 * data. 556 */ 557 if (STRUCT_FGETP(args, 558 nfs_ext_u.nfs_extA.secdata) != NULL) { 559 error = sec_clnt_loadinfo( 560 (struct sec_data *)STRUCT_FGETP(args, 561 nfs_ext_u.nfs_extA.secdata), 562 &secdata, get_udatamodel()); 563 } 564 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 565 } 566 } 567 568 if (error) 569 goto errout; 570 571 /* 572 * Failover support: 573 * 574 * We may have a linked list of nfs_args structures, 575 * which means the user is looking for failover. If 576 * the mount is either not "read-only" or "soft", 577 * we want to bail out with EINVAL. 578 */ 579 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 580 nargs->nfs_ext_u.nfs_extB.next = 581 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 582 583 errout: 584 if (error) 585 nfs4_free_args(nargs); 586 587 return (error); 588 } 589 590 591 /* 592 * nfs mount vfsop 593 * Set up mount info record and attach it to vfs struct. 
594 */ 595 static int 596 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 597 { 598 char *data = uap->dataptr; 599 int error; 600 vnode_t *rtvp; /* the server's root */ 601 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 602 struct knetconfig *rdma_knconf; /* rdma transport structure */ 603 rnode4_t *rp; 604 struct servinfo4 *svp; /* nfs server info */ 605 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 606 struct servinfo4 *svp_head; /* first nfs server info */ 607 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 608 struct sec_data *secdata; /* security data */ 609 struct nfs_args *args = NULL; 610 int flags, addr_type, removed; 611 zone_t *zone = nfs_zone(); 612 nfs4_error_t n4e; 613 zone_t *mntzone = NULL; 614 615 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 616 return (EPERM); 617 if (mvp->v_type != VDIR) 618 return (ENOTDIR); 619 /* 620 * get arguments 621 * 622 * nfs_args is now versioned and is extensible, so 623 * uap->datalen might be different from sizeof (args) 624 * in a compatible situation. 625 */ 626 more: 627 if (!(uap->flags & MS_SYSSPACE)) { 628 if (args == NULL) 629 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 630 else 631 nfs4_free_args(args); 632 error = nfs4_copyin(data, uap->datalen, args); 633 if (error) { 634 if (args) { 635 kmem_free(args, sizeof (*args)); 636 } 637 return (error); 638 } 639 } else { 640 args = (struct nfs_args *)data; 641 } 642 643 644 flags = args->flags; 645 646 /* 647 * If the request changes the locking type, disallow the remount, 648 * because it's questionable whether we can transfer the 649 * locking state correctly. 650 */ 651 if (uap->flags & MS_REMOUNT) { 652 if (!(uap->flags & MS_SYSSPACE)) { 653 nfs4_free_args(args); 654 kmem_free(args, sizeof (*args)); 655 } 656 if ((mi = VFTOMI4(vfsp)) != NULL) { 657 uint_t new_mi_llock; 658 uint_t old_mi_llock; 659 new_mi_llock = (flags & NFSMNT_LLOCK) ? 
1 : 0; 660 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 661 if (old_mi_llock != new_mi_llock) 662 return (EBUSY); 663 } 664 return (0); 665 } 666 667 mutex_enter(&mvp->v_lock); 668 if (!(uap->flags & MS_OVERLAY) && 669 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 670 mutex_exit(&mvp->v_lock); 671 if (!(uap->flags & MS_SYSSPACE)) { 672 nfs4_free_args(args); 673 kmem_free(args, sizeof (*args)); 674 } 675 return (EBUSY); 676 } 677 mutex_exit(&mvp->v_lock); 678 679 /* make sure things are zeroed for errout: */ 680 rtvp = NULL; 681 mi = NULL; 682 secdata = NULL; 683 684 /* 685 * A valid knetconfig structure is required. 686 */ 687 688 if (!(flags & NFSMNT_KNCONF) || 689 args->knconf == NULL || args->knconf->knc_protofmly == NULL || 690 args->knconf->knc_proto == NULL || 691 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) { 692 if (!(uap->flags & MS_SYSSPACE)) { 693 nfs4_free_args(args); 694 kmem_free(args, sizeof (*args)); 695 } 696 return (EINVAL); 697 } 698 699 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 700 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 701 if (!(uap->flags & MS_SYSSPACE)) { 702 nfs4_free_args(args); 703 kmem_free(args, sizeof (*args)); 704 } 705 return (EINVAL); 706 } 707 708 709 /* 710 * Allocate a servinfo4 struct. 
711 */ 712 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 713 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 714 if (svp_tail) { 715 svp_2ndlast = svp_tail; 716 svp_tail->sv_next = svp; 717 } else { 718 svp_head = svp; 719 svp_2ndlast = svp; 720 } 721 722 svp_tail = svp; 723 svp->sv_knconf = args->knconf; 724 args->knconf = NULL; 725 726 727 /* 728 * Get server address 729 */ 730 731 if (args->addr == NULL || args->addr->buf == NULL) { 732 error = EINVAL; 733 goto errout; 734 } 735 736 svp->sv_addr.maxlen = args->addr->maxlen; 737 svp->sv_addr.len = args->addr->len; 738 svp->sv_addr.buf = args->addr->buf; 739 args->addr->buf = NULL; 740 741 742 /* 743 * Get the root fhandle 744 */ 745 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) { 746 error = EINVAL; 747 goto errout; 748 } 749 750 svp->sv_path = args->fh; 751 svp->sv_pathlen = strlen(args->fh) + 1; 752 args->fh = NULL; 753 754 /* 755 * Get server's hostname 756 */ 757 if (flags & NFSMNT_HOSTNAME) { 758 if (args->hostname == NULL || (strlen(args->hostname) > 759 MAXNETNAMELEN)) { 760 error = EINVAL; 761 goto errout; 762 } 763 svp->sv_hostnamelen = strlen(args->hostname) + 1; 764 svp->sv_hostname = args->hostname; 765 args->hostname = NULL; 766 } else { 767 char *p = "unknown-host"; 768 svp->sv_hostnamelen = strlen(p) + 1; 769 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 770 (void) strcpy(svp->sv_hostname, p); 771 } 772 773 /* 774 * RDMA MOUNT SUPPORT FOR NFS v4. 775 * Establish, is it possible to use RDMA, if so overload the 776 * knconf with rdma specific knconf and free the orignal knconf. 777 */ 778 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 779 /* 780 * Determine the addr type for RDMA, IPv4 or v6. 
781 */ 782 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 783 addr_type = AF_INET; 784 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 785 addr_type = AF_INET6; 786 787 if (rdma_reachable(addr_type, &svp->sv_addr, 788 &rdma_knconf) == 0) { 789 /* 790 * If successful, hijack the orignal knconf and 791 * replace with the new one, depending on the flags. 792 */ 793 svp->sv_origknconf = svp->sv_knconf; 794 svp->sv_knconf = rdma_knconf; 795 } else { 796 if (flags & NFSMNT_TRYRDMA) { 797 #ifdef DEBUG 798 if (rdma_debug) 799 zcmn_err(getzoneid(), CE_WARN, 800 "no RDMA onboard, revert\n"); 801 #endif 802 } 803 804 if (flags & NFSMNT_DORDMA) { 805 /* 806 * If proto=rdma is specified and no RDMA 807 * path to this server is avialable then 808 * ditch this server. 809 * This is not included in the mountable 810 * server list or the replica list. 811 * Check if more servers are specified; 812 * Failover case, otherwise bail out of mount. 813 */ 814 if (args->nfs_args_ext == 815 NFS_ARGS_EXTB && 816 args->nfs_ext_u.nfs_extB.next 817 != NULL) { 818 data = (char *) 819 args->nfs_ext_u.nfs_extB.next; 820 if (uap->flags & MS_RDONLY && 821 !(flags & NFSMNT_SOFT)) { 822 if (svp_head->sv_next == NULL) { 823 svp_tail = NULL; 824 svp_2ndlast = NULL; 825 sv4_free(svp_head); 826 goto more; 827 } else { 828 svp_tail = svp_2ndlast; 829 svp_2ndlast->sv_next = 830 NULL; 831 sv4_free(svp); 832 goto more; 833 } 834 } 835 } else { 836 /* 837 * This is the last server specified 838 * in the nfs_args list passed down 839 * and its not rdma capable. 
840 */ 841 if (svp_head->sv_next == NULL) { 842 /* 843 * Is this the only one 844 */ 845 error = EINVAL; 846 #ifdef DEBUG 847 if (rdma_debug) 848 zcmn_err(getzoneid(), 849 CE_WARN, 850 "No RDMA srv"); 851 #endif 852 goto errout; 853 } else { 854 /* 855 * There is list, since some 856 * servers specified before 857 * this passed all requirements 858 */ 859 svp_tail = svp_2ndlast; 860 svp_2ndlast->sv_next = NULL; 861 sv4_free(svp); 862 goto proceed; 863 } 864 } 865 } 866 } 867 } 868 869 /* 870 * If there are syncaddr and netname data, load them in. This is 871 * to support data needed for NFSV4 when AUTH_DH is the negotiated 872 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 873 */ 874 if (args->flags & NFSMNT_SECURE) { 875 svp->sv_dhsec = create_authdh_data(args->netname, 876 strlen(args->netname), 877 args->syncaddr, svp->sv_knconf); 878 } 879 880 /* 881 * Get the extention data which has the security data structure. 882 * This includes data for AUTH_SYS as well. 883 */ 884 if (flags & NFSMNT_NEWARGS) { 885 switch (args->nfs_args_ext) { 886 case NFS_ARGS_EXTA: 887 case NFS_ARGS_EXTB: 888 /* 889 * Indicating the application is using the new 890 * sec_data structure to pass in the security 891 * data. 892 */ 893 secdata = args->nfs_ext_u.nfs_extA.secdata; 894 if (secdata == NULL) { 895 error = EINVAL; 896 } else if (uap->flags & MS_SYSSPACE) { 897 /* 898 * Need to validate the flavor here if 899 * sysspace, userspace was already 900 * validate from the nfs_copyin function. 
901 */ 902 switch (secdata->rpcflavor) { 903 case AUTH_NONE: 904 case AUTH_UNIX: 905 case AUTH_LOOPBACK: 906 case AUTH_DES: 907 case RPCSEC_GSS: 908 break; 909 default: 910 error = EINVAL; 911 goto errout; 912 } 913 } 914 args->nfs_ext_u.nfs_extA.secdata = NULL; 915 break; 916 917 default: 918 error = EINVAL; 919 break; 920 } 921 922 } else if (flags & NFSMNT_SECURE) { 923 /* 924 * NFSMNT_SECURE is deprecated but we keep it 925 * to support the rouge user generated application 926 * that may use this undocumented interface to do 927 * AUTH_DH security. 928 */ 929 secdata = create_authdh_data(args->netname, 930 strlen(args->netname), args->syncaddr, svp->sv_knconf); 931 932 } else { 933 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 934 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 935 secdata->data = NULL; 936 } 937 938 svp->sv_secdata = secdata; 939 940 /* 941 * User does not explictly specify a flavor, and a user 942 * defined default flavor is passed down. 943 */ 944 if (flags & NFSMNT_SECDEFAULT) { 945 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 946 svp->sv_flags |= SV4_TRYSECDEFAULT; 947 nfs_rw_exit(&svp->sv_lock); 948 } 949 950 /* 951 * Failover support: 952 * 953 * We may have a linked list of nfs_args structures, 954 * which means the user is looking for failover. If 955 * the mount is either not "read-only" or "soft", 956 * we want to bail out with EINVAL. 957 */ 958 if (args->nfs_args_ext == NFS_ARGS_EXTB && 959 args->nfs_ext_u.nfs_extB.next != NULL) { 960 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 961 data = (char *)args->nfs_ext_u.nfs_extB.next; 962 goto more; 963 } 964 error = EINVAL; 965 goto errout; 966 } 967 968 /* 969 * Determine the zone we're being mounted into. 
970 */ 971 zone_hold(mntzone = zone); /* start with this assumption */ 972 if (getzoneid() == GLOBAL_ZONEID) { 973 zone_rele(mntzone); 974 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 975 ASSERT(mntzone != NULL); 976 if (mntzone != zone) { 977 error = EBUSY; 978 goto errout; 979 } 980 } 981 982 if (is_system_labeled()) { 983 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 984 svp->sv_knconf, cr); 985 986 if (error > 0) 987 goto errout; 988 989 if (error == -1) { 990 /* change mount to read-only to prevent write-down */ 991 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 992 } 993 } 994 995 /* 996 * Stop the mount from going any further if the zone is going away. 997 */ 998 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 999 error = EBUSY; 1000 goto errout; 1001 } 1002 1003 /* 1004 * Get root vnode. 1005 */ 1006 proceed: 1007 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 1008 1009 if (error) { 1010 /* if nfs4rootvp failed, it will free svp_head */ 1011 svp_head = NULL; 1012 goto errout; 1013 } 1014 1015 mi = VTOMI4(rtvp); 1016 1017 /* 1018 * Send client id to the server, if necessary 1019 */ 1020 nfs4_error_zinit(&n4e); 1021 nfs4setclientid(mi, cr, FALSE, &n4e); 1022 error = n4e.error; 1023 1024 if (error) 1025 goto errout; 1026 1027 /* 1028 * Set option fields in the mount info record 1029 */ 1030 1031 if (svp_head->sv_next) { 1032 mutex_enter(&mi->mi_lock); 1033 mi->mi_flags |= MI4_LLOCK; 1034 mutex_exit(&mi->mi_lock); 1035 } 1036 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args); 1037 1038 errout: 1039 if (error) { 1040 if (rtvp != NULL) { 1041 rp = VTOR4(rtvp); 1042 if (rp->r_flags & R4HASHED) 1043 rp4_rmhash(rp); 1044 } 1045 if (mi != NULL) { 1046 nfs4_async_stop(vfsp); 1047 nfs4_async_manager_stop(vfsp); 1048 nfs4_remove_mi_from_server(mi, NULL); 1049 if (rtvp != NULL) 1050 VN_RELE(rtvp); 1051 if (mntzone != NULL) 1052 zone_rele(mntzone); 1053 /* need to remove it from the zone */ 1054 removed = 
nfs4_mi_zonelist_remove(mi); 1055 if (removed) 1056 zone_rele(mi->mi_zone); 1057 MI4_RELE(mi); 1058 if (!(uap->flags & MS_SYSSPACE) && args) { 1059 nfs4_free_args(args); 1060 kmem_free(args, sizeof (*args)); 1061 } 1062 return (error); 1063 } 1064 if (svp_head) 1065 sv4_free(svp_head); 1066 } 1067 1068 if (!(uap->flags & MS_SYSSPACE) && args) { 1069 nfs4_free_args(args); 1070 kmem_free(args, sizeof (*args)); 1071 } 1072 if (rtvp != NULL) 1073 VN_RELE(rtvp); 1074 1075 if (mntzone != NULL) 1076 zone_rele(mntzone); 1077 1078 return (error); 1079 } 1080 1081 #ifdef DEBUG 1082 #define VERS_MSG "NFS4 server " 1083 #else 1084 #define VERS_MSG "NFS server " 1085 #endif 1086 1087 #define READ_MSG \ 1088 VERS_MSG "%s returned 0 for read transfer size" 1089 #define WRITE_MSG \ 1090 VERS_MSG "%s returned 0 for write transfer size" 1091 #define SIZE_MSG \ 1092 VERS_MSG "%s returned 0 for maximum file size" 1093 1094 /* 1095 * Get the symbolic link text from the server for a given filehandle 1096 * of that symlink. 1097 * 1098 * (get symlink text) PUTFH READLINK 1099 */ 1100 static int 1101 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 1102 int flags) 1103 1104 { 1105 COMPOUND4args_clnt args; 1106 COMPOUND4res_clnt res; 1107 int doqueue; 1108 nfs_argop4 argop[2]; 1109 nfs_resop4 *resop; 1110 READLINK4res *lr_res; 1111 uint_t len; 1112 bool_t needrecov = FALSE; 1113 nfs4_recov_state_t recov_state; 1114 nfs4_sharedfh_t *sfh; 1115 nfs4_error_t e; 1116 int num_retry = nfs4_max_mount_retry; 1117 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1118 1119 sfh = sfh4_get(fh, mi); 1120 recov_state.rs_flags = 0; 1121 recov_state.rs_num_retry_despite_err = 0; 1122 1123 recov_retry: 1124 nfs4_error_zinit(&e); 1125 1126 args.array_len = 2; 1127 args.array = argop; 1128 args.ctag = TAG_GET_SYMLINK; 1129 1130 if (! 
recovery) { 1131 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 1132 if (e.error) { 1133 sfh4_rele(&sfh); 1134 return (e.error); 1135 } 1136 } 1137 1138 /* 0. putfh symlink fh */ 1139 argop[0].argop = OP_CPUTFH; 1140 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 1141 1142 /* 1. readlink */ 1143 argop[1].argop = OP_READLINK; 1144 1145 doqueue = 1; 1146 1147 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 1148 1149 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 1150 1151 if (needrecov && !recovery && num_retry-- > 0) { 1152 1153 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1154 "getlinktext_otw: initiating recovery\n")); 1155 1156 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 1157 OP_READLINK, NULL) == FALSE) { 1158 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1159 if (!e.error) 1160 (void) xdr_free(xdr_COMPOUND4res_clnt, 1161 (caddr_t)&res); 1162 goto recov_retry; 1163 } 1164 } 1165 1166 /* 1167 * If non-NFS4 pcol error and/or we weren't able to recover. 1168 */ 1169 if (e.error != 0) { 1170 if (! recovery) 1171 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1172 sfh4_rele(&sfh); 1173 return (e.error); 1174 } 1175 1176 if (res.status) { 1177 e.error = geterrno4(res.status); 1178 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1179 if (! recovery) 1180 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1181 sfh4_rele(&sfh); 1182 return (e.error); 1183 } 1184 1185 /* res.status == NFS4_OK */ 1186 ASSERT(res.status == NFS4_OK); 1187 1188 resop = &res.array[1]; /* readlink res */ 1189 lr_res = &resop->nfs_resop4_u.opreadlink; 1190 1191 /* treat symlink name as data */ 1192 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 1193 1194 if (! recovery) 1195 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1196 sfh4_rele(&sfh); 1197 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1198 return (0); 1199 } 1200 1201 /* 1202 * Skip over consecutive slashes and "/./" in a pathname. 
1203 */ 1204 void 1205 pathname_skipslashdot(struct pathname *pnp) 1206 { 1207 char *c1, *c2; 1208 1209 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 1210 1211 c1 = pnp->pn_path + 1; 1212 c2 = pnp->pn_path + 2; 1213 1214 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 1215 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 1216 pnp->pn_pathlen = pnp->pn_pathlen - 2; 1217 } else { 1218 pnp->pn_path++; 1219 pnp->pn_pathlen--; 1220 } 1221 } 1222 } 1223 1224 /* 1225 * Resolve a symbolic link path. The symlink is in the nth component of 1226 * svp->sv_path and has an nfs4 file handle "fh". 1227 * Upon return, the sv_path will point to the new path that has the nth 1228 * component resolved to its symlink text. 1229 */ 1230 int 1231 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 1232 cred_t *cr, int flags) 1233 { 1234 char *oldpath; 1235 char *symlink, *newpath; 1236 struct pathname oldpn, newpn; 1237 char component[MAXNAMELEN]; 1238 int i, addlen, error = 0; 1239 int oldpathlen; 1240 1241 /* Get the symbolic link text over the wire. */ 1242 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 1243 1244 if (error || symlink == NULL || strlen(symlink) == 0) 1245 return (error); 1246 1247 /* 1248 * Compose the new pathname. 1249 * Note: 1250 * - only the nth component is resolved for the pathname. 1251 * - pathname.pn_pathlen does not count the ending null byte. 1252 */ 1253 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1254 oldpath = svp->sv_path; 1255 oldpathlen = svp->sv_pathlen; 1256 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1257 nfs_rw_exit(&svp->sv_lock); 1258 kmem_free(symlink, strlen(symlink) + 1); 1259 return (error); 1260 } 1261 nfs_rw_exit(&svp->sv_lock); 1262 pn_alloc(&newpn); 1263 1264 /* 1265 * Skip over previous components from the oldpath so that the 1266 * oldpn.pn_path will point to the symlink component. 
Skip 1267 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1268 * pn_getcompnent can get the component. 1269 */ 1270 for (i = 1; i < nth; i++) { 1271 pathname_skipslashdot(&oldpn); 1272 error = pn_getcomponent(&oldpn, component); 1273 if (error) 1274 goto out; 1275 } 1276 1277 /* 1278 * Copy the old path upto the component right before the symlink 1279 * if the symlink is not an absolute path. 1280 */ 1281 if (symlink[0] != '/') { 1282 addlen = oldpn.pn_path - oldpn.pn_buf; 1283 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1284 newpn.pn_pathlen += addlen; 1285 newpn.pn_path += addlen; 1286 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1287 newpn.pn_pathlen++; 1288 newpn.pn_path++; 1289 } 1290 1291 /* copy the resolved symbolic link text */ 1292 addlen = strlen(symlink); 1293 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1294 error = ENAMETOOLONG; 1295 goto out; 1296 } 1297 bcopy(symlink, newpn.pn_path, addlen); 1298 newpn.pn_pathlen += addlen; 1299 newpn.pn_path += addlen; 1300 1301 /* 1302 * Check if there is any remaining path after the symlink component. 1303 * First, skip the symlink component. 1304 */ 1305 pathname_skipslashdot(&oldpn); 1306 if (error = pn_getcomponent(&oldpn, component)) 1307 goto out; 1308 1309 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1310 1311 /* 1312 * Copy the remaining path to the new pathname if there is any. 
1313 */ 1314 if (addlen > 0) { 1315 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1316 error = ENAMETOOLONG; 1317 goto out; 1318 } 1319 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1320 newpn.pn_pathlen += addlen; 1321 } 1322 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1323 1324 /* get the newpath and store it in the servinfo4_t */ 1325 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1326 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1327 newpath[newpn.pn_pathlen] = '\0'; 1328 1329 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1330 svp->sv_path = newpath; 1331 svp->sv_pathlen = strlen(newpath) + 1; 1332 nfs_rw_exit(&svp->sv_lock); 1333 1334 kmem_free(oldpath, oldpathlen); 1335 out: 1336 kmem_free(symlink, strlen(symlink) + 1); 1337 pn_free(&newpn); 1338 pn_free(&oldpn); 1339 1340 return (error); 1341 } 1342 1343 /* 1344 * Get the root filehandle for the given filesystem and server, and update 1345 * svp. 1346 * 1347 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1348 * to coordinate with recovery. Otherwise, the caller is assumed to be 1349 * the recovery thread or have already done a start_fop. 1350 * 1351 * Errors are returned by the nfs4_error_t parameter. 
1352 */ 1353 1354 static void 1355 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1356 int flags, cred_t *cr, nfs4_error_t *ep) 1357 { 1358 COMPOUND4args_clnt args; 1359 COMPOUND4res_clnt res; 1360 int doqueue = 1; 1361 nfs_argop4 *argop; 1362 nfs_resop4 *resop; 1363 nfs4_ga_res_t *garp; 1364 int num_argops; 1365 lookup4_param_t lookuparg; 1366 nfs_fh4 *tmpfhp; 1367 nfs_fh4 *resfhp; 1368 bool_t needrecov = FALSE; 1369 nfs4_recov_state_t recov_state; 1370 int llndx; 1371 int nthcomp; 1372 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1373 1374 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1375 ASSERT(svp->sv_path != NULL); 1376 if (svp->sv_path[0] == '\0') { 1377 nfs_rw_exit(&svp->sv_lock); 1378 nfs4_error_init(ep, EINVAL); 1379 return; 1380 } 1381 nfs_rw_exit(&svp->sv_lock); 1382 1383 recov_state.rs_flags = 0; 1384 recov_state.rs_num_retry_despite_err = 0; 1385 recov_retry: 1386 nfs4_error_zinit(ep); 1387 1388 if (!recovery) { 1389 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1390 &recov_state, NULL); 1391 1392 /* 1393 * If recovery has been started and this request as 1394 * initiated by a mount, then we must wait for recovery 1395 * to finish before proceeding, otherwise, the error 1396 * cleanup would remove data structures needed by the 1397 * recovery thread. 1398 */ 1399 if (ep->error) { 1400 mutex_enter(&mi->mi_lock); 1401 if (mi->mi_flags & MI4_MOUNTING) { 1402 mi->mi_flags |= MI4_RECOV_FAIL; 1403 mi->mi_error = EIO; 1404 1405 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1406 "nfs4getfh_otw: waiting 4 recovery\n")); 1407 1408 while (mi->mi_flags & MI4_RECOV_ACTIV) 1409 cv_wait(&mi->mi_failover_cv, 1410 &mi->mi_lock); 1411 } 1412 mutex_exit(&mi->mi_lock); 1413 return; 1414 } 1415 1416 /* 1417 * If the client does not specify a specific flavor to use 1418 * and has not gotten a secinfo list from the server yet, 1419 * retrieve the secinfo list from the server and use a 1420 * flavor from the list to mount. 
1421 * 1422 * If fail to get the secinfo list from the server, then 1423 * try the default flavor. 1424 */ 1425 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1426 svp->sv_secinfo == NULL) { 1427 (void) nfs4_secinfo_path(mi, cr, FALSE); 1428 } 1429 } 1430 1431 if (recovery) 1432 args.ctag = TAG_REMAP_MOUNT; 1433 else 1434 args.ctag = TAG_MOUNT; 1435 1436 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1437 lookuparg.argsp = &args; 1438 lookuparg.resp = &res; 1439 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1440 lookuparg.trailer_len = 0; 1441 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1442 lookuparg.mi = mi; 1443 1444 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1445 ASSERT(svp->sv_path != NULL); 1446 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1447 nfs_rw_exit(&svp->sv_lock); 1448 1449 argop = args.array; 1450 num_argops = args.array_len; 1451 1452 /* choose public or root filehandle */ 1453 if (flags & NFS4_GETFH_PUBLIC) 1454 argop[0].argop = OP_PUTPUBFH; 1455 else 1456 argop[0].argop = OP_PUTROOTFH; 1457 1458 /* get fh */ 1459 argop[1].argop = OP_GETFH; 1460 1461 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1462 "nfs4getfh_otw: %s call, mi 0x%p", 1463 needrecov ? 
"recov" : "first", (void *)mi)); 1464 1465 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1466 1467 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1468 1469 if (needrecov) { 1470 bool_t abort; 1471 1472 if (recovery) { 1473 nfs4args_lookup_free(argop, num_argops); 1474 kmem_free(argop, 1475 lookuparg.arglen * sizeof (nfs_argop4)); 1476 if (!ep->error) 1477 (void) xdr_free(xdr_COMPOUND4res_clnt, 1478 (caddr_t)&res); 1479 return; 1480 } 1481 1482 NFS4_DEBUG(nfs4_client_recov_debug, 1483 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1484 1485 abort = nfs4_start_recovery(ep, mi, NULL, 1486 NULL, NULL, NULL, OP_GETFH, NULL); 1487 if (!ep->error) { 1488 ep->error = geterrno4(res.status); 1489 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1490 } 1491 nfs4args_lookup_free(argop, num_argops); 1492 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1493 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1494 /* have another go? */ 1495 if (abort == FALSE) 1496 goto recov_retry; 1497 return; 1498 } 1499 1500 /* 1501 * No recovery, but check if error is set. 1502 */ 1503 if (ep->error) { 1504 nfs4args_lookup_free(argop, num_argops); 1505 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1506 if (!recovery) 1507 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1508 needrecov); 1509 return; 1510 } 1511 1512 is_link_err: 1513 1514 /* for non-recovery errors */ 1515 if (res.status && res.status != NFS4ERR_SYMLINK) { 1516 if (!recovery) { 1517 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1518 needrecov); 1519 } 1520 nfs4args_lookup_free(argop, num_argops); 1521 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1522 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1523 return; 1524 } 1525 1526 /* 1527 * If any intermediate component in the path is a symbolic link, 1528 * resolve the symlink, then try mount again using the new path. 
1529 */ 1530 if (res.status == NFS4ERR_SYMLINK) { 1531 int where; 1532 1533 /* 1534 * This must be from OP_LOOKUP failure. The (cfh) for this 1535 * OP_LOOKUP is a symlink node. Found out where the 1536 * OP_GETFH is for the (cfh) that is a symlink node. 1537 * 1538 * Example: 1539 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1540 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1541 * 1542 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1543 * In this case, where = 7, nthcomp = 2. 1544 */ 1545 where = res.array_len - 2; 1546 ASSERT(where > 0); 1547 1548 resop = &res.array[where - 1]; 1549 ASSERT(resop->resop == OP_GETFH); 1550 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1551 nthcomp = res.array_len/3 - 1; 1552 1553 /* 1554 * Need to call nfs4_end_op before resolve_sympath to avoid 1555 * potential nfs4_start_op deadlock. 1556 */ 1557 if (!recovery) 1558 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1559 needrecov); 1560 1561 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1562 flags); 1563 1564 nfs4args_lookup_free(argop, num_argops); 1565 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1566 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1567 1568 if (ep->error) 1569 return; 1570 1571 goto recov_retry; 1572 } 1573 1574 /* getfh */ 1575 resop = &res.array[res.array_len - 2]; 1576 ASSERT(resop->resop == OP_GETFH); 1577 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1578 1579 /* getattr fsinfo res */ 1580 resop++; 1581 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1582 1583 *vtp = garp->n4g_va.va_type; 1584 1585 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1586 1587 mutex_enter(&mi->mi_lock); 1588 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1589 mi->mi_flags |= MI4_LINK; 1590 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1591 mi->mi_flags |= MI4_SYMLINK; 1592 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1593 mi->mi_flags |= MI4_ACL; 1594 mutex_exit(&mi->mi_lock); 
1595 1596 if (garp->n4g_ext_res->n4g_maxread == 0) 1597 mi->mi_tsize = 1598 MIN(MAXBSIZE, mi->mi_tsize); 1599 else 1600 mi->mi_tsize = 1601 MIN(garp->n4g_ext_res->n4g_maxread, 1602 mi->mi_tsize); 1603 1604 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1605 mi->mi_stsize = 1606 MIN(MAXBSIZE, mi->mi_stsize); 1607 else 1608 mi->mi_stsize = 1609 MIN(garp->n4g_ext_res->n4g_maxwrite, 1610 mi->mi_stsize); 1611 1612 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1613 mi->mi_maxfilesize = 1614 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1615 mi->mi_maxfilesize); 1616 1617 /* 1618 * If the final component is a a symbolic link, resolve the symlink, 1619 * then try mount again using the new path. 1620 * 1621 * Assume no symbolic link for root filesysm "/". 1622 */ 1623 if (*vtp == VLNK) { 1624 /* 1625 * nthcomp is the total result length minus 1626 * the 1st 2 OPs (PUTROOTFH, GETFH), 1627 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1628 * 1629 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1630 * LOOKUP 2nd-comp GETFH GETATTR 1631 * 1632 * (8 - 2)/3 = 2 1633 */ 1634 nthcomp = (res.array_len - 2)/3; 1635 1636 /* 1637 * Need to call nfs4_end_op before resolve_sympath to avoid 1638 * potential nfs4_start_op deadlock. See RFE 4777612. 1639 */ 1640 if (!recovery) 1641 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1642 needrecov); 1643 1644 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1645 flags); 1646 1647 nfs4args_lookup_free(argop, num_argops); 1648 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1649 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1650 1651 if (ep->error) 1652 return; 1653 1654 goto recov_retry; 1655 } 1656 1657 /* 1658 * We need to figure out where in the compound the getfh 1659 * for the parent directory is. If the object to be mounted is 1660 * the root, then there is no lookup at all: 1661 * PUTROOTFH, GETFH. 
1662 * If the object to be mounted is in the root, then the compound is: 1663 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1664 * In either of these cases, the index of the GETFH is 1. 1665 * If it is not at the root, then it's something like: 1666 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1667 * LOOKUP, GETFH, GETATTR 1668 * In this case, the index is llndx (last lookup index) - 2. 1669 */ 1670 if (llndx == -1 || llndx == 2) 1671 resop = &res.array[1]; 1672 else { 1673 ASSERT(llndx > 2); 1674 resop = &res.array[llndx-2]; 1675 } 1676 1677 ASSERT(resop->resop == OP_GETFH); 1678 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1679 1680 /* save the filehandles for the replica */ 1681 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1682 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1683 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1684 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1685 tmpfhp->nfs_fh4_len); 1686 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1687 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1688 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1689 1690 /* initialize fsid and supp_attrs for server fs */ 1691 svp->sv_fsid = garp->n4g_fsid; 1692 svp->sv_supp_attrs = 1693 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1694 1695 nfs_rw_exit(&svp->sv_lock); 1696 1697 nfs4args_lookup_free(argop, num_argops); 1698 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1699 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1700 if (!recovery) 1701 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1702 } 1703 1704 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1705 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1706 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1707 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1708 1709 /* 1710 * Remap the root filehandle for the given filesystem. 
1711 * 1712 * results returned via the nfs4_error_t parameter. 1713 */ 1714 void 1715 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1716 { 1717 struct servinfo4 *svp; 1718 vtype_t vtype; 1719 nfs_fh4 rootfh; 1720 int getfh_flags; 1721 char *orig_sv_path; 1722 int orig_sv_pathlen, num_retry; 1723 1724 mutex_enter(&mi->mi_lock); 1725 1726 remap_retry: 1727 svp = mi->mi_curr_serv; 1728 getfh_flags = 1729 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1730 getfh_flags |= 1731 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1732 mutex_exit(&mi->mi_lock); 1733 1734 /* 1735 * Just in case server path being mounted contains 1736 * symlinks and fails w/STALE, save the initial sv_path 1737 * so we can redrive the initial mount compound with the 1738 * initial sv_path -- not a symlink-expanded version. 1739 * 1740 * This could only happen if a symlink was expanded 1741 * and the expanded mount compound failed stale. Because 1742 * it could be the case that the symlink was removed at 1743 * the server (and replaced with another symlink/dir, 1744 * we need to use the initial sv_path when attempting 1745 * to re-lookup everything and recover. 1746 */ 1747 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1748 orig_sv_pathlen = svp->sv_pathlen; 1749 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1750 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1751 nfs_rw_exit(&svp->sv_lock); 1752 1753 num_retry = nfs4_max_mount_retry; 1754 1755 do { 1756 /* 1757 * Get the root fh from the server. Retry nfs4_max_mount_retry 1758 * (2) times if it fails with STALE since the recovery 1759 * infrastructure doesn't do STALE recovery for components 1760 * of the server path to the object being mounted. 1761 */ 1762 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1763 1764 if (ep->error == 0 && ep->stat == NFS4_OK) 1765 break; 1766 1767 /* 1768 * For some reason, the mount compound failed. 
Before 1769 * retrying, we need to restore the original sv_path 1770 * because it might have contained symlinks that were 1771 * expanded by nfsgetfh_otw before the failure occurred. 1772 * replace current sv_path with orig sv_path -- just in case 1773 * it changed due to embedded symlinks. 1774 */ 1775 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1776 if (orig_sv_pathlen != svp->sv_pathlen) { 1777 kmem_free(svp->sv_path, svp->sv_pathlen); 1778 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1779 svp->sv_pathlen = orig_sv_pathlen; 1780 } 1781 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1782 nfs_rw_exit(&svp->sv_lock); 1783 1784 } while (num_retry-- > 0); 1785 1786 kmem_free(orig_sv_path, orig_sv_pathlen); 1787 1788 if (ep->error != 0 || ep->stat != 0) { 1789 return; 1790 } 1791 1792 if (vtype != VNON && vtype != mi->mi_type) { 1793 /* shouldn't happen */ 1794 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1795 "nfs4_remap_root: server root vnode type (%d) doesn't " 1796 "match mount info (%d)", vtype, mi->mi_type); 1797 } 1798 1799 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1800 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1801 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1802 nfs_rw_exit(&svp->sv_lock); 1803 sfh4_update(mi->mi_rootfh, &rootfh); 1804 1805 /* 1806 * It's possible that recovery took place on the filesystem 1807 * and the server has been updated between the time we did 1808 * the nfs4getfh_otw and now. Re-drive the otw operation 1809 * to make sure we have a good fh. 
1810 */ 1811 mutex_enter(&mi->mi_lock); 1812 if (mi->mi_curr_serv != svp) 1813 goto remap_retry; 1814 1815 mutex_exit(&mi->mi_lock); 1816 } 1817 1818 static int 1819 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1820 int flags, cred_t *cr, zone_t *zone) 1821 { 1822 vnode_t *rtvp = NULL; 1823 mntinfo4_t *mi; 1824 dev_t nfs_dev; 1825 int error = 0; 1826 rnode4_t *rp; 1827 int i; 1828 struct vattr va; 1829 vtype_t vtype = VNON; 1830 vtype_t tmp_vtype = VNON; 1831 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1832 nfs4_oo_hash_bucket_t *bucketp; 1833 nfs_fh4 fh; 1834 char *droptext = ""; 1835 struct nfs_stats *nfsstatsp; 1836 nfs4_fname_t *mfname; 1837 nfs4_error_t e; 1838 char *orig_sv_path; 1839 int orig_sv_pathlen, num_retry, removed; 1840 cred_t *lcr = NULL, *tcr = cr; 1841 1842 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1843 ASSERT(nfsstatsp != NULL); 1844 1845 ASSERT(nfs_zone() == zone); 1846 ASSERT(crgetref(cr)); 1847 1848 /* 1849 * Create a mount record and link it to the vfs struct. 
1850 */ 1851 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1852 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1853 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1854 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1855 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1856 1857 if (!(flags & NFSMNT_SOFT)) 1858 mi->mi_flags |= MI4_HARD; 1859 if ((flags & NFSMNT_NOPRINT)) 1860 mi->mi_flags |= MI4_NOPRINT; 1861 if (flags & NFSMNT_INT) 1862 mi->mi_flags |= MI4_INT; 1863 if (flags & NFSMNT_PUBLIC) 1864 mi->mi_flags |= MI4_PUBLIC; 1865 mi->mi_retrans = NFS_RETRIES; 1866 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1867 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1868 mi->mi_timeo = nfs4_cots_timeo; 1869 else 1870 mi->mi_timeo = NFS_TIMEO; 1871 mi->mi_prog = NFS_PROGRAM; 1872 mi->mi_vers = NFS_V4; 1873 mi->mi_rfsnames = rfsnames_v4; 1874 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1875 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1876 mi->mi_servers = svp; 1877 mi->mi_curr_serv = svp; 1878 mi->mi_acregmin = SEC2HR(ACREGMIN); 1879 mi->mi_acregmax = SEC2HR(ACREGMAX); 1880 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1881 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1882 mi->mi_fh_expire_type = FH4_PERSISTENT; 1883 mi->mi_clientid_next = NULL; 1884 mi->mi_clientid_prev = NULL; 1885 mi->mi_grace_wait = 0; 1886 mi->mi_error = 0; 1887 mi->mi_srvsettime = 0; 1888 1889 mi->mi_count = 1; 1890 1891 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1892 mi->mi_stsize = mi->mi_tsize; 1893 1894 if (flags & NFSMNT_DIRECTIO) 1895 mi->mi_flags |= MI4_DIRECTIO; 1896 1897 mi->mi_flags |= MI4_MOUNTING; 1898 1899 /* 1900 * Make a vfs struct for nfs. We do this here instead of below 1901 * because rtvp needs a vfs before we can do a getattr on it. 
1902 * 1903 * Assign a unique device id to the mount 1904 */ 1905 mutex_enter(&nfs_minor_lock); 1906 do { 1907 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1908 nfs_dev = makedevice(nfs_major, nfs_minor); 1909 } while (vfs_devismounted(nfs_dev)); 1910 mutex_exit(&nfs_minor_lock); 1911 1912 vfsp->vfs_dev = nfs_dev; 1913 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1914 vfsp->vfs_data = (caddr_t)mi; 1915 vfsp->vfs_fstype = nfsfstyp; 1916 vfsp->vfs_bsize = nfs4_bsize; 1917 1918 /* 1919 * Initialize fields used to support async putpage operations. 1920 */ 1921 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1922 mi->mi_async_clusters[i] = nfs4_async_clusters; 1923 mi->mi_async_init_clusters = nfs4_async_clusters; 1924 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1925 mi->mi_max_threads = nfs4_max_threads; 1926 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1927 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1928 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1929 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1930 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1931 1932 mi->mi_vfsp = vfsp; 1933 zone_hold(mi->mi_zone = zone); 1934 nfs4_mi_zonelist_add(mi); 1935 1936 /* 1937 * Initialize the <open owner/cred> hash table. 1938 */ 1939 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1940 bucketp = &(mi->mi_oo_list[i]); 1941 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1942 list_create(&bucketp->b_oo_hash_list, 1943 sizeof (nfs4_open_owner_t), 1944 offsetof(nfs4_open_owner_t, oo_hash_node)); 1945 } 1946 1947 /* 1948 * Initialize the freed open owner list. 
1949 */ 1950 mi->mi_foo_num = 0; 1951 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1952 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1953 offsetof(nfs4_open_owner_t, oo_foo_node)); 1954 1955 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1956 offsetof(nfs4_lost_rqst_t, lr_node)); 1957 1958 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1959 offsetof(nfs4_bseqid_entry_t, bs_node)); 1960 1961 /* 1962 * Initialize the msg buffer. 1963 */ 1964 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1965 offsetof(nfs4_debug_msg_t, msg_node)); 1966 mi->mi_msg_count = 0; 1967 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1968 1969 /* 1970 * Initialize kstats 1971 */ 1972 nfs4_mnt_kstat_init(vfsp); 1973 1974 /* 1975 * Initialize the shared filehandle pool, and get the fname for 1976 * the filesystem root. 1977 */ 1978 sfh4_createtab(&mi->mi_filehandles); 1979 mi->mi_fname = fn_get(NULL, "."); 1980 1981 /* 1982 * Save server path we're attempting to mount. 1983 */ 1984 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1985 orig_sv_pathlen = svp_head->sv_pathlen; 1986 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1987 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1988 nfs_rw_exit(&svp->sv_lock); 1989 1990 /* 1991 * Make the GETFH call to get root fh for each replica. 1992 */ 1993 if (svp_head->sv_next) 1994 droptext = ", dropping replica"; 1995 1996 /* 1997 * If the uid is set then set the creds for secure mounts 1998 * by proxy processes such as automountd. 
1999 */ 2000 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2001 if (svp->sv_secdata->uid != 0) { 2002 lcr = crdup(cr); 2003 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 2004 tcr = lcr; 2005 } 2006 nfs_rw_exit(&svp->sv_lock); 2007 for (svp = svp_head; svp; svp = svp->sv_next) { 2008 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 2009 nfs_cmn_err(error, CE_WARN, 2010 VERS_MSG "Host %s is a duplicate%s", 2011 svp->sv_hostname, droptext); 2012 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2013 svp->sv_flags |= SV4_NOTINUSE; 2014 nfs_rw_exit(&svp->sv_lock); 2015 continue; 2016 } 2017 mi->mi_curr_serv = svp; 2018 2019 /* 2020 * Just in case server path being mounted contains 2021 * symlinks and fails w/STALE, save the initial sv_path 2022 * so we can redrive the initial mount compound with the 2023 * initial sv_path -- not a symlink-expanded version. 2024 * 2025 * This could only happen if a symlink was expanded 2026 * and the expanded mount compound failed stale. Because 2027 * it could be the case that the symlink was removed at 2028 * the server (and replaced with another symlink/dir, 2029 * we need to use the initial sv_path when attempting 2030 * to re-lookup everything and recover. 2031 * 2032 * Other mount errors should evenutally be handled here also 2033 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 2034 * failures will result in mount being redriven a few times. 2035 */ 2036 num_retry = nfs4_max_mount_retry; 2037 do { 2038 nfs4getfh_otw(mi, svp, &tmp_vtype, 2039 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 2040 NFS4_GETFH_NEEDSOP, tcr, &e); 2041 2042 if (e.error == 0 && e.stat == NFS4_OK) 2043 break; 2044 2045 /* 2046 * replace current sv_path with orig sv_path -- just in 2047 * case it changed due to embedded symlinks. 
2048 */ 2049 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2050 if (orig_sv_pathlen != svp->sv_pathlen) { 2051 kmem_free(svp->sv_path, svp->sv_pathlen); 2052 svp->sv_path = kmem_alloc(orig_sv_pathlen, 2053 KM_SLEEP); 2054 svp->sv_pathlen = orig_sv_pathlen; 2055 } 2056 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2057 nfs_rw_exit(&svp->sv_lock); 2058 2059 } while (num_retry-- > 0); 2060 2061 error = e.error ? e.error : geterrno4(e.stat); 2062 if (error) { 2063 nfs_cmn_err(error, CE_WARN, 2064 VERS_MSG "initial call to %s failed%s: %m", 2065 svp->sv_hostname, droptext); 2066 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2067 svp->sv_flags |= SV4_NOTINUSE; 2068 nfs_rw_exit(&svp->sv_lock); 2069 mi->mi_flags &= ~MI4_RECOV_FAIL; 2070 mi->mi_error = 0; 2071 continue; 2072 } 2073 2074 if (tmp_vtype == VBAD) { 2075 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2076 VERS_MSG "%s returned a bad file type for " 2077 "root%s", svp->sv_hostname, droptext); 2078 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2079 svp->sv_flags |= SV4_NOTINUSE; 2080 nfs_rw_exit(&svp->sv_lock); 2081 continue; 2082 } 2083 2084 if (vtype == VNON) { 2085 vtype = tmp_vtype; 2086 } else if (vtype != tmp_vtype) { 2087 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2088 VERS_MSG "%s returned a different file type " 2089 "for root%s", svp->sv_hostname, droptext); 2090 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2091 svp->sv_flags |= SV4_NOTINUSE; 2092 nfs_rw_exit(&svp->sv_lock); 2093 continue; 2094 } 2095 if (firstsvp == NULL) 2096 firstsvp = svp; 2097 } 2098 2099 kmem_free(orig_sv_path, orig_sv_pathlen); 2100 2101 if (firstsvp == NULL) { 2102 if (error == 0) 2103 error = ENOENT; 2104 goto bad; 2105 } 2106 2107 mi->mi_curr_serv = svp = firstsvp; 2108 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2109 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2110 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2111 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2112 mi->mi_rootfh = 
sfh4_get(&fh, mi); 2113 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2114 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2115 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2116 nfs_rw_exit(&svp->sv_lock); 2117 2118 /* 2119 * Make the root vnode without attributes. 2120 */ 2121 mfname = mi->mi_fname; 2122 fn_hold(mfname); 2123 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2124 &mfname, NULL, mi, cr, gethrtime()); 2125 rtvp->v_type = vtype; 2126 2127 mi->mi_curread = mi->mi_tsize; 2128 mi->mi_curwrite = mi->mi_stsize; 2129 2130 /* 2131 * Start the manager thread responsible for handling async worker 2132 * threads. 2133 */ 2134 MI4_HOLD(mi); 2135 VFS_HOLD(vfsp); /* add reference for thread */ 2136 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2137 vfsp, 0, minclsyspri); 2138 ASSERT(mi->mi_manager_thread != NULL); 2139 2140 /* 2141 * Create the thread that handles over-the-wire calls for 2142 * VOP_INACTIVE. 2143 * This needs to happen after the manager thread is created. 2144 */ 2145 MI4_HOLD(mi); 2146 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2147 mi, 0, minclsyspri); 2148 ASSERT(mi->mi_inactive_thread != NULL); 2149 2150 /* If we didn't get a type, get one now */ 2151 if (rtvp->v_type == VNON) { 2152 va.va_mask = AT_TYPE; 2153 error = nfs4getattr(rtvp, &va, tcr); 2154 if (error) 2155 goto bad; 2156 rtvp->v_type = va.va_type; 2157 } 2158 2159 mi->mi_type = rtvp->v_type; 2160 2161 mutex_enter(&mi->mi_lock); 2162 mi->mi_flags &= ~MI4_MOUNTING; 2163 mutex_exit(&mi->mi_lock); 2164 2165 *rtvpp = rtvp; 2166 if (lcr != NULL) 2167 crfree(lcr); 2168 2169 return (0); 2170 bad: 2171 /* 2172 * An error occurred somewhere, need to clean up... 2173 */ 2174 if (lcr != NULL) 2175 crfree(lcr); 2176 if (rtvp != NULL) { 2177 /* 2178 * We need to release our reference to the root vnode and 2179 * destroy the mntinfo4 struct that we just created. 
2180 */ 2181 rp = VTOR4(rtvp); 2182 if (rp->r_flags & R4HASHED) 2183 rp4_rmhash(rp); 2184 VN_RELE(rtvp); 2185 } 2186 nfs4_async_stop(vfsp); 2187 nfs4_async_manager_stop(vfsp); 2188 removed = nfs4_mi_zonelist_remove(mi); 2189 if (removed) 2190 zone_rele(mi->mi_zone); 2191 2192 /* 2193 * This releases the initial "hold" of the mi since it will never 2194 * be referenced by the vfsp. Also, when mount returns to vfs.c 2195 * with an error, the vfsp will be destroyed, not rele'd. 2196 */ 2197 MI4_RELE(mi); 2198 2199 *rtvpp = NULL; 2200 return (error); 2201 } 2202 2203 /* 2204 * vfs operations 2205 */ 2206 static int 2207 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2208 { 2209 mntinfo4_t *mi; 2210 ushort_t omax; 2211 int removed; 2212 2213 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2214 return (EPERM); 2215 2216 mi = VFTOMI4(vfsp); 2217 2218 if (flag & MS_FORCE) { 2219 vfsp->vfs_flag |= VFS_UNMOUNTED; 2220 if (nfs_zone() != mi->mi_zone) { 2221 /* 2222 * If the request is coming from the wrong zone, 2223 * we don't want to create any new threads, and 2224 * performance is not a concern. Do everything 2225 * inline. 2226 */ 2227 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2228 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2229 (void *)vfsp)); 2230 nfs4_free_mount(vfsp, cr); 2231 } else { 2232 /* 2233 * Free data structures asynchronously, to avoid 2234 * blocking the current thread (for performance 2235 * reasons only). 2236 */ 2237 async_free_mount(vfsp, cr); 2238 } 2239 return (0); 2240 } 2241 /* 2242 * Wait until all asynchronous putpage operations on 2243 * this file system are complete before flushing rnodes 2244 * from the cache. 2245 */ 2246 omax = mi->mi_max_threads; 2247 if (nfs4_async_stop_sig(vfsp)) { 2248 2249 return (EINTR); 2250 } 2251 r4flush(vfsp, cr); 2252 /* 2253 * If there are any active vnodes on this file system, 2254 * then the file system is busy and can't be umounted. 
2255 */ 2256 if (check_rtable4(vfsp)) { 2257 mutex_enter(&mi->mi_async_lock); 2258 mi->mi_max_threads = omax; 2259 mutex_exit(&mi->mi_async_lock); 2260 return (EBUSY); 2261 } 2262 /* 2263 * The unmount can't fail from now on, and there are no active 2264 * files that could require over-the-wire calls to the server, 2265 * so stop the async manager and the inactive thread. 2266 */ 2267 nfs4_async_manager_stop(vfsp); 2268 /* 2269 * Destroy all rnodes belonging to this file system from the 2270 * rnode hash queues and purge any resources allocated to 2271 * them. 2272 */ 2273 destroy_rtable4(vfsp, cr); 2274 vfsp->vfs_flag |= VFS_UNMOUNTED; 2275 2276 nfs4_remove_mi_from_server(mi, NULL); 2277 removed = nfs4_mi_zonelist_remove(mi); 2278 if (removed) 2279 zone_rele(mi->mi_zone); 2280 2281 return (0); 2282 } 2283 2284 /* 2285 * find root of nfs 2286 */ 2287 static int 2288 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2289 { 2290 mntinfo4_t *mi; 2291 vnode_t *vp; 2292 nfs4_fname_t *mfname; 2293 servinfo4_t *svp; 2294 2295 mi = VFTOMI4(vfsp); 2296 2297 if (nfs_zone() != mi->mi_zone) 2298 return (EPERM); 2299 2300 svp = mi->mi_curr_serv; 2301 if (svp) { 2302 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2303 if (svp->sv_flags & SV4_ROOT_STALE) { 2304 nfs_rw_exit(&svp->sv_lock); 2305 2306 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2307 if (svp->sv_flags & SV4_ROOT_STALE) { 2308 svp->sv_flags &= ~SV4_ROOT_STALE; 2309 nfs_rw_exit(&svp->sv_lock); 2310 return (ENOENT); 2311 } 2312 nfs_rw_exit(&svp->sv_lock); 2313 } else 2314 nfs_rw_exit(&svp->sv_lock); 2315 } 2316 2317 mfname = mi->mi_fname; 2318 fn_hold(mfname); 2319 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2320 VFTOMI4(vfsp), CRED(), gethrtime()); 2321 2322 if (VTOR4(vp)->r_flags & R4STALE) { 2323 VN_RELE(vp); 2324 return (ENOENT); 2325 } 2326 2327 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2328 2329 vp->v_type = mi->mi_type; 2330 2331 *vpp = vp; 2332 2333 return (0); 2334 } 2335 
2336 static int 2337 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2338 { 2339 int error; 2340 nfs4_ga_res_t gar; 2341 nfs4_ga_ext_res_t ger; 2342 2343 gar.n4g_ext_res = &ger; 2344 2345 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2346 NFS4_STATFS_ATTR_MASK, cr)) 2347 return (error); 2348 2349 *sbp = gar.n4g_ext_res->n4g_sb; 2350 2351 return (0); 2352 } 2353 2354 /* 2355 * Get file system statistics. 2356 */ 2357 static int 2358 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2359 { 2360 int error; 2361 vnode_t *vp; 2362 cred_t *cr; 2363 2364 error = nfs4_root(vfsp, &vp); 2365 if (error) 2366 return (error); 2367 2368 cr = CRED(); 2369 2370 error = nfs4_statfs_otw(vp, sbp, cr); 2371 if (!error) { 2372 (void) strncpy(sbp->f_basetype, 2373 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2374 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2375 } else { 2376 nfs4_purge_stale_fh(error, vp, cr); 2377 } 2378 2379 VN_RELE(vp); 2380 2381 return (error); 2382 } 2383 2384 static kmutex_t nfs4_syncbusy; 2385 2386 /* 2387 * Flush dirty nfs files for file system vfsp. 2388 * If vfsp == NULL, all nfs files are flushed. 2389 * 2390 * SYNC_CLOSE in flag is passed to us to 2391 * indicate that we are shutting down and or 2392 * rebooting. 2393 */ 2394 static int 2395 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2396 { 2397 /* 2398 * Cross-zone calls are OK here, since this translates to a 2399 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2400 */ 2401 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2402 r4flush(vfsp, cr); 2403 mutex_exit(&nfs4_syncbusy); 2404 } 2405 2406 /* 2407 * if SYNC_CLOSE is set then we know that 2408 * the system is rebooting, mark the mntinfo 2409 * for later examination. 
2410 */ 2411 if (vfsp && (flag & SYNC_CLOSE)) { 2412 mntinfo4_t *mi; 2413 2414 mi = VFTOMI4(vfsp); 2415 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2416 mutex_enter(&mi->mi_lock); 2417 mi->mi_flags |= MI4_SHUTDOWN; 2418 mutex_exit(&mi->mi_lock); 2419 } 2420 } 2421 return (0); 2422 } 2423 2424 /* 2425 * vget is difficult, if not impossible, to support in v4 because we don't 2426 * know the parent directory or name, which makes it impossible to create a 2427 * useful shadow vnode. And we need the shadow vnode for things like 2428 * OPEN. 2429 */ 2430 2431 /* ARGSUSED */ 2432 /* 2433 * XXX Check nfs4_vget_pseudo() for dependency. 2434 */ 2435 static int 2436 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2437 { 2438 return (EREMOTE); 2439 } 2440 2441 /* 2442 * nfs4_mountroot get called in the case where we are diskless booting. All 2443 * we need from here is the ability to get the server info and from there we 2444 * can simply call nfs4_rootvp. 2445 */ 2446 /* ARGSUSED */ 2447 static int 2448 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2449 { 2450 vnode_t *rtvp; 2451 char root_hostname[SYS_NMLN+1]; 2452 struct servinfo4 *svp; 2453 int error; 2454 int vfsflags; 2455 size_t size; 2456 char *root_path; 2457 struct pathname pn; 2458 char *name; 2459 cred_t *cr; 2460 mntinfo4_t *mi; 2461 struct nfs_args args; /* nfs mount arguments */ 2462 static char token[10]; 2463 nfs4_error_t n4e; 2464 2465 bzero(&args, sizeof (args)); 2466 2467 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2468 clkset(-1L); /* hack for now - until we get time svc? */ 2469 2470 if (why == ROOT_REMOUNT) { 2471 /* 2472 * Shouldn't happen. 2473 */ 2474 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2475 } 2476 2477 if (why == ROOT_UNMOUNT) { 2478 /* 2479 * Nothing to do for NFS. 
2480 */ 2481 return (0); 2482 } 2483 2484 /* 2485 * why == ROOT_INIT 2486 */ 2487 2488 name = token; 2489 *name = 0; 2490 (void) getfsname("root", name, sizeof (token)); 2491 2492 pn_alloc(&pn); 2493 root_path = pn.pn_path; 2494 2495 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2496 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2497 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2498 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2499 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2500 2501 /* 2502 * Get server address 2503 * Get the root path 2504 * Get server's transport 2505 * Get server's hostname 2506 * Get options 2507 */ 2508 args.addr = &svp->sv_addr; 2509 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2510 args.fh = (char *)&svp->sv_fhandle; 2511 args.knconf = svp->sv_knconf; 2512 args.hostname = root_hostname; 2513 vfsflags = 0; 2514 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2515 &args, &vfsflags)) { 2516 if (error == EPROTONOSUPPORT) 2517 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2518 "mount_root failed: server doesn't support NFS V4"); 2519 else 2520 nfs_cmn_err(error, CE_WARN, 2521 "nfs4_mountroot: mount_root failed: %m"); 2522 nfs_rw_exit(&svp->sv_lock); 2523 sv4_free(svp); 2524 pn_free(&pn); 2525 return (error); 2526 } 2527 nfs_rw_exit(&svp->sv_lock); 2528 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2529 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2530 (void) strcpy(svp->sv_hostname, root_hostname); 2531 2532 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2533 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2534 (void) strcpy(svp->sv_path, root_path); 2535 2536 /* 2537 * Force root partition to always be mounted with AUTH_UNIX for now 2538 */ 2539 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2540 svp->sv_secdata->secmod = AUTH_UNIX; 2541 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2542 
svp->sv_secdata->data = NULL; 2543 2544 cr = crgetcred(); 2545 rtvp = NULL; 2546 2547 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2548 2549 if (error) { 2550 crfree(cr); 2551 pn_free(&pn); 2552 sv4_free(svp); 2553 return (error); 2554 } 2555 2556 mi = VTOMI4(rtvp); 2557 2558 /* 2559 * Send client id to the server, if necessary 2560 */ 2561 nfs4_error_zinit(&n4e); 2562 nfs4setclientid(mi, cr, FALSE, &n4e); 2563 error = n4e.error; 2564 2565 crfree(cr); 2566 2567 if (error) { 2568 pn_free(&pn); 2569 goto errout; 2570 } 2571 2572 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2573 if (error) { 2574 nfs_cmn_err(error, CE_WARN, 2575 "nfs4_mountroot: invalid root mount options"); 2576 pn_free(&pn); 2577 goto errout; 2578 } 2579 2580 (void) vfs_lock_wait(vfsp); 2581 vfs_add(NULL, vfsp, vfsflags); 2582 vfs_unlock(vfsp); 2583 2584 size = strlen(svp->sv_hostname); 2585 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2586 rootfs.bo_name[size] = ':'; 2587 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2588 2589 pn_free(&pn); 2590 2591 errout: 2592 if (error) { 2593 sv4_free(svp); 2594 nfs4_async_stop(vfsp); 2595 nfs4_async_manager_stop(vfsp); 2596 } 2597 2598 if (rtvp != NULL) 2599 VN_RELE(rtvp); 2600 2601 return (error); 2602 } 2603 2604 /* 2605 * Initialization routine for VFS routines. 
Should only be called once 2606 */ 2607 int 2608 nfs4_vfsinit(void) 2609 { 2610 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2611 nfs4setclientid_init(); 2612 return (0); 2613 } 2614 2615 void 2616 nfs4_vfsfini(void) 2617 { 2618 nfs4setclientid_fini(); 2619 mutex_destroy(&nfs4_syncbusy); 2620 } 2621 2622 void 2623 nfs4_freevfs(vfs_t *vfsp) 2624 { 2625 mntinfo4_t *mi; 2626 2627 /* need to release the initial hold */ 2628 mi = VFTOMI4(vfsp); 2629 MI4_RELE(mi); 2630 } 2631 2632 /* 2633 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2634 */ 2635 struct nfs4_server nfs4_server_lst = 2636 { &nfs4_server_lst, &nfs4_server_lst }; 2637 2638 kmutex_t nfs4_server_lst_lock; 2639 2640 static void 2641 nfs4setclientid_init(void) 2642 { 2643 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2644 } 2645 2646 static void 2647 nfs4setclientid_fini(void) 2648 { 2649 mutex_destroy(&nfs4_server_lst_lock); 2650 } 2651 2652 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2653 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2654 2655 /* 2656 * Set the clientid for the server for "mi". No-op if the clientid is 2657 * already set. 2658 * 2659 * The recovery boolean should be set to TRUE if this function was called 2660 * by the recovery code, and FALSE otherwise. This is used to determine 2661 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2662 * for adding a mntinfo4_t to a nfs4_server_t. 2663 * 2664 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2665 * 'n4ep->error' is set to geterrno4(n4ep->stat). 
2666 */ 2667 void 2668 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2669 { 2670 struct nfs4_server *np; 2671 struct servinfo4 *svp = mi->mi_curr_serv; 2672 nfs4_recov_state_t recov_state; 2673 int num_retries = 0; 2674 bool_t retry; 2675 cred_t *lcr = NULL; 2676 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2677 time_t lease_time = 0; 2678 2679 recov_state.rs_flags = 0; 2680 recov_state.rs_num_retry_despite_err = 0; 2681 ASSERT(n4ep != NULL); 2682 2683 recov_retry: 2684 retry = FALSE; 2685 nfs4_error_zinit(n4ep); 2686 if (!recovery) 2687 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2688 2689 mutex_enter(&nfs4_server_lst_lock); 2690 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2691 mutex_exit(&nfs4_server_lst_lock); 2692 if (!np) { 2693 struct nfs4_server *tnp; 2694 np = new_nfs4_server(svp, cr); 2695 mutex_enter(&np->s_lock); 2696 2697 mutex_enter(&nfs4_server_lst_lock); 2698 tnp = servinfo4_to_nfs4_server(svp); 2699 if (tnp) { 2700 /* 2701 * another thread snuck in and put server on list. 2702 * since we aren't adding it to the nfs4_server_list 2703 * we need to set the ref count to 0 and destroy it. 
2704 */ 2705 np->s_refcnt = 0; 2706 destroy_nfs4_server(np); 2707 np = tnp; 2708 } else { 2709 /* 2710 * do not give list a reference until everything 2711 * succeeds 2712 */ 2713 insque(np, &nfs4_server_lst); 2714 } 2715 mutex_exit(&nfs4_server_lst_lock); 2716 } 2717 ASSERT(MUTEX_HELD(&np->s_lock)); 2718 /* 2719 * If we find the server already has N4S_CLIENTID_SET, then 2720 * just return, we've already done SETCLIENTID to that server 2721 */ 2722 if (np->s_flags & N4S_CLIENTID_SET) { 2723 /* add mi to np's mntinfo4_list */ 2724 nfs4_add_mi_to_server(np, mi); 2725 if (!recovery) 2726 nfs_rw_exit(&mi->mi_recovlock); 2727 mutex_exit(&np->s_lock); 2728 nfs4_server_rele(np); 2729 return; 2730 } 2731 mutex_exit(&np->s_lock); 2732 2733 2734 /* 2735 * Drop the mi_recovlock since nfs4_start_op will 2736 * acquire it again for us. 2737 */ 2738 if (!recovery) { 2739 nfs_rw_exit(&mi->mi_recovlock); 2740 2741 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2742 if (n4ep->error) { 2743 nfs4_server_rele(np); 2744 return; 2745 } 2746 } 2747 2748 mutex_enter(&np->s_lock); 2749 while (np->s_flags & N4S_CLIENTID_PEND) { 2750 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2751 mutex_exit(&np->s_lock); 2752 nfs4_server_rele(np); 2753 if (!recovery) 2754 nfs4_end_op(mi, NULL, NULL, &recov_state, 2755 recovery); 2756 n4ep->error = EINTR; 2757 return; 2758 } 2759 } 2760 2761 if (np->s_flags & N4S_CLIENTID_SET) { 2762 /* XXX copied/pasted from above */ 2763 /* add mi to np's mntinfo4_list */ 2764 nfs4_add_mi_to_server(np, mi); 2765 mutex_exit(&np->s_lock); 2766 nfs4_server_rele(np); 2767 if (!recovery) 2768 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2769 return; 2770 } 2771 2772 /* 2773 * Reset the N4S_CB_PINGED flag. This is used to 2774 * indicate if we have received a CB_NULL from the 2775 * server. Also we reset the waiter flag. 
2776 */ 2777 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2778 /* any failure must now clear this flag */ 2779 np->s_flags |= N4S_CLIENTID_PEND; 2780 mutex_exit(&np->s_lock); 2781 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2782 2783 if (n4ep->error == EACCES) { 2784 /* 2785 * If the uid is set then set the creds for secure mounts 2786 * by proxy processes such as automountd. 2787 */ 2788 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2789 if (svp->sv_secdata->uid != 0) { 2790 lcr = crdup(cr); 2791 (void) crsetugid(lcr, svp->sv_secdata->uid, 2792 crgetgid(cr)); 2793 } 2794 nfs_rw_exit(&svp->sv_lock); 2795 2796 if (lcr != NULL) { 2797 mutex_enter(&np->s_lock); 2798 crfree(np->s_cred); 2799 np->s_cred = lcr; 2800 mutex_exit(&np->s_lock); 2801 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2802 &retry_inuse); 2803 } 2804 } 2805 mutex_enter(&np->s_lock); 2806 lease_time = np->s_lease_time; 2807 np->s_flags &= ~N4S_CLIENTID_PEND; 2808 mutex_exit(&np->s_lock); 2809 2810 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2811 /* 2812 * Start recovery if failover is a possibility. If 2813 * invoked by the recovery thread itself, then just 2814 * return and let it handle the failover first. NB: 2815 * recovery is not allowed if the mount is in progress 2816 * since the infrastructure is not sufficiently setup 2817 * to allow it. Just return the error (after suitable 2818 * retries). 2819 */ 2820 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2821 (void) nfs4_start_recovery(n4ep, mi, NULL, 2822 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2823 /* 2824 * Don't retry here, just return and let 2825 * recovery take over. 2826 */ 2827 if (recovery) 2828 retry = FALSE; 2829 } else if (nfs4_rpc_retry_error(n4ep->error) || 2830 n4ep->stat == NFS4ERR_RESOURCE || 2831 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2832 2833 retry = TRUE; 2834 /* 2835 * Always retry if in recovery or once had 2836 * contact with the server (but now it's 2837 * overloaded). 
2838 */ 2839 if (recovery == TRUE || 2840 n4ep->error == ETIMEDOUT || 2841 n4ep->error == ECONNRESET) 2842 num_retries = 0; 2843 } else if (retry_inuse && n4ep->error == 0 && 2844 n4ep->stat == NFS4ERR_CLID_INUSE) { 2845 retry = TRUE; 2846 num_retries = 0; 2847 } 2848 } else { 2849 /* 2850 * Since everything succeeded give the list a reference count if 2851 * it hasn't been given one by add_new_nfs4_server() or if this 2852 * is not a recovery situation in which case it is already on 2853 * the list. 2854 */ 2855 mutex_enter(&np->s_lock); 2856 if ((np->s_flags & N4S_INSERTED) == 0) { 2857 np->s_refcnt++; 2858 np->s_flags |= N4S_INSERTED; 2859 } 2860 mutex_exit(&np->s_lock); 2861 } 2862 2863 if (!recovery) 2864 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2865 2866 2867 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2868 if (retry_inuse) { 2869 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2870 retry_inuse = 0; 2871 } else 2872 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2873 2874 nfs4_server_rele(np); 2875 goto recov_retry; 2876 } 2877 2878 2879 if (n4ep->error == 0) 2880 n4ep->error = geterrno4(n4ep->stat); 2881 2882 /* broadcast before release in case no other threads are waiting */ 2883 cv_broadcast(&np->s_clientid_pend); 2884 nfs4_server_rele(np); 2885 } 2886 2887 int nfs4setclientid_otw_debug = 0; 2888 2889 /* 2890 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM, 2891 * but nothing else; the calling function must be designed to handle those 2892 * other errors. 
2893 */ 2894 static void 2895 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2896 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2897 { 2898 COMPOUND4args_clnt args; 2899 COMPOUND4res_clnt res; 2900 nfs_argop4 argop[3]; 2901 SETCLIENTID4args *s_args; 2902 SETCLIENTID4resok *s_resok; 2903 int doqueue = 1; 2904 nfs4_ga_res_t *garp = NULL; 2905 timespec_t prop_time, after_time; 2906 verifier4 verf; 2907 clientid4 tmp_clientid; 2908 2909 ASSERT(!MUTEX_HELD(&np->s_lock)); 2910 2911 args.ctag = TAG_SETCLIENTID; 2912 2913 args.array = argop; 2914 args.array_len = 3; 2915 2916 /* PUTROOTFH */ 2917 argop[0].argop = OP_PUTROOTFH; 2918 2919 /* GETATTR */ 2920 argop[1].argop = OP_GETATTR; 2921 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2922 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2923 2924 /* SETCLIENTID */ 2925 argop[2].argop = OP_SETCLIENTID; 2926 2927 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2928 2929 mutex_enter(&np->s_lock); 2930 2931 s_args->client.verifier = np->clidtosend.verifier; 2932 s_args->client.id_len = np->clidtosend.id_len; 2933 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2934 s_args->client.id_val = np->clidtosend.id_val; 2935 2936 /* 2937 * Callback needs to happen on non-RDMA transport 2938 * Check if we have saved the original knetconfig 2939 * if so, use that instead. 2940 */ 2941 if (svp->sv_origknconf != NULL) 2942 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2943 else 2944 nfs4_cb_args(np, svp->sv_knconf, s_args); 2945 2946 mutex_exit(&np->s_lock); 2947 2948 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2949 2950 if (ep->error) 2951 return; 2952 2953 /* getattr lease_time res */ 2954 if (res.array_len >= 2) { 2955 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2956 2957 #ifndef _LP64 2958 /* 2959 * The 32 bit client cannot handle a lease time greater than 2960 * (INT32_MAX/1000000). 
This is due to the use of the 2961 * lease_time in calls to drv_usectohz() in 2962 * nfs4_renew_lease_thread(). The problem is that 2963 * drv_usectohz() takes a time_t (which is just a long = 4 2964 * bytes) as its parameter. The lease_time is multiplied by 2965 * 1000000 to convert seconds to usecs for the parameter. If 2966 * a number bigger than (INT32_MAX/1000000) is used then we 2967 * overflow on the 32bit client. 2968 */ 2969 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2970 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2971 } 2972 #endif 2973 2974 mutex_enter(&np->s_lock); 2975 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2976 2977 /* 2978 * Keep track of the lease period for the mi's 2979 * mi_msg_list. We need an appropiate time 2980 * bound to associate past facts with a current 2981 * event. The lease period is perfect for this. 2982 */ 2983 mutex_enter(&mi->mi_msg_list_lock); 2984 mi->mi_lease_period = np->s_lease_time; 2985 mutex_exit(&mi->mi_msg_list_lock); 2986 mutex_exit(&np->s_lock); 2987 } 2988 2989 2990 if (res.status == NFS4ERR_CLID_INUSE) { 2991 clientaddr4 *clid_inuse; 2992 2993 if (!(*retry_inusep)) { 2994 clid_inuse = &res.array->nfs_resop4_u. 2995 opsetclientid.SETCLIENTID4res_u.client_using; 2996 2997 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2998 "NFS4 mount (SETCLIENTID failed)." 2999 " nfs4_client_id.id is in" 3000 "use already by: r_netid<%s> r_addr<%s>", 3001 clid_inuse->r_netid, clid_inuse->r_addr); 3002 } 3003 3004 /* 3005 * XXX - The client should be more robust in its 3006 * handling of clientid in use errors (regen another 3007 * clientid and try again?) 3008 */ 3009 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3010 return; 3011 } 3012 3013 if (res.status) { 3014 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3015 return; 3016 } 3017 3018 s_resok = &res.array[2].nfs_resop4_u. 
3019 opsetclientid.SETCLIENTID4res_u.resok4; 3020 3021 tmp_clientid = s_resok->clientid; 3022 3023 verf = s_resok->setclientid_confirm; 3024 3025 #ifdef DEBUG 3026 if (nfs4setclientid_otw_debug) { 3027 union { 3028 clientid4 clientid; 3029 int foo[2]; 3030 } cid; 3031 3032 cid.clientid = s_resok->clientid; 3033 3034 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3035 "nfs4setclientid_otw: OK, clientid = %x,%x, " 3036 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 3037 } 3038 #endif 3039 3040 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3041 3042 /* Confirm the client id and get the lease_time attribute */ 3043 3044 args.ctag = TAG_SETCLIENTID_CF; 3045 3046 args.array = argop; 3047 args.array_len = 1; 3048 3049 argop[0].argop = OP_SETCLIENTID_CONFIRM; 3050 3051 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 3052 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 3053 3054 /* used to figure out RTT for np */ 3055 gethrestime(&prop_time); 3056 3057 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 3058 "start time: %ld sec %ld nsec", prop_time.tv_sec, 3059 prop_time.tv_nsec)); 3060 3061 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3062 3063 gethrestime(&after_time); 3064 mutex_enter(&np->s_lock); 3065 np->propagation_delay.tv_sec = 3066 MAX(1, after_time.tv_sec - prop_time.tv_sec); 3067 mutex_exit(&np->s_lock); 3068 3069 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 3070 "finish time: %ld sec ", after_time.tv_sec)); 3071 3072 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 3073 "propagation delay set to %ld sec", 3074 np->propagation_delay.tv_sec)); 3075 3076 if (ep->error) 3077 return; 3078 3079 if (res.status == NFS4ERR_CLID_INUSE) { 3080 clientaddr4 *clid_inuse; 3081 3082 if (!(*retry_inusep)) { 3083 clid_inuse = &res.array->nfs_resop4_u. 
3084 opsetclientid.SETCLIENTID4res_u.client_using; 3085 3086 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3087 "SETCLIENTID_CONFIRM failed. " 3088 "nfs4_client_id.id is in use already by: " 3089 "r_netid<%s> r_addr<%s>", 3090 clid_inuse->r_netid, clid_inuse->r_addr); 3091 } 3092 3093 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3094 return; 3095 } 3096 3097 if (res.status) { 3098 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3099 return; 3100 } 3101 3102 mutex_enter(&np->s_lock); 3103 np->clientid = tmp_clientid; 3104 np->s_flags |= N4S_CLIENTID_SET; 3105 3106 /* Add mi to np's mntinfo4 list */ 3107 nfs4_add_mi_to_server(np, mi); 3108 3109 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 3110 /* 3111 * Start lease management thread. 3112 * Keep trying until we succeed. 3113 */ 3114 3115 np->s_refcnt++; /* pass reference to thread */ 3116 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 3117 minclsyspri); 3118 } 3119 mutex_exit(&np->s_lock); 3120 3121 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3122 } 3123 3124 /* 3125 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3126 * mi's clientid the same as sp's. 3127 * Assumes sp is locked down. 
3128 */ 3129 void 3130 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3131 { 3132 mntinfo4_t *tmi; 3133 int in_list = 0; 3134 3135 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3136 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3137 ASSERT(sp != &nfs4_server_lst); 3138 ASSERT(MUTEX_HELD(&sp->s_lock)); 3139 3140 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3141 "nfs4_add_mi_to_server: add mi %p to sp %p", 3142 (void*)mi, (void*)sp)); 3143 3144 for (tmi = sp->mntinfo4_list; 3145 tmi != NULL; 3146 tmi = tmi->mi_clientid_next) { 3147 if (tmi == mi) { 3148 NFS4_DEBUG(nfs4_client_lease_debug, 3149 (CE_NOTE, 3150 "nfs4_add_mi_to_server: mi in list")); 3151 in_list = 1; 3152 } 3153 } 3154 3155 /* 3156 * First put a hold on the mntinfo4's vfsp so that references via 3157 * mntinfo4_list will be valid. 3158 */ 3159 if (!in_list) 3160 VFS_HOLD(mi->mi_vfsp); 3161 3162 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3163 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3164 3165 if (!in_list) { 3166 if (sp->mntinfo4_list) 3167 sp->mntinfo4_list->mi_clientid_prev = mi; 3168 mi->mi_clientid_next = sp->mntinfo4_list; 3169 sp->mntinfo4_list = mi; 3170 mi->mi_srvsettime = gethrestime_sec(); 3171 } 3172 3173 /* set mi's clientid to that of sp's for later matching */ 3174 mi->mi_clientid = sp->clientid; 3175 3176 /* 3177 * Update the clientid for any other mi's belonging to sp. This 3178 * must be done here while we hold sp->s_lock, so that 3179 * find_nfs4_server() continues to work. 3180 */ 3181 3182 for (tmi = sp->mntinfo4_list; 3183 tmi != NULL; 3184 tmi = tmi->mi_clientid_next) { 3185 if (tmi != mi) { 3186 tmi->mi_clientid = sp->clientid; 3187 } 3188 } 3189 } 3190 3191 /* 3192 * Remove the mi from sp's mntinfo4_list and release its reference. 3193 * Exception: if mi still has open files, flag it for later removal (when 3194 * all the files are closed). 
3195 * 3196 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3197 * thread to exit. 3198 */ 3199 static void 3200 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3201 { 3202 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3203 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3204 (void*)mi, (void*)sp)); 3205 3206 ASSERT(sp != NULL); 3207 ASSERT(MUTEX_HELD(&sp->s_lock)); 3208 ASSERT(mi->mi_open_files >= 0); 3209 3210 /* 3211 * First make sure this mntinfo4 can be taken off of the list, 3212 * ie: it doesn't have any open files remaining. 3213 */ 3214 if (mi->mi_open_files > 0) { 3215 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3216 "nfs4_remove_mi_from_server_nolock: don't " 3217 "remove mi since it still has files open")); 3218 3219 mutex_enter(&mi->mi_lock); 3220 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3221 mutex_exit(&mi->mi_lock); 3222 return; 3223 } 3224 3225 VFS_HOLD(mi->mi_vfsp); 3226 remove_mi(sp, mi); 3227 VFS_RELE(mi->mi_vfsp); 3228 3229 if (sp->mntinfo4_list == NULL) { 3230 /* last fs unmounted, kill the thread */ 3231 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3232 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3233 nfs4_mark_srv_dead(sp); 3234 } 3235 } 3236 3237 /* 3238 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3239 */ 3240 static void 3241 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3242 { 3243 ASSERT(MUTEX_HELD(&sp->s_lock)); 3244 3245 /* 3246 * We release a reference, and the caller must still have a 3247 * reference. 3248 */ 3249 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3250 3251 if (mi->mi_clientid_prev) { 3252 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3253 } else { 3254 /* This is the first mi in sp's mntinfo4_list */ 3255 /* 3256 * Make sure the first mntinfo4 in the list is the actual 3257 * mntinfo4 passed in. 
3258 */ 3259 ASSERT(sp->mntinfo4_list == mi); 3260 3261 sp->mntinfo4_list = mi->mi_clientid_next; 3262 } 3263 if (mi->mi_clientid_next) 3264 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3265 3266 /* Now mark the mntinfo4's links as being removed */ 3267 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3268 3269 VFS_RELE(mi->mi_vfsp); 3270 } 3271 3272 /* 3273 * Free all the entries in sp's mntinfo4_list. 3274 */ 3275 static void 3276 remove_all_mi(nfs4_server_t *sp) 3277 { 3278 mntinfo4_t *mi; 3279 3280 ASSERT(MUTEX_HELD(&sp->s_lock)); 3281 3282 while (sp->mntinfo4_list != NULL) { 3283 mi = sp->mntinfo4_list; 3284 /* 3285 * Grab a reference in case there is only one left (which 3286 * remove_mi() frees). 3287 */ 3288 VFS_HOLD(mi->mi_vfsp); 3289 remove_mi(sp, mi); 3290 VFS_RELE(mi->mi_vfsp); 3291 } 3292 } 3293 3294 /* 3295 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3296 * 3297 * This version can be called with a null nfs4_server_t arg, 3298 * and will either find the right one and handle locking, or 3299 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3300 */ 3301 void 3302 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3303 { 3304 nfs4_server_t *sp; 3305 3306 if (esp == NULL) { 3307 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3308 sp = find_nfs4_server_all(mi, 1); 3309 } else 3310 sp = esp; 3311 3312 if (sp != NULL) 3313 nfs4_remove_mi_from_server_nolock(mi, sp); 3314 3315 /* 3316 * If we had a valid esp as input, the calling function will be 3317 * responsible for unlocking the esp nfs4_server. 3318 */ 3319 if (esp == NULL) { 3320 if (sp != NULL) 3321 mutex_exit(&sp->s_lock); 3322 nfs_rw_exit(&mi->mi_recovlock); 3323 if (sp != NULL) 3324 nfs4_server_rele(sp); 3325 } 3326 } 3327 3328 /* 3329 * Return TRUE if the given server has any non-unmounted filesystems. 
3330 */ 3331 3332 bool_t 3333 nfs4_fs_active(nfs4_server_t *sp) 3334 { 3335 mntinfo4_t *mi; 3336 3337 ASSERT(MUTEX_HELD(&sp->s_lock)); 3338 3339 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3340 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3341 return (TRUE); 3342 } 3343 3344 return (FALSE); 3345 } 3346 3347 /* 3348 * Mark sp as finished and notify any waiters. 3349 */ 3350 3351 void 3352 nfs4_mark_srv_dead(nfs4_server_t *sp) 3353 { 3354 ASSERT(MUTEX_HELD(&sp->s_lock)); 3355 3356 sp->s_thread_exit = NFS4_THREAD_EXIT; 3357 cv_broadcast(&sp->cv_thread_exit); 3358 } 3359 3360 /* 3361 * Create a new nfs4_server_t structure. 3362 * Returns new node unlocked and not in list, but with a reference count of 3363 * 1. 3364 */ 3365 struct nfs4_server * 3366 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3367 { 3368 struct nfs4_server *np; 3369 timespec_t tt; 3370 union { 3371 struct { 3372 uint32_t sec; 3373 uint32_t subsec; 3374 } un_curtime; 3375 verifier4 un_verifier; 3376 } nfs4clientid_verifier; 3377 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3378 int len; 3379 3380 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3381 np->saddr.len = svp->sv_addr.len; 3382 np->saddr.maxlen = svp->sv_addr.maxlen; 3383 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3384 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3385 np->s_refcnt = 1; 3386 3387 /* 3388 * Build the nfs_client_id4 for this server mount. Ensure 3389 * the verifier is useful and that the identification is 3390 * somehow based on the server's address for the case of 3391 * multi-homed servers. 3392 */ 3393 nfs4clientid_verifier.un_verifier = 0; 3394 gethrestime(&tt); 3395 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3396 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3397 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3398 3399 /* 3400 * calculate the length of the opaque identifier. 
Subtract 2 3401 * for the "%s" and add the traditional +1 for null 3402 * termination. 3403 */ 3404 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3405 np->clidtosend.id_len = len + np->saddr.maxlen; 3406 3407 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3408 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3409 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3410 3411 np->s_flags = 0; 3412 np->mntinfo4_list = NULL; 3413 /* save cred for issuing rfs4calls inside the renew thread */ 3414 crhold(cr); 3415 np->s_cred = cr; 3416 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3417 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3418 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3419 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3420 offsetof(rnode4_t, r_deleg_link)); 3421 np->s_thread_exit = 0; 3422 np->state_ref_count = 0; 3423 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3424 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3425 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3426 np->s_otw_call_count = 0; 3427 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3428 np->zoneid = getzoneid(); 3429 np->zone_globals = nfs4_get_callback_globals(); 3430 ASSERT(np->zone_globals != NULL); 3431 return (np); 3432 } 3433 3434 /* 3435 * Create a new nfs4_server_t structure and add it to the list. 3436 * Returns new node locked; reference must eventually be freed. 
3437 */ 3438 static struct nfs4_server * 3439 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3440 { 3441 nfs4_server_t *sp; 3442 3443 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3444 sp = new_nfs4_server(svp, cr); 3445 mutex_enter(&sp->s_lock); 3446 insque(sp, &nfs4_server_lst); 3447 sp->s_refcnt++; /* list gets a reference */ 3448 sp->s_flags |= N4S_INSERTED; 3449 sp->clientid = 0; 3450 return (sp); 3451 } 3452 3453 int nfs4_server_t_debug = 0; 3454 3455 #ifdef lint 3456 extern void 3457 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3458 #endif 3459 3460 #ifndef lint 3461 #ifdef DEBUG 3462 void 3463 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3464 { 3465 int hash16(void *p, int len); 3466 nfs4_server_t *np; 3467 3468 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3469 "dumping nfs4_server_t list in %s", txt)); 3470 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3471 "mi 0x%p, want clientid %llx, addr %d/%04X", 3472 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3473 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3474 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3475 np = np->forw) { 3476 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3477 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3478 np, (longlong_t)np->clientid, np->saddr.len, 3479 hash16((void *)np->saddr.buf, np->saddr.len), 3480 np->state_ref_count)); 3481 if (np->saddr.len == srv_p->sv_addr.len && 3482 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3483 np->saddr.len) == 0) 3484 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3485 " - address matches")); 3486 if (np->clientid == clientid || np->clientid == 0) 3487 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3488 " - clientid matches")); 3489 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3490 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3491 " - thread not exiting")); 3492 } 3493 delay(hz); 3494 } 3495 #endif 3496 #endif 3497 3498 3499 /* 3500 * Move a mntinfo4_t from one server list to another. 
3501 * Locking of the two nfs4_server_t nodes will be done in list order. 3502 * 3503 * Returns NULL if the current nfs4_server_t for the filesystem could not 3504 * be found (e.g., due to forced unmount). Otherwise returns a reference 3505 * to the new nfs4_server_t, which must eventually be freed. 3506 */ 3507 nfs4_server_t * 3508 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3509 { 3510 nfs4_server_t *p, *op = NULL, *np = NULL; 3511 int num_open; 3512 zoneid_t zoneid = nfs_zoneid(); 3513 3514 ASSERT(nfs_zone() == mi->mi_zone); 3515 3516 mutex_enter(&nfs4_server_lst_lock); 3517 #ifdef DEBUG 3518 if (nfs4_server_t_debug) 3519 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3520 #endif 3521 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3522 if (p->zoneid != zoneid) 3523 continue; 3524 if (p->saddr.len == old->sv_addr.len && 3525 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3526 p->s_thread_exit != NFS4_THREAD_EXIT) { 3527 op = p; 3528 mutex_enter(&op->s_lock); 3529 op->s_refcnt++; 3530 } 3531 if (p->saddr.len == new->sv_addr.len && 3532 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3533 p->s_thread_exit != NFS4_THREAD_EXIT) { 3534 np = p; 3535 mutex_enter(&np->s_lock); 3536 } 3537 if (op != NULL && np != NULL) 3538 break; 3539 } 3540 if (op == NULL) { 3541 /* 3542 * Filesystem has been forcibly unmounted. Bail out. 
3543 */ 3544 if (np != NULL) 3545 mutex_exit(&np->s_lock); 3546 mutex_exit(&nfs4_server_lst_lock); 3547 return (NULL); 3548 } 3549 if (np != NULL) { 3550 np->s_refcnt++; 3551 } else { 3552 #ifdef DEBUG 3553 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3554 "nfs4_move_mi: no target nfs4_server, will create.")); 3555 #endif 3556 np = add_new_nfs4_server(new, kcred); 3557 } 3558 mutex_exit(&nfs4_server_lst_lock); 3559 3560 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3561 "nfs4_move_mi: for mi 0x%p, " 3562 "old servinfo4 0x%p, new servinfo4 0x%p, " 3563 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3564 (void*)mi, (void*)old, (void*)new, 3565 (void*)op, (void*)np)); 3566 ASSERT(op != NULL && np != NULL); 3567 3568 /* discard any delegations */ 3569 nfs4_deleg_discard(mi, op); 3570 3571 num_open = mi->mi_open_files; 3572 mi->mi_open_files = 0; 3573 op->state_ref_count -= num_open; 3574 ASSERT(op->state_ref_count >= 0); 3575 np->state_ref_count += num_open; 3576 nfs4_remove_mi_from_server_nolock(mi, op); 3577 mi->mi_open_files = num_open; 3578 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3579 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3580 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3581 3582 nfs4_add_mi_to_server(np, mi); 3583 3584 mutex_exit(&op->s_lock); 3585 nfs4_server_rele(op); 3586 mutex_exit(&np->s_lock); 3587 3588 return (np); 3589 } 3590 3591 /* 3592 * Need to have the nfs4_server_lst_lock. 3593 * Search the nfs4_server list to find a match on this servinfo4 3594 * based on its address. 3595 * 3596 * Returns NULL if no match is found. Otherwise returns a reference (which 3597 * must eventually be freed) to a locked nfs4_server. 
3598 */ 3599 nfs4_server_t * 3600 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3601 { 3602 nfs4_server_t *np; 3603 zoneid_t zoneid = nfs_zoneid(); 3604 3605 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3606 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3607 if (np->zoneid == zoneid && 3608 np->saddr.len == srv_p->sv_addr.len && 3609 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3610 np->saddr.len) == 0 && 3611 np->s_thread_exit != NFS4_THREAD_EXIT) { 3612 mutex_enter(&np->s_lock); 3613 np->s_refcnt++; 3614 return (np); 3615 } 3616 } 3617 return (NULL); 3618 } 3619 3620 /* 3621 * Search the nfs4_server_lst to find a match based on clientid and 3622 * addr. 3623 * Locks the nfs4_server down if it is found and returns a reference that 3624 * must eventually be freed. 3625 * 3626 * Returns NULL it no match is found. This means one of two things: either 3627 * mi is in the process of being mounted, or mi has been unmounted. 3628 * 3629 * The caller should be holding mi->mi_recovlock, and it should continue to 3630 * hold the lock until done with the returned nfs4_server_t. Once 3631 * mi->mi_recovlock is released, there is no guarantee that the returned 3632 * mi->nfs4_server_t will continue to correspond to mi. 3633 */ 3634 nfs4_server_t * 3635 find_nfs4_server(mntinfo4_t *mi) 3636 { 3637 return (find_nfs4_server_all(mi, 0)); 3638 } 3639 3640 /* 3641 * Same as above, but takes an "all" parameter which can be 3642 * set to 1 if the caller wishes to find nfs4_server_t's which 3643 * have been marked for termination by the exit of the renew 3644 * thread. This should only be used by operations which are 3645 * cleaning up and will not cause an OTW op. 
3646 */ 3647 nfs4_server_t * 3648 find_nfs4_server_all(mntinfo4_t *mi, int all) 3649 { 3650 nfs4_server_t *np; 3651 servinfo4_t *svp; 3652 zoneid_t zoneid = mi->mi_zone->zone_id; 3653 3654 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3655 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3656 /* 3657 * This can be called from nfs4_unmount() which can be called from the 3658 * global zone, hence it's legal for the global zone to muck with 3659 * another zone's server list, as long as it doesn't try to contact 3660 * them. 3661 */ 3662 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3663 nfs_global_client_only != 0); 3664 3665 /* 3666 * The nfs4_server_lst_lock global lock is held when we get a new 3667 * clientid (via SETCLIENTID OTW). Holding this global lock and 3668 * mi_recovlock (READER is fine) ensures that the nfs4_server 3669 * and this mntinfo4 can't get out of sync, so the following search is 3670 * always valid. 3671 */ 3672 mutex_enter(&nfs4_server_lst_lock); 3673 #ifdef DEBUG 3674 if (nfs4_server_t_debug) { 3675 /* mi->mi_clientid is unprotected, ok for debug output */ 3676 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3677 mi->mi_curr_serv); 3678 } 3679 #endif 3680 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3681 mutex_enter(&np->s_lock); 3682 svp = mi->mi_curr_serv; 3683 3684 if (np->zoneid == zoneid && 3685 np->clientid == mi->mi_clientid && 3686 np->saddr.len == svp->sv_addr.len && 3687 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3688 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3689 mutex_exit(&nfs4_server_lst_lock); 3690 np->s_refcnt++; 3691 return (np); 3692 } 3693 mutex_exit(&np->s_lock); 3694 } 3695 mutex_exit(&nfs4_server_lst_lock); 3696 3697 return (NULL); 3698 } 3699 3700 /* 3701 * Release the reference to sp and destroy it if that's the last one. 
3702 */ 3703 3704 void 3705 nfs4_server_rele(nfs4_server_t *sp) 3706 { 3707 mutex_enter(&sp->s_lock); 3708 ASSERT(sp->s_refcnt > 0); 3709 sp->s_refcnt--; 3710 if (sp->s_refcnt > 0) { 3711 mutex_exit(&sp->s_lock); 3712 return; 3713 } 3714 mutex_exit(&sp->s_lock); 3715 3716 mutex_enter(&nfs4_server_lst_lock); 3717 mutex_enter(&sp->s_lock); 3718 if (sp->s_refcnt > 0) { 3719 mutex_exit(&sp->s_lock); 3720 mutex_exit(&nfs4_server_lst_lock); 3721 return; 3722 } 3723 remque(sp); 3724 sp->forw = sp->back = NULL; 3725 mutex_exit(&nfs4_server_lst_lock); 3726 destroy_nfs4_server(sp); 3727 } 3728 3729 static void 3730 destroy_nfs4_server(nfs4_server_t *sp) 3731 { 3732 ASSERT(MUTEX_HELD(&sp->s_lock)); 3733 ASSERT(sp->s_refcnt == 0); 3734 ASSERT(sp->s_otw_call_count == 0); 3735 3736 remove_all_mi(sp); 3737 3738 crfree(sp->s_cred); 3739 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3740 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3741 mutex_exit(&sp->s_lock); 3742 3743 /* destroy the nfs4_server */ 3744 nfs4callback_destroy(sp); 3745 list_destroy(&sp->s_deleg_list); 3746 mutex_destroy(&sp->s_lock); 3747 cv_destroy(&sp->cv_thread_exit); 3748 cv_destroy(&sp->s_cv_otw_count); 3749 cv_destroy(&sp->s_clientid_pend); 3750 cv_destroy(&sp->wait_cb_null); 3751 nfs_rw_destroy(&sp->s_recovlock); 3752 kmem_free(sp, sizeof (*sp)); 3753 } 3754 3755 /* 3756 * Lock sp, but only if it's still active (in the list and hasn't been 3757 * flagged as exiting) or 'all' is non-zero. 3758 * Returns TRUE if sp got locked and adds a reference to sp. 
3759 */ 3760 bool_t 3761 nfs4_server_vlock(nfs4_server_t *sp, int all) 3762 { 3763 nfs4_server_t *np; 3764 3765 mutex_enter(&nfs4_server_lst_lock); 3766 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3767 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3768 all != 0)) { 3769 mutex_enter(&np->s_lock); 3770 np->s_refcnt++; 3771 mutex_exit(&nfs4_server_lst_lock); 3772 return (TRUE); 3773 } 3774 } 3775 mutex_exit(&nfs4_server_lst_lock); 3776 return (FALSE); 3777 } 3778 3779 /* 3780 * Fork off a thread to free the data structures for a mount. 3781 */ 3782 3783 static void 3784 async_free_mount(vfs_t *vfsp, cred_t *cr) 3785 { 3786 freemountargs_t *args; 3787 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3788 args->fm_vfsp = vfsp; 3789 VFS_HOLD(vfsp); 3790 MI4_HOLD(VFTOMI4(vfsp)); 3791 args->fm_cr = cr; 3792 crhold(cr); 3793 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3794 minclsyspri); 3795 } 3796 3797 static void 3798 nfs4_free_mount_thread(freemountargs_t *args) 3799 { 3800 mntinfo4_t *mi; 3801 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3802 mi = VFTOMI4(args->fm_vfsp); 3803 crfree(args->fm_cr); 3804 VFS_RELE(args->fm_vfsp); 3805 MI4_RELE(mi); 3806 kmem_free(args, sizeof (freemountargs_t)); 3807 zthread_exit(); 3808 /* NOTREACHED */ 3809 } 3810 3811 /* 3812 * Thread to free the data structures for a given filesystem. 3813 */ 3814 static void 3815 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3816 { 3817 mntinfo4_t *mi = VFTOMI4(vfsp); 3818 nfs4_server_t *sp; 3819 callb_cpr_t cpr_info; 3820 kmutex_t cpr_lock; 3821 boolean_t async_thread; 3822 int removed; 3823 3824 /* 3825 * We need to participate in the CPR framework if this is a kernel 3826 * thread. 
3827 */ 3828 async_thread = (curproc == nfs_zone()->zone_zsched); 3829 if (async_thread) { 3830 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3831 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3832 "nfsv4AsyncUnmount"); 3833 } 3834 3835 /* 3836 * We need to wait for all outstanding OTW calls 3837 * and recovery to finish before we remove the mi 3838 * from the nfs4_server_t, as current pending 3839 * calls might still need this linkage (in order 3840 * to find a nfs4_server_t from a mntinfo4_t). 3841 */ 3842 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3843 sp = find_nfs4_server(mi); 3844 nfs_rw_exit(&mi->mi_recovlock); 3845 3846 if (sp) { 3847 while (sp->s_otw_call_count != 0) { 3848 if (async_thread) { 3849 mutex_enter(&cpr_lock); 3850 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3851 mutex_exit(&cpr_lock); 3852 } 3853 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3854 if (async_thread) { 3855 mutex_enter(&cpr_lock); 3856 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3857 mutex_exit(&cpr_lock); 3858 } 3859 } 3860 mutex_exit(&sp->s_lock); 3861 nfs4_server_rele(sp); 3862 sp = NULL; 3863 } 3864 3865 3866 mutex_enter(&mi->mi_lock); 3867 while (mi->mi_in_recovery != 0) { 3868 if (async_thread) { 3869 mutex_enter(&cpr_lock); 3870 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3871 mutex_exit(&cpr_lock); 3872 } 3873 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3874 if (async_thread) { 3875 mutex_enter(&cpr_lock); 3876 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3877 mutex_exit(&cpr_lock); 3878 } 3879 } 3880 mutex_exit(&mi->mi_lock); 3881 3882 /* 3883 * The original purge of the dnlc via 'dounmount' 3884 * doesn't guarantee that another dnlc entry was not 3885 * added while we waitied for all outstanding OTW 3886 * and recovery calls to finish. So re-purge the 3887 * dnlc now. 3888 */ 3889 (void) dnlc_purge_vfsp(vfsp, 0); 3890 3891 /* 3892 * We need to explicitly stop the manager thread; the asyc worker 3893 * threads can timeout and exit on their own. 
3894 */ 3895 mutex_enter(&mi->mi_async_lock); 3896 mi->mi_max_threads = 0; 3897 cv_broadcast(&mi->mi_async_work_cv); 3898 mutex_exit(&mi->mi_async_lock); 3899 if (mi->mi_manager_thread) 3900 nfs4_async_manager_stop(vfsp); 3901 3902 destroy_rtable4(vfsp, cr); 3903 3904 nfs4_remove_mi_from_server(mi, NULL); 3905 3906 if (async_thread) { 3907 mutex_enter(&cpr_lock); 3908 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3909 mutex_destroy(&cpr_lock); 3910 } 3911 3912 removed = nfs4_mi_zonelist_remove(mi); 3913 if (removed) 3914 zone_rele(mi->mi_zone); 3915 } 3916