1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All Rights Reserved 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/time.h> 54 #include <sys/disp.h> 55 #include <sys/policy.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/dnlc.h> 59 #include <sys/list.h> 60 #include <sys/mntent.h> 61 #include <sys/tsol/label.h> 62 63 #include <rpc/types.h> 64 #include <rpc/auth.h> 65 #include <rpc/rpcsec_gss.h> 66 #include <rpc/clnt.h> 67 68 #include <nfs/nfs.h> 69 #include <nfs/nfs_clnt.h> 70 #include <nfs/mount.h> 71 #include <nfs/nfs_acl.h> 72 73 #include <fs/fs_subr.h> 74 75 #include <nfs/nfs4.h> 76 #include <nfs/rnode4.h> 77 #include <nfs/nfs4_clnt.h> 78 #include <sys/fs/autofs.h> 79 80 81 /* 82 * Arguments passed to thread to free data structures from forced unmount. 83 */ 84 85 typedef struct { 86 vfs_t *fm_vfsp; 87 cred_t *fm_cr; 88 } freemountargs_t; 89 90 static void async_free_mount(vfs_t *, cred_t *); 91 static void nfs4_free_mount(vfs_t *, cred_t *); 92 static void nfs4_free_mount_thread(freemountargs_t *); 93 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 94 95 /* 96 * From rpcsec module (common/rpcsec). 
97 */ 98 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 99 extern void sec_clnt_freeinfo(struct sec_data *); 100 101 /* 102 * The order and contents of this structure must be kept in sync with that of 103 * rfsreqcnt_v4_tmpl in nfs_stats.c 104 */ 105 static char *rfsnames_v4[] = { 106 "null", "compound", "reserved", "access", "close", "commit", "create", 107 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock", 108 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr", 109 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh", 110 "read", "readdir", "readlink", "remove", "rename", "renew", 111 "restorefh", "savefh", "secinfo", "setattr", "setclientid", 112 "setclientid_confirm", "verify", "write" 113 }; 114 115 /* 116 * nfs4_max_mount_retry is the number of times the client will redrive 117 * a mount compound before giving up and returning failure. The intent 118 * is to redrive mount compounds which fail NFS4ERR_STALE so that 119 * if a component of the server path being mounted goes stale, it can 120 * "recover" by redriving the mount compund (LOOKUP ops). This recovery 121 * code is needed outside of the recovery framework because mount is a 122 * special case. The client doesn't create vnodes/rnodes for components 123 * of the server path being mounted. The recovery code recovers real 124 * client objects, not STALE FHs which map to components of the server 125 * path being mounted. 126 * 127 * We could just fail the mount on the first time, but that would 128 * instantly trigger failover (from nfs4_mount), and the client should 129 * try to re-lookup the STALE FH before doing failover. The easiest 130 * way to "re-lookup" is to simply redrive the mount compound. 131 */ 132 static int nfs4_max_mount_retry = 2; 133 134 /* 135 * nfs4 vfs operations. 136 */ 137 static int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 138 static int nfs4_unmount(vfs_t *, int, cred_t *); 139 static int nfs4_root(vfs_t *, vnode_t **); 140 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 141 static int nfs4_sync(vfs_t *, short, cred_t *); 142 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 143 static int nfs4_mountroot(vfs_t *, whymountroot_t); 144 static void nfs4_freevfs(vfs_t *); 145 146 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 147 int, cred_t *, zone_t *); 148 149 vfsops_t *nfs4_vfsops; 150 151 int nfs4_vfsinit(void); 152 void nfs4_vfsfini(void); 153 static void nfs4setclientid_init(void); 154 static void nfs4setclientid_fini(void); 155 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *, 156 struct nfs4_server *, nfs4_error_t *, int *); 157 static void destroy_nfs4_server(nfs4_server_t *); 158 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 159 160 /* 161 * Initialize the vfs structure 162 */ 163 164 static int nfs4fstyp; 165 166 167 /* 168 * Debug variable to check for rdma based 169 * transport startup and cleanup. Controlled 170 * through /etc/system. Off by default. 
171 */ 172 extern int rdma_debug; 173 174 int 175 nfs4init(int fstyp, char *name) 176 { 177 static const fs_operation_def_t nfs4_vfsops_template[] = { 178 VFSNAME_MOUNT, nfs4_mount, 179 VFSNAME_UNMOUNT, nfs4_unmount, 180 VFSNAME_ROOT, nfs4_root, 181 VFSNAME_STATVFS, nfs4_statvfs, 182 VFSNAME_SYNC, (fs_generic_func_p) nfs4_sync, 183 VFSNAME_VGET, nfs4_vget, 184 VFSNAME_MOUNTROOT, nfs4_mountroot, 185 VFSNAME_FREEVFS, (fs_generic_func_p)nfs4_freevfs, 186 NULL, NULL 187 }; 188 int error; 189 190 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 191 if (error != 0) { 192 zcmn_err(GLOBAL_ZONEID, CE_WARN, 193 "nfs4init: bad vfs ops template"); 194 return (error); 195 } 196 197 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 198 if (error != 0) { 199 (void) vfs_freevfsops_by_type(fstyp); 200 zcmn_err(GLOBAL_ZONEID, CE_WARN, 201 "nfs4init: bad vnode ops template"); 202 return (error); 203 } 204 205 nfs4fstyp = fstyp; 206 207 (void) nfs4_vfsinit(); 208 209 (void) nfs4_init_dot_entries(); 210 211 return (0); 212 } 213 214 void 215 nfs4fini(void) 216 { 217 (void) nfs4_destroy_dot_entries(); 218 nfs4_vfsfini(); 219 } 220 221 /* 222 * Create a new sec_data structure to store AUTH_DH related data: 223 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 224 * flag set for NFS V4 since we are avoiding to contact the rpcbind 225 * daemon and is using the IP time service (IPPORT_TIMESERVER). 226 * 227 * sec_data can be freed by sec_clnt_freeinfo(). 228 */ 229 struct sec_data * 230 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 231 struct knetconfig *knconf) { 232 struct sec_data *secdata; 233 dh_k4_clntdata_t *data; 234 char *pf, *p; 235 236 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 237 return (NULL); 238 239 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 240 secdata->flags = 0; 241 242 data = kmem_alloc(sizeof (*data), KM_SLEEP); 243 244 data->syncaddr.maxlen = syncaddr->maxlen; 245 data->syncaddr.len = syncaddr->len; 246 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 247 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 248 249 /* 250 * duplicate the knconf information for the 251 * new opaque data. 252 */ 253 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 254 *data->knconf = *knconf; 255 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 256 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 257 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 258 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 259 data->knconf->knc_protofmly = pf; 260 data->knconf->knc_proto = p; 261 262 /* move server netname to the sec_data structure */ 263 data->netname = kmem_alloc(nlen, KM_SLEEP); 264 bcopy(netname, data->netname, nlen); 265 data->netnamelen = (int)nlen; 266 267 secdata->secmod = AUTH_DH; 268 secdata->rpcflavor = AUTH_DH; 269 secdata->data = (caddr_t)data; 270 271 return (secdata); 272 } 273 274 static int 275 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 276 { 277 servinfo4_t *si; 278 279 /* 280 * Iterate over the servinfo4 list to make sure 281 * we do not have a duplicate. 
Skip any servinfo4 282 * that has been marked "NOT IN USE" 283 */ 284 for (si = svp_head; si; si = si->sv_next) { 285 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 286 if (si->sv_flags & SV4_NOTINUSE) { 287 nfs_rw_exit(&si->sv_lock); 288 continue; 289 } 290 nfs_rw_exit(&si->sv_lock); 291 if (si == svp) 292 continue; 293 if (si->sv_addr.len == svp->sv_addr.len && 294 strcmp(si->sv_knconf->knc_protofmly, 295 svp->sv_knconf->knc_protofmly) == 0 && 296 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 297 si->sv_addr.len) == 0) { 298 /* it's a duplicate */ 299 return (1); 300 } 301 } 302 /* it's not a duplicate */ 303 return (0); 304 } 305 306 void 307 nfs4_free_args(struct nfs_args *nargs) 308 { 309 if (nargs->knconf) { 310 if (nargs->knconf->knc_protofmly) 311 kmem_free(nargs->knconf->knc_protofmly, 312 KNC_STRSIZE); 313 if (nargs->knconf->knc_proto) 314 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 315 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 316 nargs->knconf = NULL; 317 } 318 319 if (nargs->fh) { 320 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 321 nargs->fh = NULL; 322 } 323 324 if (nargs->hostname) { 325 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 326 nargs->hostname = NULL; 327 } 328 329 if (nargs->addr) { 330 if (nargs->addr->buf) { 331 ASSERT(nargs->addr->len); 332 kmem_free(nargs->addr->buf, nargs->addr->len); 333 } 334 kmem_free(nargs->addr, sizeof (struct netbuf)); 335 nargs->addr = NULL; 336 } 337 338 if (nargs->syncaddr) { 339 ASSERT(nargs->syncaddr->len); 340 if (nargs->syncaddr->buf) { 341 ASSERT(nargs->syncaddr->len); 342 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 343 } 344 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 345 nargs->syncaddr = NULL; 346 } 347 348 if (nargs->netname) { 349 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 350 nargs->netname = NULL; 351 } 352 353 if (nargs->nfs_ext_u.nfs_extA.secdata) { 354 sec_clnt_freeinfo( 355 nargs->nfs_ext_u.nfs_extA.secdata); 356 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 357 } 358 } 359 360 361 int 362 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs) 363 { 364 365 int error; 366 size_t hlen; /* length of hostname */ 367 size_t nlen; /* length of netname */ 368 char netname[MAXNETNAMELEN+1]; /* server's netname */ 369 struct netbuf addr; /* server's address */ 370 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 371 struct knetconfig *knconf; /* transport structure */ 372 struct sec_data *secdata = NULL; /* security data */ 373 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 374 STRUCT_DECL(knetconfig, knconf_tmp); 375 STRUCT_DECL(netbuf, addr_tmp); 376 int flags; 377 char *p, *pf; 378 struct pathname pn; 379 char *userbufptr; 380 381 382 bzero(nargs, sizeof (*nargs)); 383 384 STRUCT_INIT(args, get_udatamodel()); 385 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 386 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 387 STRUCT_SIZE(args)))) 388 return (EFAULT); 389 390 nargs->wsize = STRUCT_FGET(args, wsize); 391 nargs->rsize = STRUCT_FGET(args, rsize); 392 nargs->timeo = STRUCT_FGET(args, timeo); 393 nargs->retrans = STRUCT_FGET(args, retrans); 394 nargs->acregmin = STRUCT_FGET(args, acregmin); 395 nargs->acregmax = STRUCT_FGET(args, acregmax); 396 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 397 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 398 399 flags = STRUCT_FGET(args, flags); 400 nargs->flags = flags; 401 402 addr.buf = NULL; 403 syncaddr.buf = NULL; 404 405 406 /* 407 * Allocate space for a knetconfig structure and 408 * its 
strings and copy in from user-land. 409 */ 410 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 411 STRUCT_INIT(knconf_tmp, get_udatamodel()); 412 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 413 STRUCT_SIZE(knconf_tmp))) { 414 kmem_free(knconf, sizeof (*knconf)); 415 return (EFAULT); 416 } 417 418 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 419 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 420 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 421 if (get_udatamodel() != DATAMODEL_LP64) { 422 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 423 } else { 424 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 425 } 426 427 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 428 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 429 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 430 if (error) { 431 kmem_free(pf, KNC_STRSIZE); 432 kmem_free(p, KNC_STRSIZE); 433 kmem_free(knconf, sizeof (*knconf)); 434 return (error); 435 } 436 437 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 438 if (error) { 439 kmem_free(pf, KNC_STRSIZE); 440 kmem_free(p, KNC_STRSIZE); 441 kmem_free(knconf, sizeof (*knconf)); 442 return (error); 443 } 444 445 446 knconf->knc_protofmly = pf; 447 knconf->knc_proto = p; 448 449 nargs->knconf = knconf; 450 451 /* 452 * Get server address 453 */ 454 STRUCT_INIT(addr_tmp, get_udatamodel()); 455 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 456 STRUCT_SIZE(addr_tmp))) { 457 error = EFAULT; 458 goto errout; 459 } 460 461 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 462 userbufptr = STRUCT_FGETP(addr_tmp, buf); 463 addr.len = STRUCT_FGET(addr_tmp, len); 464 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 465 addr.maxlen = addr.len; 466 if (copyin(userbufptr, addr.buf, addr.len)) { 467 kmem_free(addr.buf, addr.len); 468 error = EFAULT; 469 goto errout; 470 } 471 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 472 473 /* 474 * Get the root fhandle 475 */ 476 error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 477 if (error) 478 goto errout; 479 480 /* Volatile fh: keep server paths, so use actual-size strings */ 481 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 482 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen); 483 nargs->fh[pn.pn_pathlen] = '\0'; 484 pn_free(&pn); 485 486 487 /* 488 * Get server's hostname 489 */ 490 if (flags & NFSMNT_HOSTNAME) { 491 error = copyinstr(STRUCT_FGETP(args, hostname), 492 netname, sizeof (netname), &hlen); 493 if (error) 494 goto errout; 495 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 496 (void) strcpy(nargs->hostname, netname); 497 498 } else { 499 nargs->hostname = NULL; 500 } 501 502 503 /* 504 * If there are syncaddr and netname data, load them in. This is 505 * to support data needed for NFSV4 when AUTH_DH is the negotiated 506 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 
507 */ 508 netname[0] = '\0'; 509 if (flags & NFSMNT_SECURE) { 510 511 /* get syncaddr */ 512 STRUCT_INIT(addr_tmp, get_udatamodel()); 513 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 514 STRUCT_SIZE(addr_tmp))) { 515 error = EINVAL; 516 goto errout; 517 } 518 userbufptr = STRUCT_FGETP(addr_tmp, buf); 519 syncaddr.len = STRUCT_FGET(addr_tmp, len); 520 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 521 syncaddr.maxlen = syncaddr.len; 522 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 523 kmem_free(syncaddr.buf, syncaddr.len); 524 error = EFAULT; 525 goto errout; 526 } 527 528 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 529 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 530 531 /* get server's netname */ 532 if (copyinstr(STRUCT_FGETP(args, netname), netname, 533 sizeof (netname), &nlen)) { 534 error = EFAULT; 535 goto errout; 536 } 537 538 netname[nlen] = '\0'; 539 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 540 (void) strcpy(nargs->netname, netname); 541 } 542 543 /* 544 * Get the extention data which has the security data structure. 545 * This includes data for AUTH_SYS as well. 546 */ 547 if (flags & NFSMNT_NEWARGS) { 548 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 549 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 550 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 551 /* 552 * Indicating the application is using the new 553 * sec_data structure to pass in the security 554 * data. 555 */ 556 if (STRUCT_FGETP(args, 557 nfs_ext_u.nfs_extA.secdata) != NULL) { 558 error = sec_clnt_loadinfo( 559 (struct sec_data *)STRUCT_FGETP(args, 560 nfs_ext_u.nfs_extA.secdata), 561 &secdata, get_udatamodel()); 562 } 563 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 564 } 565 } 566 567 if (error) 568 goto errout; 569 570 /* 571 * Failover support: 572 * 573 * We may have a linked list of nfs_args structures, 574 * which means the user is looking for failover. If 575 * the mount is either not "read-only" or "soft", 576 * we want to bail out with EINVAL. 577 */ 578 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 579 nargs->nfs_ext_u.nfs_extB.next = 580 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 581 582 errout: 583 if (error) 584 nfs4_free_args(nargs); 585 586 return (error); 587 } 588 589 590 /* 591 * nfs mount vfsop 592 * Set up mount info record and attach it to vfs struct. 593 */ 594 static int 595 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 596 { 597 char *data = uap->dataptr; 598 int error; 599 vnode_t *rtvp; /* the server's root */ 600 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 601 struct knetconfig *rdma_knconf; /* rdma transport structure */ 602 rnode4_t *rp; 603 struct servinfo4 *svp; /* nfs server info */ 604 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 605 struct servinfo4 *svp_head; /* first nfs server info */ 606 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 607 struct sec_data *secdata; /* security data */ 608 struct nfs_args *args = NULL; 609 int flags, addr_type, removed; 610 zone_t *zone = nfs_zone(); 611 nfs4_error_t n4e; 612 zone_t *mntzone = NULL; 613 614 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 615 return (EPERM); 616 if (mvp->v_type != VDIR) 617 return (ENOTDIR); 618 /* 619 * get arguments 620 * 621 * nfs_args is now versioned and is extensible, so 622 * uap->datalen might be different from sizeof (args) 623 * in a compatible situation. 
624 */ 625 more: 626 if (!(uap->flags & MS_SYSSPACE)) { 627 if (args == NULL) 628 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 629 else 630 nfs4_free_args(args); 631 error = nfs4_copyin(data, uap->datalen, args); 632 if (error) { 633 if (args) { 634 kmem_free(args, sizeof (*args)); 635 } 636 return (error); 637 } 638 } else { 639 args = (struct nfs_args *)data; 640 } 641 642 643 flags = args->flags; 644 645 /* 646 * If the request changes the locking type, disallow the remount, 647 * because it's questionable whether we can transfer the 648 * locking state correctly. 649 */ 650 if (uap->flags & MS_REMOUNT) { 651 if (!(uap->flags & MS_SYSSPACE)) { 652 nfs4_free_args(args); 653 kmem_free(args, sizeof (*args)); 654 } 655 if ((mi = VFTOMI4(vfsp)) != NULL) { 656 uint_t new_mi_llock; 657 uint_t old_mi_llock; 658 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 659 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0; 660 if (old_mi_llock != new_mi_llock) 661 return (EBUSY); 662 } 663 return (0); 664 } 665 666 mutex_enter(&mvp->v_lock); 667 if (!(uap->flags & MS_OVERLAY) && 668 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 669 mutex_exit(&mvp->v_lock); 670 if (!(uap->flags & MS_SYSSPACE)) { 671 nfs4_free_args(args); 672 kmem_free(args, sizeof (*args)); 673 } 674 return (EBUSY); 675 } 676 mutex_exit(&mvp->v_lock); 677 678 /* make sure things are zeroed for errout: */ 679 rtvp = NULL; 680 mi = NULL; 681 secdata = NULL; 682 683 /* 684 * A valid knetconfig structure is required. 685 */ 686 687 if (!(flags & NFSMNT_KNCONF) || 688 args->knconf == NULL || args->knconf->knc_protofmly == NULL || 689 args->knconf->knc_proto == NULL || 690 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) { 691 if (!(uap->flags & MS_SYSSPACE)) { 692 nfs4_free_args(args); 693 kmem_free(args, sizeof (*args)); 694 } 695 return (EINVAL); 696 } 697 698 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 699 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 700 if (!(uap->flags & MS_SYSSPACE)) { 701 nfs4_free_args(args); 702 kmem_free(args, sizeof (*args)); 703 } 704 return (EINVAL); 705 } 706 707 708 /* 709 * Allocate a servinfo4 struct. 
710 */ 711 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 712 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 713 if (svp_tail) { 714 svp_2ndlast = svp_tail; 715 svp_tail->sv_next = svp; 716 } else { 717 svp_head = svp; 718 svp_2ndlast = svp; 719 } 720 721 svp_tail = svp; 722 svp->sv_knconf = args->knconf; 723 args->knconf = NULL; 724 725 726 /* 727 * Get server address 728 */ 729 730 if (args->addr == NULL || args->addr->buf == NULL) { 731 error = EINVAL; 732 goto errout; 733 } 734 735 svp->sv_addr.maxlen = args->addr->maxlen; 736 svp->sv_addr.len = args->addr->len; 737 svp->sv_addr.buf = args->addr->buf; 738 args->addr->buf = NULL; 739 740 741 /* 742 * Get the root fhandle 743 */ 744 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) { 745 error = EINVAL; 746 goto errout; 747 } 748 749 svp->sv_path = args->fh; 750 svp->sv_pathlen = strlen(args->fh) + 1; 751 args->fh = NULL; 752 753 /* 754 * Get server's hostname 755 */ 756 if (flags & NFSMNT_HOSTNAME) { 757 if (args->hostname == NULL || (strlen(args->hostname) > 758 MAXNETNAMELEN)) { 759 error = EINVAL; 760 goto errout; 761 } 762 svp->sv_hostnamelen = strlen(args->hostname) + 1; 763 svp->sv_hostname = args->hostname; 764 args->hostname = NULL; 765 } else { 766 char *p = "unknown-host"; 767 svp->sv_hostnamelen = strlen(p) + 1; 768 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 769 (void) strcpy(svp->sv_hostname, p); 770 } 771 772 /* 773 * RDMA MOUNT SUPPORT FOR NFS v4. 774 * Establish, is it possible to use RDMA, if so overload the 775 * knconf with rdma specific knconf and free the orignal knconf. 776 */ 777 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 778 /* 779 * Determine the addr type for RDMA, IPv4 or v6. 780 */ 781 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 782 addr_type = AF_INET; 783 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 784 addr_type = AF_INET6; 785 786 if (rdma_reachable(addr_type, &svp->sv_addr, 787 &rdma_knconf) == 0) { 788 /* 789 * If successful, hijack the orignal knconf and 790 * replace with the new one, depending on the flags. 791 */ 792 svp->sv_origknconf = svp->sv_knconf; 793 svp->sv_knconf = rdma_knconf; 794 } else { 795 if (flags & NFSMNT_TRYRDMA) { 796 #ifdef DEBUG 797 if (rdma_debug) 798 zcmn_err(getzoneid(), CE_WARN, 799 "no RDMA onboard, revert\n"); 800 #endif 801 } 802 803 if (flags & NFSMNT_DORDMA) { 804 /* 805 * If proto=rdma is specified and no RDMA 806 * path to this server is avialable then 807 * ditch this server. 808 * This is not included in the mountable 809 * server list or the replica list. 810 * Check if more servers are specified; 811 * Failover case, otherwise bail out of mount. 812 */ 813 if (args->nfs_args_ext == 814 NFS_ARGS_EXTB && 815 args->nfs_ext_u.nfs_extB.next 816 != NULL) { 817 data = (char *) 818 args->nfs_ext_u.nfs_extB.next; 819 if (uap->flags & MS_RDONLY && 820 !(flags & NFSMNT_SOFT)) { 821 if (svp_head->sv_next == NULL) { 822 svp_tail = NULL; 823 svp_2ndlast = NULL; 824 sv4_free(svp_head); 825 goto more; 826 } else { 827 svp_tail = svp_2ndlast; 828 svp_2ndlast->sv_next = 829 NULL; 830 sv4_free(svp); 831 goto more; 832 } 833 } 834 } else { 835 /* 836 * This is the last server specified 837 * in the nfs_args list passed down 838 * and its not rdma capable. 
839 */ 840 if (svp_head->sv_next == NULL) { 841 /* 842 * Is this the only one 843 */ 844 error = EINVAL; 845 #ifdef DEBUG 846 if (rdma_debug) 847 zcmn_err(getzoneid(), 848 CE_WARN, 849 "No RDMA srv"); 850 #endif 851 goto errout; 852 } else { 853 /* 854 * There is list, since some 855 * servers specified before 856 * this passed all requirements 857 */ 858 svp_tail = svp_2ndlast; 859 svp_2ndlast->sv_next = NULL; 860 sv4_free(svp); 861 goto proceed; 862 } 863 } 864 } 865 } 866 } 867 868 /* 869 * If there are syncaddr and netname data, load them in. This is 870 * to support data needed for NFSV4 when AUTH_DH is the negotiated 871 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 872 */ 873 if (args->flags & NFSMNT_SECURE) { 874 svp->sv_dhsec = create_authdh_data(args->netname, 875 strlen(args->netname), 876 args->syncaddr, svp->sv_knconf); 877 } 878 879 /* 880 * Get the extention data which has the security data structure. 881 * This includes data for AUTH_SYS as well. 882 */ 883 if (flags & NFSMNT_NEWARGS) { 884 switch (args->nfs_args_ext) { 885 case NFS_ARGS_EXTA: 886 case NFS_ARGS_EXTB: 887 /* 888 * Indicating the application is using the new 889 * sec_data structure to pass in the security 890 * data. 891 */ 892 secdata = args->nfs_ext_u.nfs_extA.secdata; 893 if (secdata == NULL) { 894 error = EINVAL; 895 } else if (uap->flags & MS_SYSSPACE) { 896 /* 897 * Need to validate the flavor here if 898 * sysspace, userspace was already 899 * validate from the nfs_copyin function. 900 */ 901 switch (secdata->rpcflavor) { 902 case AUTH_NONE: 903 case AUTH_UNIX: 904 case AUTH_LOOPBACK: 905 case AUTH_DES: 906 case RPCSEC_GSS: 907 break; 908 default: 909 error = EINVAL; 910 goto errout; 911 } 912 } 913 args->nfs_ext_u.nfs_extA.secdata = NULL; 914 break; 915 916 default: 917 error = EINVAL; 918 break; 919 } 920 921 } else if (flags & NFSMNT_SECURE) { 922 /* 923 * NFSMNT_SECURE is deprecated but we keep it 924 * to support the rouge user generated application 925 * that may use this undocumented interface to do 926 * AUTH_DH security. 927 */ 928 secdata = create_authdh_data(args->netname, 929 strlen(args->netname), args->syncaddr, svp->sv_knconf); 930 931 } else { 932 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 933 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 934 secdata->data = NULL; 935 } 936 937 svp->sv_secdata = secdata; 938 939 /* 940 * User does not explictly specify a flavor, and a user 941 * defined default flavor is passed down. 942 */ 943 if (flags & NFSMNT_SECDEFAULT) { 944 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 945 svp->sv_flags |= SV4_TRYSECDEFAULT; 946 nfs_rw_exit(&svp->sv_lock); 947 } 948 949 /* 950 * Failover support: 951 * 952 * We may have a linked list of nfs_args structures, 953 * which means the user is looking for failover. If 954 * the mount is either not "read-only" or "soft", 955 * we want to bail out with EINVAL. 956 */ 957 if (args->nfs_args_ext == NFS_ARGS_EXTB && 958 args->nfs_ext_u.nfs_extB.next != NULL) { 959 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 960 data = (char *)args->nfs_ext_u.nfs_extB.next; 961 goto more; 962 } 963 error = EINVAL; 964 goto errout; 965 } 966 967 /* 968 * Determine the zone we're being mounted into. 
969 */ 970 zone_hold(mntzone = zone); /* start with this assumption */ 971 if (getzoneid() == GLOBAL_ZONEID) { 972 zone_rele(mntzone); 973 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 974 ASSERT(mntzone != NULL); 975 if (mntzone != zone) { 976 error = EBUSY; 977 goto errout; 978 } 979 } 980 981 if (is_system_labeled()) { 982 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 983 svp->sv_knconf, cr); 984 985 if (error > 0) 986 goto errout; 987 988 if (error == -1) { 989 /* change mount to read-only to prevent write-down */ 990 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 991 } 992 } 993 994 /* 995 * Stop the mount from going any further if the zone is going away. 996 */ 997 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 998 error = EBUSY; 999 goto errout; 1000 } 1001 1002 /* 1003 * Get root vnode. 1004 */ 1005 proceed: 1006 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 1007 1008 if (error) { 1009 /* if nfs4rootvp failed, it will free svp_head */ 1010 svp_head = NULL; 1011 goto errout; 1012 } 1013 1014 mi = VTOMI4(rtvp); 1015 1016 /* 1017 * Send client id to the server, if necessary 1018 */ 1019 nfs4_error_zinit(&n4e); 1020 nfs4setclientid(mi, cr, FALSE, &n4e); 1021 error = n4e.error; 1022 1023 if (error) 1024 goto errout; 1025 1026 /* 1027 * Set option fields in the mount info record 1028 */ 1029 1030 if (svp_head->sv_next) { 1031 mutex_enter(&mi->mi_lock); 1032 mi->mi_flags |= MI4_LLOCK; 1033 mutex_exit(&mi->mi_lock); 1034 } 1035 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args); 1036 1037 errout: 1038 if (error) { 1039 if (rtvp != NULL) { 1040 rp = VTOR4(rtvp); 1041 if (rp->r_flags & R4HASHED) 1042 rp4_rmhash(rp); 1043 } 1044 if (mi != NULL) { 1045 nfs4_async_stop(vfsp); 1046 nfs4_async_manager_stop(vfsp); 1047 nfs4_remove_mi_from_server(mi, NULL); 1048 if (rtvp != NULL) 1049 VN_RELE(rtvp); 1050 if (mntzone != NULL) 1051 zone_rele(mntzone); 1052 /* need to remove it from the zone */ 1053 removed = nfs4_mi_zonelist_remove(mi); 1054 if (removed) 1055 zone_rele(mi->mi_zone); 1056 MI4_RELE(mi); 1057 if (!(uap->flags & MS_SYSSPACE) && args) { 1058 nfs4_free_args(args); 1059 kmem_free(args, sizeof (*args)); 1060 } 1061 return (error); 1062 } 1063 if (svp_head) 1064 sv4_free(svp_head); 1065 } 1066 1067 if (!(uap->flags & MS_SYSSPACE) && args) { 1068 nfs4_free_args(args); 1069 kmem_free(args, sizeof (*args)); 1070 } 1071 if (rtvp != NULL) 1072 VN_RELE(rtvp); 1073 1074 if (mntzone != NULL) 1075 zone_rele(mntzone); 1076 1077 return (error); 1078 } 1079 1080 #ifdef DEBUG 1081 #define VERS_MSG "NFS4 server " 1082 #else 1083 #define VERS_MSG "NFS server " 1084 #endif 1085 1086 #define READ_MSG \ 1087 VERS_MSG "%s returned 0 for read transfer size" 1088 #define WRITE_MSG \ 1089 VERS_MSG "%s returned 0 for write transfer size" 1090 #define SIZE_MSG \ 1091 VERS_MSG "%s returned 0 for maximum file size" 1092 1093 /* 1094 * Get the symbolic link text from the server for a given filehandle 1095 * of that symlink. 
1096 * 1097 * (get symlink text) PUTFH READLINK 1098 */ 1099 static int 1100 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 1101 int flags) 1102 1103 { 1104 COMPOUND4args_clnt args; 1105 COMPOUND4res_clnt res; 1106 int doqueue; 1107 nfs_argop4 argop[2]; 1108 nfs_resop4 *resop; 1109 READLINK4res *lr_res; 1110 uint_t len; 1111 bool_t needrecov = FALSE; 1112 nfs4_recov_state_t recov_state; 1113 nfs4_sharedfh_t *sfh; 1114 nfs4_error_t e; 1115 int num_retry = nfs4_max_mount_retry; 1116 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1117 1118 sfh = sfh4_get(fh, mi); 1119 recov_state.rs_flags = 0; 1120 recov_state.rs_num_retry_despite_err = 0; 1121 1122 recov_retry: 1123 nfs4_error_zinit(&e); 1124 1125 args.array_len = 2; 1126 args.array = argop; 1127 args.ctag = TAG_GET_SYMLINK; 1128 1129 if (! recovery) { 1130 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 1131 if (e.error) { 1132 sfh4_rele(&sfh); 1133 return (e.error); 1134 } 1135 } 1136 1137 /* 0. putfh symlink fh */ 1138 argop[0].argop = OP_CPUTFH; 1139 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 1140 1141 /* 1. readlink */ 1142 argop[1].argop = OP_READLINK; 1143 1144 doqueue = 1; 1145 1146 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 1147 1148 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 1149 1150 if (needrecov && !recovery && num_retry-- > 0) { 1151 1152 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1153 "getlinktext_otw: initiating recovery\n")); 1154 1155 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 1156 OP_READLINK, NULL) == FALSE) { 1157 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1158 if (!e.error) 1159 (void) xdr_free(xdr_COMPOUND4res_clnt, 1160 (caddr_t)&res); 1161 goto recov_retry; 1162 } 1163 } 1164 1165 /* 1166 * If non-NFS4 pcol error and/or we weren't able to recover. 1167 */ 1168 if (e.error != 0) { 1169 if (! recovery) 1170 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1171 sfh4_rele(&sfh); 1172 return (e.error); 1173 } 1174 1175 if (res.status) { 1176 e.error = geterrno4(res.status); 1177 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1178 if (! recovery) 1179 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1180 sfh4_rele(&sfh); 1181 return (e.error); 1182 } 1183 1184 /* res.status == NFS4_OK */ 1185 ASSERT(res.status == NFS4_OK); 1186 1187 resop = &res.array[1]; /* readlink res */ 1188 lr_res = &resop->nfs_resop4_u.opreadlink; 1189 1190 /* treat symlink name as data */ 1191 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 1192 1193 if (! recovery) 1194 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1195 sfh4_rele(&sfh); 1196 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1197 return (0); 1198 } 1199 1200 /* 1201 * Skip over consecutive slashes and "/./" in a pathname. 1202 */ 1203 void 1204 pathname_skipslashdot(struct pathname *pnp) 1205 { 1206 char *c1, *c2; 1207 1208 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 1209 1210 c1 = pnp->pn_path + 1; 1211 c2 = pnp->pn_path + 2; 1212 1213 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 1214 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 1215 pnp->pn_pathlen = pnp->pn_pathlen - 2; 1216 } else { 1217 pnp->pn_path++; 1218 pnp->pn_pathlen--; 1219 } 1220 } 1221 } 1222 1223 /* 1224 * Resolve a symbolic link path. The symlink is in the nth component of 1225 * svp->sv_path and has an nfs4 file handle "fh". 1226 * Upon return, the sv_path will point to the new path that has the nth 1227 * component resolved to its symlink text. 
1228 */ 1229 int 1230 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 1231 cred_t *cr, int flags) 1232 { 1233 char *oldpath; 1234 char *symlink, *newpath; 1235 struct pathname oldpn, newpn; 1236 char component[MAXNAMELEN]; 1237 int i, addlen, error = 0; 1238 int oldpathlen; 1239 1240 /* Get the symbolic link text over the wire. */ 1241 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 1242 1243 if (error || symlink == NULL || strlen(symlink) == 0) 1244 return (error); 1245 1246 /* 1247 * Compose the new pathname. 1248 * Note: 1249 * - only the nth component is resolved for the pathname. 1250 * - pathname.pn_pathlen does not count the ending null byte. 1251 */ 1252 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1253 oldpath = svp->sv_path; 1254 oldpathlen = svp->sv_pathlen; 1255 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1256 nfs_rw_exit(&svp->sv_lock); 1257 kmem_free(symlink, strlen(symlink) + 1); 1258 return (error); 1259 } 1260 nfs_rw_exit(&svp->sv_lock); 1261 pn_alloc(&newpn); 1262 1263 /* 1264 * Skip over previous components from the oldpath so that the 1265 * oldpn.pn_path will point to the symlink component. Skip 1266 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1267 * pn_getcompnent can get the component. 1268 */ 1269 for (i = 1; i < nth; i++) { 1270 pathname_skipslashdot(&oldpn); 1271 error = pn_getcomponent(&oldpn, component); 1272 if (error) 1273 goto out; 1274 } 1275 1276 /* 1277 * Copy the old path upto the component right before the symlink 1278 * if the symlink is not an absolute path. 1279 */ 1280 if (symlink[0] != '/') { 1281 addlen = oldpn.pn_path - oldpn.pn_buf; 1282 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1283 newpn.pn_pathlen += addlen; 1284 newpn.pn_path += addlen; 1285 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1286 newpn.pn_pathlen++; 1287 newpn.pn_path++; 1288 } 1289 1290 /* copy the resolved symbolic link text */ 1291 addlen = strlen(symlink); 1292 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1293 error = ENAMETOOLONG; 1294 goto out; 1295 } 1296 bcopy(symlink, newpn.pn_path, addlen); 1297 newpn.pn_pathlen += addlen; 1298 newpn.pn_path += addlen; 1299 1300 /* 1301 * Check if there is any remaining path after the symlink component. 1302 * First, skip the symlink component. 1303 */ 1304 pathname_skipslashdot(&oldpn); 1305 if (error = pn_getcomponent(&oldpn, component)) 1306 goto out; 1307 1308 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1309 1310 /* 1311 * Copy the remaining path to the new pathname if there is any. 1312 */ 1313 if (addlen > 0) { 1314 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1315 error = ENAMETOOLONG; 1316 goto out; 1317 } 1318 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1319 newpn.pn_pathlen += addlen; 1320 } 1321 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1322 1323 /* get the newpath and store it in the servinfo4_t */ 1324 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1325 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1326 newpath[newpn.pn_pathlen] = '\0'; 1327 1328 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1329 svp->sv_path = newpath; 1330 svp->sv_pathlen = strlen(newpath) + 1; 1331 nfs_rw_exit(&svp->sv_lock); 1332 1333 kmem_free(oldpath, oldpathlen); 1334 out: 1335 kmem_free(symlink, strlen(symlink) + 1); 1336 pn_free(&newpn); 1337 pn_free(&oldpn); 1338 1339 return (error); 1340 } 1341 1342 /* 1343 * Get the root filehandle for the given filesystem and server, and update 1344 * svp. 
1345 * 1346 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1347 * to coordinate with recovery. Otherwise, the caller is assumed to be 1348 * the recovery thread or have already done a start_fop. 1349 * 1350 * Errors are returned by the nfs4_error_t parameter. 1351 */ 1352 1353 static void 1354 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1355 int flags, cred_t *cr, nfs4_error_t *ep) 1356 { 1357 COMPOUND4args_clnt args; 1358 COMPOUND4res_clnt res; 1359 int doqueue = 1; 1360 nfs_argop4 *argop; 1361 nfs_resop4 *resop; 1362 nfs4_ga_res_t *garp; 1363 int num_argops; 1364 lookup4_param_t lookuparg; 1365 nfs_fh4 *tmpfhp; 1366 nfs_fh4 *resfhp; 1367 bool_t needrecov = FALSE; 1368 nfs4_recov_state_t recov_state; 1369 int llndx; 1370 int nthcomp; 1371 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1372 1373 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1374 ASSERT(svp->sv_path != NULL); 1375 if (svp->sv_path[0] == '\0') { 1376 nfs_rw_exit(&svp->sv_lock); 1377 nfs4_error_init(ep, EINVAL); 1378 return; 1379 } 1380 nfs_rw_exit(&svp->sv_lock); 1381 1382 recov_state.rs_flags = 0; 1383 recov_state.rs_num_retry_despite_err = 0; 1384 recov_retry: 1385 nfs4_error_zinit(ep); 1386 1387 if (!recovery) { 1388 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1389 &recov_state, NULL); 1390 1391 /* 1392 * If recovery has been started and this request as 1393 * initiated by a mount, then we must wait for recovery 1394 * to finish before proceeding, otherwise, the error 1395 * cleanup would remove data structures needed by the 1396 * recovery thread. 1397 */ 1398 if (ep->error) { 1399 mutex_enter(&mi->mi_lock); 1400 if (mi->mi_flags & MI4_MOUNTING) { 1401 mi->mi_flags |= MI4_RECOV_FAIL; 1402 mi->mi_error = EIO; 1403 1404 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1405 "nfs4getfh_otw: waiting 4 recovery\n")); 1406 1407 while (mi->mi_flags & MI4_RECOV_ACTIV) 1408 cv_wait(&mi->mi_failover_cv, 1409 &mi->mi_lock); 1410 } 1411 mutex_exit(&mi->mi_lock); 1412 return; 1413 } 1414 1415 /* 1416 * If the client does not specify a specific flavor to use 1417 * and has not gotten a secinfo list from the server yet, 1418 * retrieve the secinfo list from the server and use a 1419 * flavor from the list to mount. 1420 * 1421 * If fail to get the secinfo list from the server, then 1422 * try the default flavor. 1423 */ 1424 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1425 svp->sv_secinfo == NULL) { 1426 (void) nfs4_secinfo_path(mi, cr, FALSE); 1427 } 1428 } 1429 1430 if (recovery) 1431 args.ctag = TAG_REMAP_MOUNT; 1432 else 1433 args.ctag = TAG_MOUNT; 1434 1435 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1436 lookuparg.argsp = &args; 1437 lookuparg.resp = &res; 1438 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1439 lookuparg.trailer_len = 0; 1440 lookuparg.ga_bits = FATTR4_FSINFO_MASK; 1441 lookuparg.mi = mi; 1442 1443 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1444 ASSERT(svp->sv_path != NULL); 1445 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1446 nfs_rw_exit(&svp->sv_lock); 1447 1448 argop = args.array; 1449 num_argops = args.array_len; 1450 1451 /* choose public or root filehandle */ 1452 if (flags & NFS4_GETFH_PUBLIC) 1453 argop[0].argop = OP_PUTPUBFH; 1454 else 1455 argop[0].argop = OP_PUTROOTFH; 1456 1457 /* get fh */ 1458 argop[1].argop = OP_GETFH; 1459 1460 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1461 "nfs4getfh_otw: %s call, mi 0x%p", 1462 needrecov ? 
"recov" : "first", (void *)mi)); 1463 1464 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1465 1466 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1467 1468 if (needrecov) { 1469 bool_t abort; 1470 1471 if (recovery) { 1472 nfs4args_lookup_free(argop, num_argops); 1473 kmem_free(argop, 1474 lookuparg.arglen * sizeof (nfs_argop4)); 1475 if (!ep->error) 1476 (void) xdr_free(xdr_COMPOUND4res_clnt, 1477 (caddr_t)&res); 1478 return; 1479 } 1480 1481 NFS4_DEBUG(nfs4_client_recov_debug, 1482 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1483 1484 abort = nfs4_start_recovery(ep, mi, NULL, 1485 NULL, NULL, NULL, OP_GETFH, NULL); 1486 if (!ep->error) { 1487 ep->error = geterrno4(res.status); 1488 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1489 } 1490 nfs4args_lookup_free(argop, num_argops); 1491 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1492 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1493 /* have another go? */ 1494 if (abort == FALSE) 1495 goto recov_retry; 1496 return; 1497 } 1498 1499 /* 1500 * No recovery, but check if error is set. 1501 */ 1502 if (ep->error) { 1503 nfs4args_lookup_free(argop, num_argops); 1504 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1505 if (!recovery) 1506 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1507 needrecov); 1508 return; 1509 } 1510 1511 is_link_err: 1512 1513 /* for non-recovery errors */ 1514 if (res.status && res.status != NFS4ERR_SYMLINK) { 1515 if (!recovery) { 1516 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1517 needrecov); 1518 } 1519 nfs4args_lookup_free(argop, num_argops); 1520 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1521 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1522 return; 1523 } 1524 1525 /* 1526 * If any intermediate component in the path is a symbolic link, 1527 * resolve the symlink, then try mount again using the new path. 1528 */ 1529 if (res.status == NFS4ERR_SYMLINK) { 1530 int where; 1531 1532 /* 1533 * This must be from OP_LOOKUP failure. The (cfh) for this 1534 * OP_LOOKUP is a symlink node. Found out where the 1535 * OP_GETFH is for the (cfh) that is a symlink node. 1536 * 1537 * Example: 1538 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1539 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1540 * 1541 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1542 * In this case, where = 7, nthcomp = 2. 1543 */ 1544 where = res.array_len - 2; 1545 ASSERT(where > 0); 1546 1547 resop = &res.array[where - 1]; 1548 ASSERT(resop->resop == OP_GETFH); 1549 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1550 nthcomp = res.array_len/3 - 1; 1551 1552 /* 1553 * Need to call nfs4_end_op before resolve_sympath to avoid 1554 * potential nfs4_start_op deadlock. 
1555 */ 1556 if (!recovery) 1557 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1558 needrecov); 1559 1560 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1561 flags); 1562 1563 nfs4args_lookup_free(argop, num_argops); 1564 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1565 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1566 1567 if (ep->error) 1568 return; 1569 1570 goto recov_retry; 1571 } 1572 1573 /* getfh */ 1574 resop = &res.array[res.array_len - 2]; 1575 ASSERT(resop->resop == OP_GETFH); 1576 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1577 1578 /* getattr fsinfo res */ 1579 resop++; 1580 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1581 1582 *vtp = garp->n4g_va.va_type; 1583 1584 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1585 1586 mutex_enter(&mi->mi_lock); 1587 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1588 mi->mi_flags |= MI4_LINK; 1589 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1590 mi->mi_flags |= MI4_SYMLINK; 1591 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK) 1592 mi->mi_flags |= MI4_ACL; 1593 mutex_exit(&mi->mi_lock); 1594 1595 if (garp->n4g_ext_res->n4g_maxread == 0) 1596 mi->mi_tsize = 1597 MIN(MAXBSIZE, mi->mi_tsize); 1598 else 1599 mi->mi_tsize = 1600 MIN(garp->n4g_ext_res->n4g_maxread, 1601 mi->mi_tsize); 1602 1603 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1604 mi->mi_stsize = 1605 MIN(MAXBSIZE, mi->mi_stsize); 1606 else 1607 mi->mi_stsize = 1608 MIN(garp->n4g_ext_res->n4g_maxwrite, 1609 mi->mi_stsize); 1610 1611 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1612 mi->mi_maxfilesize = 1613 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1614 mi->mi_maxfilesize); 1615 1616 /* 1617 * If the final component is a a symbolic link, resolve the symlink, 1618 * then try mount again using the new path. 1619 * 1620 * Assume no symbolic link for root filesysm "/". 1621 */ 1622 if (*vtp == VLNK) { 1623 /* 1624 * nthcomp is the total result length minus 1625 * the 1st 2 OPs (PUTROOTFH, GETFH), 1626 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1627 * 1628 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1629 * LOOKUP 2nd-comp GETFH GETATTR 1630 * 1631 * (8 - 2)/3 = 2 1632 */ 1633 nthcomp = (res.array_len - 2)/3; 1634 1635 /* 1636 * Need to call nfs4_end_op before resolve_sympath to avoid 1637 * potential nfs4_start_op deadlock. See RFE 4777612. 1638 */ 1639 if (!recovery) 1640 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1641 needrecov); 1642 1643 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1644 flags); 1645 1646 nfs4args_lookup_free(argop, num_argops); 1647 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1648 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1649 1650 if (ep->error) 1651 return; 1652 1653 goto recov_retry; 1654 } 1655 1656 /* 1657 * We need to figure out where in the compound the getfh 1658 * for the parent directory is. If the object to be mounted is 1659 * the root, then there is no lookup at all: 1660 * PUTROOTFH, GETFH. 1661 * If the object to be mounted is in the root, then the compound is: 1662 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1663 * In either of these cases, the index of the GETFH is 1. 1664 * If it is not at the root, then it's something like: 1665 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1666 * LOOKUP, GETFH, GETATTR 1667 * In this case, the index is llndx (last lookup index) - 2. 
1668 */ 1669 if (llndx == -1 || llndx == 2) 1670 resop = &res.array[1]; 1671 else { 1672 ASSERT(llndx > 2); 1673 resop = &res.array[llndx-2]; 1674 } 1675 1676 ASSERT(resop->resop == OP_GETFH); 1677 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1678 1679 /* save the filehandles for the replica */ 1680 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1681 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1682 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1683 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1684 tmpfhp->nfs_fh4_len); 1685 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1686 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1687 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1688 1689 /* initialize fsid and supp_attrs for server fs */ 1690 svp->sv_fsid = garp->n4g_fsid; 1691 svp->sv_supp_attrs = 1692 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; 1693 1694 nfs_rw_exit(&svp->sv_lock); 1695 1696 nfs4args_lookup_free(argop, num_argops); 1697 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1698 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1699 if (!recovery) 1700 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1701 } 1702 1703 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1704 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1705 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1706 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1707 1708 /* 1709 * Remap the root filehandle for the given filesystem. 1710 * 1711 * results returned via the nfs4_error_t parameter. 1712 */ 1713 void 1714 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1715 { 1716 struct servinfo4 *svp; 1717 vtype_t vtype; 1718 nfs_fh4 rootfh; 1719 int getfh_flags; 1720 char *orig_sv_path; 1721 int orig_sv_pathlen, num_retry; 1722 1723 mutex_enter(&mi->mi_lock); 1724 1725 remap_retry: 1726 svp = mi->mi_curr_serv; 1727 getfh_flags = 1728 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1729 getfh_flags |= 1730 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1731 mutex_exit(&mi->mi_lock); 1732 1733 /* 1734 * Just in case server path being mounted contains 1735 * symlinks and fails w/STALE, save the initial sv_path 1736 * so we can redrive the initial mount compound with the 1737 * initial sv_path -- not a symlink-expanded version. 1738 * 1739 * This could only happen if a symlink was expanded 1740 * and the expanded mount compound failed stale. Because 1741 * it could be the case that the symlink was removed at 1742 * the server (and replaced with another symlink/dir, 1743 * we need to use the initial sv_path when attempting 1744 * to re-lookup everything and recover. 1745 */ 1746 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1747 orig_sv_pathlen = svp->sv_pathlen; 1748 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1749 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 1750 nfs_rw_exit(&svp->sv_lock); 1751 1752 num_retry = nfs4_max_mount_retry; 1753 1754 do { 1755 /* 1756 * Get the root fh from the server. Retry nfs4_max_mount_retry 1757 * (2) times if it fails with STALE since the recovery 1758 * infrastructure doesn't do STALE recovery for components 1759 * of the server path to the object being mounted. 1760 */ 1761 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 1762 1763 if (ep->error == 0 && ep->stat == NFS4_OK) 1764 break; 1765 1766 /* 1767 * For some reason, the mount compound failed. 
Before 1768 * retrying, we need to restore the original sv_path 1769 * because it might have contained symlinks that were 1770 * expanded by nfsgetfh_otw before the failure occurred. 1771 * replace current sv_path with orig sv_path -- just in case 1772 * it changed due to embedded symlinks. 1773 */ 1774 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1775 if (orig_sv_pathlen != svp->sv_pathlen) { 1776 kmem_free(svp->sv_path, svp->sv_pathlen); 1777 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 1778 svp->sv_pathlen = orig_sv_pathlen; 1779 } 1780 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 1781 nfs_rw_exit(&svp->sv_lock); 1782 1783 } while (num_retry-- > 0); 1784 1785 kmem_free(orig_sv_path, orig_sv_pathlen); 1786 1787 if (ep->error != 0 || ep->stat != 0) { 1788 return; 1789 } 1790 1791 if (vtype != VNON && vtype != mi->mi_type) { 1792 /* shouldn't happen */ 1793 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 1794 "nfs4_remap_root: server root vnode type (%d) doesn't " 1795 "match mount info (%d)", vtype, mi->mi_type); 1796 } 1797 1798 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1799 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1800 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1801 nfs_rw_exit(&svp->sv_lock); 1802 sfh4_update(mi->mi_rootfh, &rootfh); 1803 1804 /* 1805 * It's possible that recovery took place on the filesystem 1806 * and the server has been updated between the time we did 1807 * the nfs4getfh_otw and now. Re-drive the otw operation 1808 * to make sure we have a good fh. 1809 */ 1810 mutex_enter(&mi->mi_lock); 1811 if (mi->mi_curr_serv != svp) 1812 goto remap_retry; 1813 1814 mutex_exit(&mi->mi_lock); 1815 } 1816 1817 static int 1818 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 1819 int flags, cred_t *cr, zone_t *zone) 1820 { 1821 vnode_t *rtvp = NULL; 1822 mntinfo4_t *mi; 1823 dev_t nfs_dev; 1824 int error = 0; 1825 rnode4_t *rp; 1826 int i; 1827 struct vattr va; 1828 vtype_t vtype = VNON; 1829 vtype_t tmp_vtype = VNON; 1830 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 1831 nfs4_oo_hash_bucket_t *bucketp; 1832 nfs_fh4 fh; 1833 char *droptext = ""; 1834 struct nfs_stats *nfsstatsp; 1835 nfs4_fname_t *mfname; 1836 nfs4_error_t e; 1837 char *orig_sv_path; 1838 int orig_sv_pathlen, num_retry, removed; 1839 cred_t *lcr = NULL, *tcr = cr; 1840 1841 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1842 ASSERT(nfsstatsp != NULL); 1843 1844 ASSERT(nfs_zone() == zone); 1845 ASSERT(crgetref(cr)); 1846 1847 /* 1848 * Create a mount record and link it to the vfs struct. 
1849 */ 1850 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1851 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1852 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 1853 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 1854 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 1855 1856 if (!(flags & NFSMNT_SOFT)) 1857 mi->mi_flags |= MI4_HARD; 1858 if ((flags & NFSMNT_NOPRINT)) 1859 mi->mi_flags |= MI4_NOPRINT; 1860 if (flags & NFSMNT_INT) 1861 mi->mi_flags |= MI4_INT; 1862 if (flags & NFSMNT_PUBLIC) 1863 mi->mi_flags |= MI4_PUBLIC; 1864 mi->mi_retrans = NFS_RETRIES; 1865 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1866 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1867 mi->mi_timeo = nfs4_cots_timeo; 1868 else 1869 mi->mi_timeo = NFS_TIMEO; 1870 mi->mi_prog = NFS_PROGRAM; 1871 mi->mi_vers = NFS_V4; 1872 mi->mi_rfsnames = rfsnames_v4; 1873 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr; 1874 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1875 mi->mi_servers = svp; 1876 mi->mi_curr_serv = svp; 1877 mi->mi_acregmin = SEC2HR(ACREGMIN); 1878 mi->mi_acregmax = SEC2HR(ACREGMAX); 1879 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1880 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1881 mi->mi_fh_expire_type = FH4_PERSISTENT; 1882 mi->mi_clientid_next = NULL; 1883 mi->mi_clientid_prev = NULL; 1884 mi->mi_grace_wait = 0; 1885 mi->mi_error = 0; 1886 mi->mi_srvsettime = 0; 1887 1888 mi->mi_count = 1; 1889 1890 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 1891 mi->mi_stsize = mi->mi_tsize; 1892 1893 if (flags & NFSMNT_DIRECTIO) 1894 mi->mi_flags |= MI4_DIRECTIO; 1895 1896 mi->mi_flags |= MI4_MOUNTING; 1897 1898 /* 1899 * Make a vfs struct for nfs. We do this here instead of below 1900 * because rtvp needs a vfs before we can do a getattr on it. 1901 * 1902 * Assign a unique device id to the mount 1903 */ 1904 mutex_enter(&nfs_minor_lock); 1905 do { 1906 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1907 nfs_dev = makedevice(nfs_major, nfs_minor); 1908 } while (vfs_devismounted(nfs_dev)); 1909 mutex_exit(&nfs_minor_lock); 1910 1911 vfsp->vfs_dev = nfs_dev; 1912 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 1913 vfsp->vfs_data = (caddr_t)mi; 1914 vfsp->vfs_fstype = nfsfstyp; 1915 vfsp->vfs_bsize = nfs4_bsize; 1916 1917 /* 1918 * Initialize fields used to support async putpage operations. 1919 */ 1920 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 1921 mi->mi_async_clusters[i] = nfs4_async_clusters; 1922 mi->mi_async_init_clusters = nfs4_async_clusters; 1923 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1924 mi->mi_max_threads = nfs4_max_threads; 1925 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1926 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1927 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1928 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1929 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 1930 1931 mi->mi_vfsp = vfsp; 1932 zone_hold(mi->mi_zone = zone); 1933 nfs4_mi_zonelist_add(mi); 1934 1935 /* 1936 * Initialize the <open owner/cred> hash table. 1937 */ 1938 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 1939 bucketp = &(mi->mi_oo_list[i]); 1940 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 1941 list_create(&bucketp->b_oo_hash_list, 1942 sizeof (nfs4_open_owner_t), 1943 offsetof(nfs4_open_owner_t, oo_hash_node)); 1944 } 1945 1946 /* 1947 * Initialize the freed open owner list. 
1948 */ 1949 mi->mi_foo_num = 0; 1950 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 1951 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 1952 offsetof(nfs4_open_owner_t, oo_foo_node)); 1953 1954 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 1955 offsetof(nfs4_lost_rqst_t, lr_node)); 1956 1957 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 1958 offsetof(nfs4_bseqid_entry_t, bs_node)); 1959 1960 /* 1961 * Initialize the msg buffer. 1962 */ 1963 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 1964 offsetof(nfs4_debug_msg_t, msg_node)); 1965 mi->mi_msg_count = 0; 1966 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 1967 1968 /* 1969 * Initialize kstats 1970 */ 1971 nfs4_mnt_kstat_init(vfsp); 1972 1973 /* 1974 * Initialize the shared filehandle pool, and get the fname for 1975 * the filesystem root. 1976 */ 1977 sfh4_createtab(&mi->mi_filehandles); 1978 mi->mi_fname = fn_get(NULL, "."); 1979 1980 /* 1981 * Save server path we're attempting to mount. 1982 */ 1983 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1984 orig_sv_pathlen = svp_head->sv_pathlen; 1985 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 1986 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 1987 nfs_rw_exit(&svp->sv_lock); 1988 1989 /* 1990 * Make the GETFH call to get root fh for each replica. 1991 */ 1992 if (svp_head->sv_next) 1993 droptext = ", dropping replica"; 1994 1995 /* 1996 * If the uid is set then set the creds for secure mounts 1997 * by proxy processes such as automountd. 1998 */ 1999 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2000 if (svp->sv_secdata->uid != 0) { 2001 lcr = crdup(cr); 2002 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 2003 tcr = lcr; 2004 } 2005 nfs_rw_exit(&svp->sv_lock); 2006 for (svp = svp_head; svp; svp = svp->sv_next) { 2007 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 2008 nfs_cmn_err(error, CE_WARN, 2009 VERS_MSG "Host %s is a duplicate%s", 2010 svp->sv_hostname, droptext); 2011 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2012 svp->sv_flags |= SV4_NOTINUSE; 2013 nfs_rw_exit(&svp->sv_lock); 2014 continue; 2015 } 2016 mi->mi_curr_serv = svp; 2017 2018 /* 2019 * Just in case server path being mounted contains 2020 * symlinks and fails w/STALE, save the initial sv_path 2021 * so we can redrive the initial mount compound with the 2022 * initial sv_path -- not a symlink-expanded version. 2023 * 2024 * This could only happen if a symlink was expanded 2025 * and the expanded mount compound failed stale. Because 2026 * it could be the case that the symlink was removed at 2027 * the server (and replaced with another symlink/dir, 2028 * we need to use the initial sv_path when attempting 2029 * to re-lookup everything and recover. 2030 * 2031 * Other mount errors should evenutally be handled here also 2032 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 2033 * failures will result in mount being redriven a few times. 2034 */ 2035 num_retry = nfs4_max_mount_retry; 2036 do { 2037 nfs4getfh_otw(mi, svp, &tmp_vtype, 2038 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 2039 NFS4_GETFH_NEEDSOP, tcr, &e); 2040 2041 if (e.error == 0 && e.stat == NFS4_OK) 2042 break; 2043 2044 /* 2045 * replace current sv_path with orig sv_path -- just in 2046 * case it changed due to embedded symlinks. 
2047 */ 2048 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2049 if (orig_sv_pathlen != svp->sv_pathlen) { 2050 kmem_free(svp->sv_path, svp->sv_pathlen); 2051 svp->sv_path = kmem_alloc(orig_sv_pathlen, 2052 KM_SLEEP); 2053 svp->sv_pathlen = orig_sv_pathlen; 2054 } 2055 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2056 nfs_rw_exit(&svp->sv_lock); 2057 2058 } while (num_retry-- > 0); 2059 2060 error = e.error ? e.error : geterrno4(e.stat); 2061 if (error) { 2062 nfs_cmn_err(error, CE_WARN, 2063 VERS_MSG "initial call to %s failed%s: %m", 2064 svp->sv_hostname, droptext); 2065 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2066 svp->sv_flags |= SV4_NOTINUSE; 2067 nfs_rw_exit(&svp->sv_lock); 2068 mi->mi_flags &= ~MI4_RECOV_FAIL; 2069 mi->mi_error = 0; 2070 continue; 2071 } 2072 2073 if (tmp_vtype == VBAD) { 2074 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2075 VERS_MSG "%s returned a bad file type for " 2076 "root%s", svp->sv_hostname, droptext); 2077 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2078 svp->sv_flags |= SV4_NOTINUSE; 2079 nfs_rw_exit(&svp->sv_lock); 2080 continue; 2081 } 2082 2083 if (vtype == VNON) { 2084 vtype = tmp_vtype; 2085 } else if (vtype != tmp_vtype) { 2086 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2087 VERS_MSG "%s returned a different file type " 2088 "for root%s", svp->sv_hostname, droptext); 2089 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2090 svp->sv_flags |= SV4_NOTINUSE; 2091 nfs_rw_exit(&svp->sv_lock); 2092 continue; 2093 } 2094 if (firstsvp == NULL) 2095 firstsvp = svp; 2096 } 2097 2098 kmem_free(orig_sv_path, orig_sv_pathlen); 2099 2100 if (firstsvp == NULL) { 2101 if (error == 0) 2102 error = ENOENT; 2103 goto bad; 2104 } 2105 2106 mi->mi_curr_serv = svp = firstsvp; 2107 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2108 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2109 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2110 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2111 mi->mi_rootfh = sfh4_get(&fh, mi); 2112 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2113 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2114 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2115 nfs_rw_exit(&svp->sv_lock); 2116 2117 /* 2118 * Make the root vnode without attributes. 2119 */ 2120 mfname = mi->mi_fname; 2121 fn_hold(mfname); 2122 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2123 &mfname, NULL, mi, cr, gethrtime()); 2124 rtvp->v_type = vtype; 2125 2126 mi->mi_curread = mi->mi_tsize; 2127 mi->mi_curwrite = mi->mi_stsize; 2128 2129 /* 2130 * Start the manager thread responsible for handling async worker 2131 * threads. 2132 */ 2133 MI4_HOLD(mi); 2134 VFS_HOLD(vfsp); /* add reference for thread */ 2135 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2136 vfsp, 0, minclsyspri); 2137 ASSERT(mi->mi_manager_thread != NULL); 2138 2139 /* 2140 * Create the thread that handles over-the-wire calls for 2141 * VOP_INACTIVE. 2142 * This needs to happen after the manager thread is created. 
2143 */ 2144 MI4_HOLD(mi); 2145 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2146 mi, 0, minclsyspri); 2147 ASSERT(mi->mi_inactive_thread != NULL); 2148 2149 /* If we didn't get a type, get one now */ 2150 if (rtvp->v_type == VNON) { 2151 va.va_mask = AT_TYPE; 2152 error = nfs4getattr(rtvp, &va, tcr); 2153 if (error) 2154 goto bad; 2155 rtvp->v_type = va.va_type; 2156 } 2157 2158 mi->mi_type = rtvp->v_type; 2159 2160 mutex_enter(&mi->mi_lock); 2161 mi->mi_flags &= ~MI4_MOUNTING; 2162 mutex_exit(&mi->mi_lock); 2163 2164 *rtvpp = rtvp; 2165 if (lcr != NULL) 2166 crfree(lcr); 2167 2168 return (0); 2169 bad: 2170 /* 2171 * An error occurred somewhere, need to clean up... 2172 */ 2173 if (lcr != NULL) 2174 crfree(lcr); 2175 if (rtvp != NULL) { 2176 /* 2177 * We need to release our reference to the root vnode and 2178 * destroy the mntinfo4 struct that we just created. 2179 */ 2180 rp = VTOR4(rtvp); 2181 if (rp->r_flags & R4HASHED) 2182 rp4_rmhash(rp); 2183 VN_RELE(rtvp); 2184 } 2185 nfs4_async_stop(vfsp); 2186 nfs4_async_manager_stop(vfsp); 2187 removed = nfs4_mi_zonelist_remove(mi); 2188 if (removed) 2189 zone_rele(mi->mi_zone); 2190 2191 /* 2192 * This releases the initial "hold" of the mi since it will never 2193 * be referenced by the vfsp. Also, when mount returns to vfs.c 2194 * with an error, the vfsp will be destroyed, not rele'd. 2195 */ 2196 MI4_RELE(mi); 2197 2198 *rtvpp = NULL; 2199 return (error); 2200 } 2201 2202 /* 2203 * vfs operations 2204 */ 2205 static int 2206 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2207 { 2208 mntinfo4_t *mi; 2209 ushort_t omax; 2210 int removed; 2211 2212 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2213 return (EPERM); 2214 2215 mi = VFTOMI4(vfsp); 2216 2217 if (flag & MS_FORCE) { 2218 vfsp->vfs_flag |= VFS_UNMOUNTED; 2219 if (nfs_zone() != mi->mi_zone) { 2220 /* 2221 * If the request is coming from the wrong zone, 2222 * we don't want to create any new threads, and 2223 * performance is not a concern. Do everything 2224 * inline. 2225 */ 2226 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2227 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2228 (void *)vfsp)); 2229 nfs4_free_mount(vfsp, cr); 2230 } else { 2231 /* 2232 * Free data structures asynchronously, to avoid 2233 * blocking the current thread (for performance 2234 * reasons only). 2235 */ 2236 async_free_mount(vfsp, cr); 2237 } 2238 return (0); 2239 } 2240 /* 2241 * Wait until all asynchronous putpage operations on 2242 * this file system are complete before flushing rnodes 2243 * from the cache. 2244 */ 2245 omax = mi->mi_max_threads; 2246 if (nfs4_async_stop_sig(vfsp)) { 2247 2248 return (EINTR); 2249 } 2250 r4flush(vfsp, cr); 2251 /* 2252 * If there are any active vnodes on this file system, 2253 * then the file system is busy and can't be umounted. 2254 */ 2255 if (check_rtable4(vfsp)) { 2256 mutex_enter(&mi->mi_async_lock); 2257 mi->mi_max_threads = omax; 2258 mutex_exit(&mi->mi_async_lock); 2259 return (EBUSY); 2260 } 2261 /* 2262 * The unmount can't fail from now on, and there are no active 2263 * files that could require over-the-wire calls to the server, 2264 * so stop the async manager and the inactive thread. 2265 */ 2266 nfs4_async_manager_stop(vfsp); 2267 /* 2268 * Destroy all rnodes belonging to this file system from the 2269 * rnode hash queues and purge any resources allocated to 2270 * them. 
2271 */ 2272 destroy_rtable4(vfsp, cr); 2273 vfsp->vfs_flag |= VFS_UNMOUNTED; 2274 2275 nfs4_remove_mi_from_server(mi, NULL); 2276 removed = nfs4_mi_zonelist_remove(mi); 2277 if (removed) 2278 zone_rele(mi->mi_zone); 2279 2280 return (0); 2281 } 2282 2283 /* 2284 * find root of nfs 2285 */ 2286 static int 2287 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2288 { 2289 mntinfo4_t *mi; 2290 vnode_t *vp; 2291 nfs4_fname_t *mfname; 2292 servinfo4_t *svp; 2293 2294 mi = VFTOMI4(vfsp); 2295 2296 if (nfs_zone() != mi->mi_zone) 2297 return (EPERM); 2298 2299 svp = mi->mi_curr_serv; 2300 if (svp) { 2301 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2302 if (svp->sv_flags & SV4_ROOT_STALE) { 2303 nfs_rw_exit(&svp->sv_lock); 2304 2305 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2306 if (svp->sv_flags & SV4_ROOT_STALE) { 2307 svp->sv_flags &= ~SV4_ROOT_STALE; 2308 nfs_rw_exit(&svp->sv_lock); 2309 return (ENOENT); 2310 } 2311 nfs_rw_exit(&svp->sv_lock); 2312 } else 2313 nfs_rw_exit(&svp->sv_lock); 2314 } 2315 2316 mfname = mi->mi_fname; 2317 fn_hold(mfname); 2318 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2319 VFTOMI4(vfsp), CRED(), gethrtime()); 2320 2321 if (VTOR4(vp)->r_flags & R4STALE) { 2322 VN_RELE(vp); 2323 return (ENOENT); 2324 } 2325 2326 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2327 2328 vp->v_type = mi->mi_type; 2329 2330 *vpp = vp; 2331 2332 return (0); 2333 } 2334 2335 static int 2336 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2337 { 2338 int error; 2339 nfs4_ga_res_t gar; 2340 nfs4_ga_ext_res_t ger; 2341 2342 gar.n4g_ext_res = &ger; 2343 2344 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2345 NFS4_STATFS_ATTR_MASK, cr)) 2346 return (error); 2347 2348 *sbp = gar.n4g_ext_res->n4g_sb; 2349 2350 return (0); 2351 } 2352 2353 /* 2354 * Get file system statistics. 2355 */ 2356 static int 2357 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2358 { 2359 int error; 2360 vnode_t *vp; 2361 cred_t *cr; 2362 2363 error = nfs4_root(vfsp, &vp); 2364 if (error) 2365 return (error); 2366 2367 cr = CRED(); 2368 2369 error = nfs4_statfs_otw(vp, sbp, cr); 2370 if (!error) { 2371 (void) strncpy(sbp->f_basetype, 2372 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2373 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2374 } else { 2375 nfs4_purge_stale_fh(error, vp, cr); 2376 } 2377 2378 VN_RELE(vp); 2379 2380 return (error); 2381 } 2382 2383 static kmutex_t nfs4_syncbusy; 2384 2385 /* 2386 * Flush dirty nfs files for file system vfsp. 2387 * If vfsp == NULL, all nfs files are flushed. 2388 * 2389 * SYNC_CLOSE in flag is passed to us to 2390 * indicate that we are shutting down and or 2391 * rebooting. 2392 */ 2393 static int 2394 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2395 { 2396 /* 2397 * Cross-zone calls are OK here, since this translates to a 2398 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2399 */ 2400 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2401 r4flush(vfsp, cr); 2402 mutex_exit(&nfs4_syncbusy); 2403 } 2404 2405 /* 2406 * if SYNC_CLOSE is set then we know that 2407 * the system is rebooting, mark the mntinfo 2408 * for later examination. 
2409 */ 2410 if (vfsp && (flag & SYNC_CLOSE)) { 2411 mntinfo4_t *mi; 2412 2413 mi = VFTOMI4(vfsp); 2414 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2415 mutex_enter(&mi->mi_lock); 2416 mi->mi_flags |= MI4_SHUTDOWN; 2417 mutex_exit(&mi->mi_lock); 2418 } 2419 } 2420 return (0); 2421 } 2422 2423 /* 2424 * vget is difficult, if not impossible, to support in v4 because we don't 2425 * know the parent directory or name, which makes it impossible to create a 2426 * useful shadow vnode. And we need the shadow vnode for things like 2427 * OPEN. 2428 */ 2429 2430 /* ARGSUSED */ 2431 /* 2432 * XXX Check nfs4_vget_pseudo() for dependency. 2433 */ 2434 static int 2435 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2436 { 2437 return (EREMOTE); 2438 } 2439 2440 /* 2441 * nfs4_mountroot get called in the case where we are diskless booting. All 2442 * we need from here is the ability to get the server info and from there we 2443 * can simply call nfs4_rootvp. 2444 */ 2445 /* ARGSUSED */ 2446 static int 2447 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2448 { 2449 vnode_t *rtvp; 2450 char root_hostname[SYS_NMLN+1]; 2451 struct servinfo4 *svp; 2452 int error; 2453 int vfsflags; 2454 size_t size; 2455 char *root_path; 2456 struct pathname pn; 2457 char *name; 2458 cred_t *cr; 2459 mntinfo4_t *mi; 2460 struct nfs_args args; /* nfs mount arguments */ 2461 static char token[10]; 2462 nfs4_error_t n4e; 2463 2464 bzero(&args, sizeof (args)); 2465 2466 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2467 clkset(-1L); /* hack for now - until we get time svc? */ 2468 2469 if (why == ROOT_REMOUNT) { 2470 /* 2471 * Shouldn't happen. 2472 */ 2473 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2474 } 2475 2476 if (why == ROOT_UNMOUNT) { 2477 /* 2478 * Nothing to do for NFS. 2479 */ 2480 return (0); 2481 } 2482 2483 /* 2484 * why == ROOT_INIT 2485 */ 2486 2487 name = token; 2488 *name = 0; 2489 (void) getfsname("root", name, sizeof (token)); 2490 2491 pn_alloc(&pn); 2492 root_path = pn.pn_path; 2493 2494 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2495 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2496 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2497 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2498 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2499 2500 /* 2501 * Get server address 2502 * Get the root path 2503 * Get server's transport 2504 * Get server's hostname 2505 * Get options 2506 */ 2507 args.addr = &svp->sv_addr; 2508 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2509 args.fh = (char *)&svp->sv_fhandle; 2510 args.knconf = svp->sv_knconf; 2511 args.hostname = root_hostname; 2512 vfsflags = 0; 2513 if (error = mount_root(*name ? 
name : "root", root_path, NFS_V4, 2514 &args, &vfsflags)) { 2515 if (error == EPROTONOSUPPORT) 2516 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2517 "mount_root failed: server doesn't support NFS V4"); 2518 else 2519 nfs_cmn_err(error, CE_WARN, 2520 "nfs4_mountroot: mount_root failed: %m"); 2521 nfs_rw_exit(&svp->sv_lock); 2522 sv4_free(svp); 2523 pn_free(&pn); 2524 return (error); 2525 } 2526 nfs_rw_exit(&svp->sv_lock); 2527 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2528 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2529 (void) strcpy(svp->sv_hostname, root_hostname); 2530 2531 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2532 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2533 (void) strcpy(svp->sv_path, root_path); 2534 2535 /* 2536 * Force root partition to always be mounted with AUTH_UNIX for now 2537 */ 2538 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2539 svp->sv_secdata->secmod = AUTH_UNIX; 2540 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2541 svp->sv_secdata->data = NULL; 2542 2543 cr = crgetcred(); 2544 rtvp = NULL; 2545 2546 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2547 2548 if (error) { 2549 crfree(cr); 2550 pn_free(&pn); 2551 sv4_free(svp); 2552 return (error); 2553 } 2554 2555 mi = VTOMI4(rtvp); 2556 2557 /* 2558 * Send client id to the server, if necessary 2559 */ 2560 nfs4_error_zinit(&n4e); 2561 nfs4setclientid(mi, cr, FALSE, &n4e); 2562 error = n4e.error; 2563 2564 crfree(cr); 2565 2566 if (error) { 2567 pn_free(&pn); 2568 goto errout; 2569 } 2570 2571 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2572 if (error) { 2573 nfs_cmn_err(error, CE_WARN, 2574 "nfs4_mountroot: invalid root mount options"); 2575 pn_free(&pn); 2576 goto errout; 2577 } 2578 2579 (void) vfs_lock_wait(vfsp); 2580 vfs_add(NULL, vfsp, vfsflags); 2581 vfs_unlock(vfsp); 2582 2583 size = strlen(svp->sv_hostname); 2584 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2585 rootfs.bo_name[size] = ':'; 2586 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2587 2588 pn_free(&pn); 2589 2590 errout: 2591 if (error) { 2592 sv4_free(svp); 2593 nfs4_async_stop(vfsp); 2594 nfs4_async_manager_stop(vfsp); 2595 } 2596 2597 if (rtvp != NULL) 2598 VN_RELE(rtvp); 2599 2600 return (error); 2601 } 2602 2603 /* 2604 * Initialization routine for VFS routines. Should only be called once 2605 */ 2606 int 2607 nfs4_vfsinit(void) 2608 { 2609 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2610 nfs4setclientid_init(); 2611 return (0); 2612 } 2613 2614 void 2615 nfs4_vfsfini(void) 2616 { 2617 nfs4setclientid_fini(); 2618 mutex_destroy(&nfs4_syncbusy); 2619 } 2620 2621 void 2622 nfs4_freevfs(vfs_t *vfsp) 2623 { 2624 mntinfo4_t *mi; 2625 2626 /* need to release the initial hold */ 2627 mi = VFTOMI4(vfsp); 2628 MI4_RELE(mi); 2629 } 2630 2631 /* 2632 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2633 */ 2634 struct nfs4_server nfs4_server_lst = 2635 { &nfs4_server_lst, &nfs4_server_lst }; 2636 2637 kmutex_t nfs4_server_lst_lock; 2638 2639 static void 2640 nfs4setclientid_init(void) 2641 { 2642 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2643 } 2644 2645 static void 2646 nfs4setclientid_fini(void) 2647 { 2648 mutex_destroy(&nfs4_server_lst_lock); 2649 } 2650 2651 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2652 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2653 2654 /* 2655 * Set the clientid for the server for "mi". No-op if the clientid is 2656 * already set. 
2657 * 2658 * The recovery boolean should be set to TRUE if this function was called 2659 * by the recovery code, and FALSE otherwise. This is used to determine 2660 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock 2661 * for adding a mntinfo4_t to a nfs4_server_t. 2662 * 2663 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then 2664 * 'n4ep->error' is set to geterrno4(n4ep->stat). 2665 */ 2666 void 2667 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep) 2668 { 2669 struct nfs4_server *np; 2670 struct servinfo4 *svp = mi->mi_curr_serv; 2671 nfs4_recov_state_t recov_state; 2672 int num_retries = 0; 2673 bool_t retry; 2674 cred_t *lcr = NULL; 2675 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */ 2676 time_t lease_time = 0; 2677 2678 recov_state.rs_flags = 0; 2679 recov_state.rs_num_retry_despite_err = 0; 2680 ASSERT(n4ep != NULL); 2681 2682 recov_retry: 2683 retry = FALSE; 2684 nfs4_error_zinit(n4ep); 2685 if (!recovery) 2686 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2687 2688 mutex_enter(&nfs4_server_lst_lock); 2689 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2690 mutex_exit(&nfs4_server_lst_lock); 2691 if (!np) { 2692 struct nfs4_server *tnp; 2693 np = new_nfs4_server(svp, cr); 2694 mutex_enter(&np->s_lock); 2695 2696 mutex_enter(&nfs4_server_lst_lock); 2697 tnp = servinfo4_to_nfs4_server(svp); 2698 if (tnp) { 2699 /* 2700 * another thread snuck in and put server on list. 2701 * since we aren't adding it to the nfs4_server_list 2702 * we need to set the ref count to 0 and destroy it. 2703 */ 2704 np->s_refcnt = 0; 2705 destroy_nfs4_server(np); 2706 np = tnp; 2707 } else { 2708 /* 2709 * do not give list a reference until everything 2710 * succeeds 2711 */ 2712 insque(np, &nfs4_server_lst); 2713 } 2714 mutex_exit(&nfs4_server_lst_lock); 2715 } 2716 ASSERT(MUTEX_HELD(&np->s_lock)); 2717 /* 2718 * If we find the server already has N4S_CLIENTID_SET, then 2719 * just return, we've already done SETCLIENTID to that server 2720 */ 2721 if (np->s_flags & N4S_CLIENTID_SET) { 2722 /* add mi to np's mntinfo4_list */ 2723 nfs4_add_mi_to_server(np, mi); 2724 if (!recovery) 2725 nfs_rw_exit(&mi->mi_recovlock); 2726 mutex_exit(&np->s_lock); 2727 nfs4_server_rele(np); 2728 return; 2729 } 2730 mutex_exit(&np->s_lock); 2731 2732 2733 /* 2734 * Drop the mi_recovlock since nfs4_start_op will 2735 * acquire it again for us. 2736 */ 2737 if (!recovery) { 2738 nfs_rw_exit(&mi->mi_recovlock); 2739 2740 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 2741 if (n4ep->error) { 2742 nfs4_server_rele(np); 2743 return; 2744 } 2745 } 2746 2747 mutex_enter(&np->s_lock); 2748 while (np->s_flags & N4S_CLIENTID_PEND) { 2749 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 2750 mutex_exit(&np->s_lock); 2751 nfs4_server_rele(np); 2752 if (!recovery) 2753 nfs4_end_op(mi, NULL, NULL, &recov_state, 2754 recovery); 2755 n4ep->error = EINTR; 2756 return; 2757 } 2758 } 2759 2760 if (np->s_flags & N4S_CLIENTID_SET) { 2761 /* XXX copied/pasted from above */ 2762 /* add mi to np's mntinfo4_list */ 2763 nfs4_add_mi_to_server(np, mi); 2764 mutex_exit(&np->s_lock); 2765 nfs4_server_rele(np); 2766 if (!recovery) 2767 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2768 return; 2769 } 2770 2771 /* 2772 * Reset the N4S_CB_PINGED flag. This is used to 2773 * indicate if we have received a CB_NULL from the 2774 * server. Also we reset the waiter flag. 
2775 */ 2776 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 2777 /* any failure must now clear this flag */ 2778 np->s_flags |= N4S_CLIENTID_PEND; 2779 mutex_exit(&np->s_lock); 2780 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse); 2781 2782 if (n4ep->error == EACCES) { 2783 /* 2784 * If the uid is set then set the creds for secure mounts 2785 * by proxy processes such as automountd. 2786 */ 2787 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2788 if (svp->sv_secdata->uid != 0) { 2789 lcr = crdup(cr); 2790 (void) crsetugid(lcr, svp->sv_secdata->uid, 2791 crgetgid(cr)); 2792 } 2793 nfs_rw_exit(&svp->sv_lock); 2794 2795 if (lcr != NULL) { 2796 mutex_enter(&np->s_lock); 2797 crfree(np->s_cred); 2798 np->s_cred = lcr; 2799 mutex_exit(&np->s_lock); 2800 nfs4setclientid_otw(mi, svp, lcr, np, n4ep, 2801 &retry_inuse); 2802 } 2803 } 2804 mutex_enter(&np->s_lock); 2805 lease_time = np->s_lease_time; 2806 np->s_flags &= ~N4S_CLIENTID_PEND; 2807 mutex_exit(&np->s_lock); 2808 2809 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 2810 /* 2811 * Start recovery if failover is a possibility. If 2812 * invoked by the recovery thread itself, then just 2813 * return and let it handle the failover first. NB: 2814 * recovery is not allowed if the mount is in progress 2815 * since the infrastructure is not sufficiently setup 2816 * to allow it. Just return the error (after suitable 2817 * retries). 2818 */ 2819 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 2820 (void) nfs4_start_recovery(n4ep, mi, NULL, 2821 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 2822 /* 2823 * Don't retry here, just return and let 2824 * recovery take over. 2825 */ 2826 if (recovery) 2827 retry = FALSE; 2828 } else if (nfs4_rpc_retry_error(n4ep->error) || 2829 n4ep->stat == NFS4ERR_RESOURCE || 2830 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 2831 2832 retry = TRUE; 2833 /* 2834 * Always retry if in recovery or once had 2835 * contact with the server (but now it's 2836 * overloaded). 2837 */ 2838 if (recovery == TRUE || 2839 n4ep->error == ETIMEDOUT || 2840 n4ep->error == ECONNRESET) 2841 num_retries = 0; 2842 } else if (retry_inuse && n4ep->error == 0 && 2843 n4ep->stat == NFS4ERR_CLID_INUSE) { 2844 retry = TRUE; 2845 num_retries = 0; 2846 } 2847 } else { 2848 /* 2849 * Since everything succeeded, give the list its reference now, 2850 * unless add_new_nfs4_server() has already done so (the 2851 * recovery/failover case, in which case the node is already on 2852 * the list with its reference). 2853 */ 2854 mutex_enter(&np->s_lock); 2855 if ((np->s_flags & N4S_INSERTED) == 0) { 2856 np->s_refcnt++; 2857 np->s_flags |= N4S_INSERTED; 2858 } 2859 mutex_exit(&np->s_lock); 2860 } 2861 2862 if (!recovery) 2863 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 2864 2865 2866 if (retry && num_retries++ < nfs4_num_sclid_retries) { 2867 if (retry_inuse) { 2868 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 2869 retry_inuse = 0; 2870 } else 2871 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 2872 2873 nfs4_server_rele(np); 2874 goto recov_retry; 2875 } 2876 2877 2878 if (n4ep->error == 0) 2879 n4ep->error = geterrno4(n4ep->stat); 2880 2881 /* broadcast before release in case no other threads are waiting */ 2882 cv_broadcast(&np->s_clientid_pend); 2883 nfs4_server_rele(np); 2884 } 2885 2886 int nfs4setclientid_otw_debug = 0; 2887 2888 /* 2889 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM, 2890 * but nothing else; the calling function must be designed to handle those 2891 * other errors.
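 *
 * Two compounds are sent below: PUTROOTFH + GETATTR(lease_time) +
 * SETCLIENTID, followed by a single-op SETCLIENTID_CONFIRM compound; the
 * wall-clock time around the confirm call is also used to estimate the
 * server's propagation delay.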
2892 */ 2893 static void 2894 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr, 2895 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep) 2896 { 2897 COMPOUND4args_clnt args; 2898 COMPOUND4res_clnt res; 2899 nfs_argop4 argop[3]; 2900 SETCLIENTID4args *s_args; 2901 SETCLIENTID4resok *s_resok; 2902 int doqueue = 1; 2903 nfs4_ga_res_t *garp = NULL; 2904 timespec_t prop_time, after_time; 2905 verifier4 verf; 2906 clientid4 tmp_clientid; 2907 2908 ASSERT(!MUTEX_HELD(&np->s_lock)); 2909 2910 args.ctag = TAG_SETCLIENTID; 2911 2912 args.array = argop; 2913 args.array_len = 3; 2914 2915 /* PUTROOTFH */ 2916 argop[0].argop = OP_PUTROOTFH; 2917 2918 /* GETATTR */ 2919 argop[1].argop = OP_GETATTR; 2920 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK; 2921 argop[1].nfs_argop4_u.opgetattr.mi = mi; 2922 2923 /* SETCLIENTID */ 2924 argop[2].argop = OP_SETCLIENTID; 2925 2926 s_args = &argop[2].nfs_argop4_u.opsetclientid; 2927 2928 mutex_enter(&np->s_lock); 2929 2930 s_args->client.verifier = np->clidtosend.verifier; 2931 s_args->client.id_len = np->clidtosend.id_len; 2932 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT); 2933 s_args->client.id_val = np->clidtosend.id_val; 2934 2935 /* 2936 * Callback needs to happen on non-RDMA transport. 2937 * Check if we have saved the original knetconfig; 2938 * if so, use that instead. 2939 */ 2940 if (svp->sv_origknconf != NULL) 2941 nfs4_cb_args(np, svp->sv_origknconf, s_args); 2942 else 2943 nfs4_cb_args(np, svp->sv_knconf, s_args); 2944 2945 mutex_exit(&np->s_lock); 2946 2947 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2948 2949 if (ep->error) 2950 return; 2951 2952 /* getattr lease_time res */ 2953 if (res.array_len >= 2) { 2954 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2955 2956 #ifndef _LP64 2957 /* 2958 * The 32 bit client cannot handle a lease time greater than 2959 * (INT32_MAX/1000000). This is due to the use of the 2960 * lease_time in calls to drv_usectohz() in 2961 * nfs4_renew_lease_thread(). The problem is that 2962 * drv_usectohz() takes a time_t (which is just a long = 4 2963 * bytes) as its parameter. The lease_time is multiplied by 2964 * 1000000 to convert seconds to usecs for the parameter. If 2965 * a number bigger than (INT32_MAX/1000000) is used then we 2966 * overflow on the 32bit client. 2967 */ 2968 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) { 2969 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000; 2970 } 2971 #endif 2972 2973 mutex_enter(&np->s_lock); 2974 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime; 2975 2976 /* 2977 * Keep track of the lease period for the mi's 2978 * mi_msg_list. We need an appropriate time 2979 * bound to associate past facts with a current 2980 * event. The lease period is perfect for this. 2981 */ 2982 mutex_enter(&mi->mi_msg_list_lock); 2983 mi->mi_lease_period = np->s_lease_time; 2984 mutex_exit(&mi->mi_msg_list_lock); 2985 mutex_exit(&np->s_lock); 2986 } 2987 2988 2989 if (res.status == NFS4ERR_CLID_INUSE) { 2990 clientaddr4 *clid_inuse; 2991 2992 if (!(*retry_inusep)) { 2993 clid_inuse = &res.array->nfs_resop4_u. 2994 opsetclientid.SETCLIENTID4res_u.client_using; 2995 2996 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 2997 "NFS4 mount (SETCLIENTID failed)."
2998 " nfs4_client_id.id is in" 2999 "use already by: r_netid<%s> r_addr<%s>", 3000 clid_inuse->r_netid, clid_inuse->r_addr); 3001 } 3002 3003 /* 3004 * XXX - The client should be more robust in its 3005 * handling of clientid in use errors (regen another 3006 * clientid and try again?) 3007 */ 3008 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3009 return; 3010 } 3011 3012 if (res.status) { 3013 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3014 return; 3015 } 3016 3017 s_resok = &res.array[2].nfs_resop4_u. 3018 opsetclientid.SETCLIENTID4res_u.resok4; 3019 3020 tmp_clientid = s_resok->clientid; 3021 3022 verf = s_resok->setclientid_confirm; 3023 3024 #ifdef DEBUG 3025 if (nfs4setclientid_otw_debug) { 3026 union { 3027 clientid4 clientid; 3028 int foo[2]; 3029 } cid; 3030 3031 cid.clientid = s_resok->clientid; 3032 3033 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3034 "nfs4setclientid_otw: OK, clientid = %x,%x, " 3035 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf); 3036 } 3037 #endif 3038 3039 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3040 3041 /* Confirm the client id and get the lease_time attribute */ 3042 3043 args.ctag = TAG_SETCLIENTID_CF; 3044 3045 args.array = argop; 3046 args.array_len = 1; 3047 3048 argop[0].argop = OP_SETCLIENTID_CONFIRM; 3049 3050 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid; 3051 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf; 3052 3053 /* used to figure out RTT for np */ 3054 gethrestime(&prop_time); 3055 3056 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: " 3057 "start time: %ld sec %ld nsec", prop_time.tv_sec, 3058 prop_time.tv_nsec)); 3059 3060 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 3061 3062 gethrestime(&after_time); 3063 mutex_enter(&np->s_lock); 3064 np->propagation_delay.tv_sec = 3065 MAX(1, after_time.tv_sec - prop_time.tv_sec); 3066 mutex_exit(&np->s_lock); 3067 3068 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: " 3069 "finish time: %ld sec ", after_time.tv_sec)); 3070 3071 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: " 3072 "propagation delay set to %ld sec", 3073 np->propagation_delay.tv_sec)); 3074 3075 if (ep->error) 3076 return; 3077 3078 if (res.status == NFS4ERR_CLID_INUSE) { 3079 clientaddr4 *clid_inuse; 3080 3081 if (!(*retry_inusep)) { 3082 clid_inuse = &res.array->nfs_resop4_u. 3083 opsetclientid.SETCLIENTID4res_u.client_using; 3084 3085 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 3086 "SETCLIENTID_CONFIRM failed. " 3087 "nfs4_client_id.id is in use already by: " 3088 "r_netid<%s> r_addr<%s>", 3089 clid_inuse->r_netid, clid_inuse->r_addr); 3090 } 3091 3092 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3093 return; 3094 } 3095 3096 if (res.status) { 3097 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3098 return; 3099 } 3100 3101 mutex_enter(&np->s_lock); 3102 np->clientid = tmp_clientid; 3103 np->s_flags |= N4S_CLIENTID_SET; 3104 3105 /* Add mi to np's mntinfo4 list */ 3106 nfs4_add_mi_to_server(np, mi); 3107 3108 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) { 3109 /* 3110 * Start lease management thread. 3111 * Keep trying until we succeed. 
3112 */ 3113 3114 np->s_refcnt++; /* pass reference to thread */ 3115 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0, 3116 minclsyspri); 3117 } 3118 mutex_exit(&np->s_lock); 3119 3120 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3121 } 3122 3123 /* 3124 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3125 * mi's clientid the same as sp's. 3126 * Assumes sp is locked down. 3127 */ 3128 void 3129 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3130 { 3131 mntinfo4_t *tmi; 3132 int in_list = 0; 3133 3134 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3135 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3136 ASSERT(sp != &nfs4_server_lst); 3137 ASSERT(MUTEX_HELD(&sp->s_lock)); 3138 3139 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3140 "nfs4_add_mi_to_server: add mi %p to sp %p", 3141 (void*)mi, (void*)sp)); 3142 3143 for (tmi = sp->mntinfo4_list; 3144 tmi != NULL; 3145 tmi = tmi->mi_clientid_next) { 3146 if (tmi == mi) { 3147 NFS4_DEBUG(nfs4_client_lease_debug, 3148 (CE_NOTE, 3149 "nfs4_add_mi_to_server: mi in list")); 3150 in_list = 1; 3151 } 3152 } 3153 3154 /* 3155 * First put a hold on the mntinfo4's vfsp so that references via 3156 * mntinfo4_list will be valid. 3157 */ 3158 if (!in_list) 3159 VFS_HOLD(mi->mi_vfsp); 3160 3161 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3162 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3163 3164 if (!in_list) { 3165 if (sp->mntinfo4_list) 3166 sp->mntinfo4_list->mi_clientid_prev = mi; 3167 mi->mi_clientid_next = sp->mntinfo4_list; 3168 sp->mntinfo4_list = mi; 3169 mi->mi_srvsettime = gethrestime_sec(); 3170 } 3171 3172 /* set mi's clientid to that of sp's for later matching */ 3173 mi->mi_clientid = sp->clientid; 3174 3175 /* 3176 * Update the clientid for any other mi's belonging to sp. This 3177 * must be done here while we hold sp->s_lock, so that 3178 * find_nfs4_server() continues to work. 3179 */ 3180 3181 for (tmi = sp->mntinfo4_list; 3182 tmi != NULL; 3183 tmi = tmi->mi_clientid_next) { 3184 if (tmi != mi) { 3185 tmi->mi_clientid = sp->clientid; 3186 } 3187 } 3188 } 3189 3190 /* 3191 * Remove the mi from sp's mntinfo4_list and release its reference. 3192 * Exception: if mi still has open files, flag it for later removal (when 3193 * all the files are closed). 3194 * 3195 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3196 * thread to exit. 3197 */ 3198 static void 3199 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3200 { 3201 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3202 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3203 (void*)mi, (void*)sp)); 3204 3205 ASSERT(sp != NULL); 3206 ASSERT(MUTEX_HELD(&sp->s_lock)); 3207 ASSERT(mi->mi_open_files >= 0); 3208 3209 /* 3210 * First make sure this mntinfo4 can be taken off of the list, 3211 * ie: it doesn't have any open files remaining. 
3212 */ 3213 if (mi->mi_open_files > 0) { 3214 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3215 "nfs4_remove_mi_from_server_nolock: don't " 3216 "remove mi since it still has files open")); 3217 3218 mutex_enter(&mi->mi_lock); 3219 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3220 mutex_exit(&mi->mi_lock); 3221 return; 3222 } 3223 3224 VFS_HOLD(mi->mi_vfsp); 3225 remove_mi(sp, mi); 3226 VFS_RELE(mi->mi_vfsp); 3227 3228 if (sp->mntinfo4_list == NULL) { 3229 /* last fs unmounted, kill the thread */ 3230 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3231 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3232 nfs4_mark_srv_dead(sp); 3233 } 3234 } 3235 3236 /* 3237 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3238 */ 3239 static void 3240 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3241 { 3242 ASSERT(MUTEX_HELD(&sp->s_lock)); 3243 3244 /* 3245 * We release a reference, and the caller must still have a 3246 * reference. 3247 */ 3248 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3249 3250 if (mi->mi_clientid_prev) { 3251 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3252 } else { 3253 /* This is the first mi in sp's mntinfo4_list */ 3254 /* 3255 * Make sure the first mntinfo4 in the list is the actual 3256 * mntinfo4 passed in. 3257 */ 3258 ASSERT(sp->mntinfo4_list == mi); 3259 3260 sp->mntinfo4_list = mi->mi_clientid_next; 3261 } 3262 if (mi->mi_clientid_next) 3263 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3264 3265 /* Now mark the mntinfo4's links as being removed */ 3266 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3267 3268 VFS_RELE(mi->mi_vfsp); 3269 } 3270 3271 /* 3272 * Free all the entries in sp's mntinfo4_list. 3273 */ 3274 static void 3275 remove_all_mi(nfs4_server_t *sp) 3276 { 3277 mntinfo4_t *mi; 3278 3279 ASSERT(MUTEX_HELD(&sp->s_lock)); 3280 3281 while (sp->mntinfo4_list != NULL) { 3282 mi = sp->mntinfo4_list; 3283 /* 3284 * Grab a reference in case there is only one left (which 3285 * remove_mi() frees). 3286 */ 3287 VFS_HOLD(mi->mi_vfsp); 3288 remove_mi(sp, mi); 3289 VFS_RELE(mi->mi_vfsp); 3290 } 3291 } 3292 3293 /* 3294 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3295 * 3296 * This version can be called with a null nfs4_server_t arg, 3297 * and will either find the right one and handle locking, or 3298 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3299 */ 3300 void 3301 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3302 { 3303 nfs4_server_t *sp; 3304 3305 if (esp == NULL) { 3306 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3307 sp = find_nfs4_server_all(mi, 1); 3308 } else 3309 sp = esp; 3310 3311 if (sp != NULL) 3312 nfs4_remove_mi_from_server_nolock(mi, sp); 3313 3314 /* 3315 * If we had a valid esp as input, the calling function will be 3316 * responsible for unlocking the esp nfs4_server. 3317 */ 3318 if (esp == NULL) { 3319 if (sp != NULL) 3320 mutex_exit(&sp->s_lock); 3321 nfs_rw_exit(&mi->mi_recovlock); 3322 if (sp != NULL) 3323 nfs4_server_rele(sp); 3324 } 3325 } 3326 3327 /* 3328 * Return TRUE if the given server has any non-unmounted filesystems. 3329 */ 3330 3331 bool_t 3332 nfs4_fs_active(nfs4_server_t *sp) 3333 { 3334 mntinfo4_t *mi; 3335 3336 ASSERT(MUTEX_HELD(&sp->s_lock)); 3337 3338 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3339 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3340 return (TRUE); 3341 } 3342 3343 return (FALSE); 3344 } 3345 3346 /* 3347 * Mark sp as finished and notify any waiters. 
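 * (The lease renewal thread is the main consumer; it notices s_thread_exit
 * and cv_thread_exit and shuts itself down.)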
3348 */ 3349 3350 void 3351 nfs4_mark_srv_dead(nfs4_server_t *sp) 3352 { 3353 ASSERT(MUTEX_HELD(&sp->s_lock)); 3354 3355 sp->s_thread_exit = NFS4_THREAD_EXIT; 3356 cv_broadcast(&sp->cv_thread_exit); 3357 } 3358 3359 /* 3360 * Create a new nfs4_server_t structure. 3361 * Returns new node unlocked and not in list, but with a reference count of 3362 * 1. 3363 */ 3364 struct nfs4_server * 3365 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3366 { 3367 struct nfs4_server *np; 3368 timespec_t tt; 3369 union { 3370 struct { 3371 uint32_t sec; 3372 uint32_t subsec; 3373 } un_curtime; 3374 verifier4 un_verifier; 3375 } nfs4clientid_verifier; 3376 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3377 int len; 3378 3379 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3380 np->saddr.len = svp->sv_addr.len; 3381 np->saddr.maxlen = svp->sv_addr.maxlen; 3382 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3383 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3384 np->s_refcnt = 1; 3385 3386 /* 3387 * Build the nfs_client_id4 for this server mount. Ensure 3388 * the verifier is useful and that the identification is 3389 * somehow based on the server's address for the case of 3390 * multi-homed servers. 3391 */ 3392 nfs4clientid_verifier.un_verifier = 0; 3393 gethrestime(&tt); 3394 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3395 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3396 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3397 3398 /* 3399 * calculate the length of the opaque identifier. Subtract 2 3400 * for the "%s" and add the traditional +1 for null 3401 * termination. 3402 */ 3403 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3404 np->clidtosend.id_len = len + np->saddr.maxlen; 3405 3406 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3407 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3408 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3409 3410 np->s_flags = 0; 3411 np->mntinfo4_list = NULL; 3412 /* save cred for issuing rfs4calls inside the renew thread */ 3413 crhold(cr); 3414 np->s_cred = cr; 3415 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3416 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3417 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3418 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3419 offsetof(rnode4_t, r_deleg_link)); 3420 np->s_thread_exit = 0; 3421 np->state_ref_count = 0; 3422 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3423 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3424 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3425 np->s_otw_call_count = 0; 3426 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3427 np->zoneid = getzoneid(); 3428 np->zone_globals = nfs4_get_callback_globals(); 3429 ASSERT(np->zone_globals != NULL); 3430 return (np); 3431 } 3432 3433 /* 3434 * Create a new nfs4_server_t structure and add it to the list. 3435 * Returns new node locked; reference must eventually be freed. 
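 * The caller must hold nfs4_server_lst_lock (asserted below). The node
 * comes back holding two references: the caller's (from new_nfs4_server())
 * plus one for the list itself.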
3436 */ 3437 static struct nfs4_server * 3438 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3439 { 3440 nfs4_server_t *sp; 3441 3442 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3443 sp = new_nfs4_server(svp, cr); 3444 mutex_enter(&sp->s_lock); 3445 insque(sp, &nfs4_server_lst); 3446 sp->s_refcnt++; /* list gets a reference */ 3447 sp->s_flags |= N4S_INSERTED; 3448 sp->clientid = 0; 3449 return (sp); 3450 } 3451 3452 int nfs4_server_t_debug = 0; 3453 3454 #ifdef lint 3455 extern void 3456 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3457 #endif 3458 3459 #ifndef lint 3460 #ifdef DEBUG 3461 void 3462 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3463 { 3464 int hash16(void *p, int len); 3465 nfs4_server_t *np; 3466 3467 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3468 "dumping nfs4_server_t list in %s", txt)); 3469 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3470 "mi 0x%p, want clientid %llx, addr %d/%04X", 3471 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3472 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3473 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3474 np = np->forw) { 3475 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3476 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3477 np, (longlong_t)np->clientid, np->saddr.len, 3478 hash16((void *)np->saddr.buf, np->saddr.len), 3479 np->state_ref_count)); 3480 if (np->saddr.len == srv_p->sv_addr.len && 3481 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3482 np->saddr.len) == 0) 3483 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3484 " - address matches")); 3485 if (np->clientid == clientid || np->clientid == 0) 3486 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3487 " - clientid matches")); 3488 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3489 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3490 " - thread not exiting")); 3491 } 3492 delay(hz); 3493 } 3494 #endif 3495 #endif 3496 3497 3498 /* 3499 * Move a mntinfo4_t from one server list to another. 3500 * Locking of the two nfs4_server_t nodes will be done in list order. 3501 * 3502 * Returns NULL if the current nfs4_server_t for the filesystem could not 3503 * be found (e.g., due to forced unmount). Otherwise returns a reference 3504 * to the new nfs4_server_t, which must eventually be freed. 3505 */ 3506 nfs4_server_t * 3507 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3508 { 3509 nfs4_server_t *p, *op = NULL, *np = NULL; 3510 int num_open; 3511 zoneid_t zoneid = nfs_zoneid(); 3512 3513 ASSERT(nfs_zone() == mi->mi_zone); 3514 3515 mutex_enter(&nfs4_server_lst_lock); 3516 #ifdef DEBUG 3517 if (nfs4_server_t_debug) 3518 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3519 #endif 3520 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3521 if (p->zoneid != zoneid) 3522 continue; 3523 if (p->saddr.len == old->sv_addr.len && 3524 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3525 p->s_thread_exit != NFS4_THREAD_EXIT) { 3526 op = p; 3527 mutex_enter(&op->s_lock); 3528 op->s_refcnt++; 3529 } 3530 if (p->saddr.len == new->sv_addr.len && 3531 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3532 p->s_thread_exit != NFS4_THREAD_EXIT) { 3533 np = p; 3534 mutex_enter(&np->s_lock); 3535 } 3536 if (op != NULL && np != NULL) 3537 break; 3538 } 3539 if (op == NULL) { 3540 /* 3541 * Filesystem has been forcibly unmounted. Bail out. 
3542 */ 3543 if (np != NULL) 3544 mutex_exit(&np->s_lock); 3545 mutex_exit(&nfs4_server_lst_lock); 3546 return (NULL); 3547 } 3548 if (np != NULL) { 3549 np->s_refcnt++; 3550 } else { 3551 #ifdef DEBUG 3552 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3553 "nfs4_move_mi: no target nfs4_server, will create.")); 3554 #endif 3555 np = add_new_nfs4_server(new, kcred); 3556 } 3557 mutex_exit(&nfs4_server_lst_lock); 3558 3559 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3560 "nfs4_move_mi: for mi 0x%p, " 3561 "old servinfo4 0x%p, new servinfo4 0x%p, " 3562 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3563 (void*)mi, (void*)old, (void*)new, 3564 (void*)op, (void*)np)); 3565 ASSERT(op != NULL && np != NULL); 3566 3567 /* discard any delegations */ 3568 nfs4_deleg_discard(mi, op); 3569 3570 num_open = mi->mi_open_files; 3571 mi->mi_open_files = 0; 3572 op->state_ref_count -= num_open; 3573 ASSERT(op->state_ref_count >= 0); 3574 np->state_ref_count += num_open; 3575 nfs4_remove_mi_from_server_nolock(mi, op); 3576 mi->mi_open_files = num_open; 3577 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3578 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3579 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3580 3581 nfs4_add_mi_to_server(np, mi); 3582 3583 mutex_exit(&op->s_lock); 3584 nfs4_server_rele(op); 3585 mutex_exit(&np->s_lock); 3586 3587 return (np); 3588 } 3589 3590 /* 3591 * Need to have the nfs4_server_lst_lock. 3592 * Search the nfs4_server list to find a match on this servinfo4 3593 * based on its address. 3594 * 3595 * Returns NULL if no match is found. Otherwise returns a reference (which 3596 * must eventually be freed) to a locked nfs4_server. 3597 */ 3598 nfs4_server_t * 3599 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3600 { 3601 nfs4_server_t *np; 3602 zoneid_t zoneid = nfs_zoneid(); 3603 3604 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3605 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3606 if (np->zoneid == zoneid && 3607 np->saddr.len == srv_p->sv_addr.len && 3608 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3609 np->saddr.len) == 0 && 3610 np->s_thread_exit != NFS4_THREAD_EXIT) { 3611 mutex_enter(&np->s_lock); 3612 np->s_refcnt++; 3613 return (np); 3614 } 3615 } 3616 return (NULL); 3617 } 3618 3619 /* 3620 * Search the nfs4_server_lst to find a match based on clientid and 3621 * addr. 3622 * Locks the nfs4_server down if it is found and returns a reference that 3623 * must eventually be freed. 3624 * 3625 * Returns NULL if no match is found. This means one of two things: either 3626 * mi is in the process of being mounted, or mi has been unmounted. 3627 * 3628 * The caller should be holding mi->mi_recovlock, and it should continue to 3629 * hold the lock until done with the returned nfs4_server_t. Once 3630 * mi->mi_recovlock is released, there is no guarantee that the returned 3631 * mi->nfs4_server_t will continue to correspond to mi. 3632 */ 3633 nfs4_server_t * 3634 find_nfs4_server(mntinfo4_t *mi) 3635 { 3636 return (find_nfs4_server_all(mi, 0)); 3637 } 3638 3639 /* 3640 * Same as above, but takes an "all" parameter which can be 3641 * set to 1 if the caller wishes to find nfs4_server_t's which 3642 * have been marked for termination by the exit of the renew 3643 * thread. This should only be used by operations which are 3644 * cleaning up and will not cause an OTW op.
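 *
 * For illustration, a typical non-recovery caller of find_nfs4_server()
 * follows roughly this pattern (compare nfs4_free_mount() below); the
 * returned node comes back locked and referenced:
 *
 *	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
 *	sp = find_nfs4_server(mi);
 *	if (sp != NULL) {
 *		... use sp while holding sp->s_lock ...
 *		mutex_exit(&sp->s_lock);
 *		nfs4_server_rele(sp);
 *	}
 *	nfs_rw_exit(&mi->mi_recovlock);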
3645 */ 3646 nfs4_server_t * 3647 find_nfs4_server_all(mntinfo4_t *mi, int all) 3648 { 3649 nfs4_server_t *np; 3650 servinfo4_t *svp; 3651 zoneid_t zoneid = mi->mi_zone->zone_id; 3652 3653 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3654 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3655 /* 3656 * This can be called from nfs4_unmount() which can be called from the 3657 * global zone, hence it's legal for the global zone to muck with 3658 * another zone's server list, as long as it doesn't try to contact 3659 * them. 3660 */ 3661 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3662 nfs_global_client_only != 0); 3663 3664 /* 3665 * The nfs4_server_lst_lock global lock is held when we get a new 3666 * clientid (via SETCLIENTID OTW). Holding this global lock and 3667 * mi_recovlock (READER is fine) ensures that the nfs4_server 3668 * and this mntinfo4 can't get out of sync, so the following search is 3669 * always valid. 3670 */ 3671 mutex_enter(&nfs4_server_lst_lock); 3672 #ifdef DEBUG 3673 if (nfs4_server_t_debug) { 3674 /* mi->mi_clientid is unprotected, ok for debug output */ 3675 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3676 mi->mi_curr_serv); 3677 } 3678 #endif 3679 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3680 mutex_enter(&np->s_lock); 3681 svp = mi->mi_curr_serv; 3682 3683 if (np->zoneid == zoneid && 3684 np->clientid == mi->mi_clientid && 3685 np->saddr.len == svp->sv_addr.len && 3686 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3687 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3688 mutex_exit(&nfs4_server_lst_lock); 3689 np->s_refcnt++; 3690 return (np); 3691 } 3692 mutex_exit(&np->s_lock); 3693 } 3694 mutex_exit(&nfs4_server_lst_lock); 3695 3696 return (NULL); 3697 } 3698 3699 /* 3700 * Release the reference to sp and destroy it if that's the last one. 3701 */ 3702 3703 void 3704 nfs4_server_rele(nfs4_server_t *sp) 3705 { 3706 mutex_enter(&sp->s_lock); 3707 ASSERT(sp->s_refcnt > 0); 3708 sp->s_refcnt--; 3709 if (sp->s_refcnt > 0) { 3710 mutex_exit(&sp->s_lock); 3711 return; 3712 } 3713 mutex_exit(&sp->s_lock); 3714 3715 mutex_enter(&nfs4_server_lst_lock); 3716 mutex_enter(&sp->s_lock); 3717 if (sp->s_refcnt > 0) { 3718 mutex_exit(&sp->s_lock); 3719 mutex_exit(&nfs4_server_lst_lock); 3720 return; 3721 } 3722 remque(sp); 3723 sp->forw = sp->back = NULL; 3724 mutex_exit(&nfs4_server_lst_lock); 3725 destroy_nfs4_server(sp); 3726 } 3727 3728 static void 3729 destroy_nfs4_server(nfs4_server_t *sp) 3730 { 3731 ASSERT(MUTEX_HELD(&sp->s_lock)); 3732 ASSERT(sp->s_refcnt == 0); 3733 ASSERT(sp->s_otw_call_count == 0); 3734 3735 remove_all_mi(sp); 3736 3737 crfree(sp->s_cred); 3738 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 3739 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 3740 mutex_exit(&sp->s_lock); 3741 3742 /* destroy the nfs4_server */ 3743 nfs4callback_destroy(sp); 3744 list_destroy(&sp->s_deleg_list); 3745 mutex_destroy(&sp->s_lock); 3746 cv_destroy(&sp->cv_thread_exit); 3747 cv_destroy(&sp->s_cv_otw_count); 3748 cv_destroy(&sp->s_clientid_pend); 3749 cv_destroy(&sp->wait_cb_null); 3750 nfs_rw_destroy(&sp->s_recovlock); 3751 kmem_free(sp, sizeof (*sp)); 3752 } 3753 3754 /* 3755 * Lock sp, but only if it's still active (in the list and hasn't been 3756 * flagged as exiting) or 'all' is non-zero. 3757 * Returns TRUE if sp got locked and adds a reference to sp. 
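 * Unlike find_nfs4_server(), the caller here already has a pointer to sp
 * but no reference; this revalidates that sp is still on the global list
 * (and, unless 'all' is set, not marked for exit) before locking it.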
3758 */ 3759 bool_t 3760 nfs4_server_vlock(nfs4_server_t *sp, int all) 3761 { 3762 nfs4_server_t *np; 3763 3764 mutex_enter(&nfs4_server_lst_lock); 3765 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3766 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 3767 all != 0)) { 3768 mutex_enter(&np->s_lock); 3769 np->s_refcnt++; 3770 mutex_exit(&nfs4_server_lst_lock); 3771 return (TRUE); 3772 } 3773 } 3774 mutex_exit(&nfs4_server_lst_lock); 3775 return (FALSE); 3776 } 3777 3778 /* 3779 * Fork off a thread to free the data structures for a mount. 3780 */ 3781 3782 static void 3783 async_free_mount(vfs_t *vfsp, cred_t *cr) 3784 { 3785 freemountargs_t *args; 3786 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 3787 args->fm_vfsp = vfsp; 3788 VFS_HOLD(vfsp); 3789 MI4_HOLD(VFTOMI4(vfsp)); 3790 args->fm_cr = cr; 3791 crhold(cr); 3792 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 3793 minclsyspri); 3794 } 3795 3796 static void 3797 nfs4_free_mount_thread(freemountargs_t *args) 3798 { 3799 mntinfo4_t *mi; 3800 nfs4_free_mount(args->fm_vfsp, args->fm_cr); 3801 mi = VFTOMI4(args->fm_vfsp); 3802 crfree(args->fm_cr); 3803 VFS_RELE(args->fm_vfsp); 3804 MI4_RELE(mi); 3805 kmem_free(args, sizeof (freemountargs_t)); 3806 zthread_exit(); 3807 /* NOTREACHED */ 3808 } 3809 3810 /* 3811 * Thread to free the data structures for a given filesystem. 3812 */ 3813 static void 3814 nfs4_free_mount(vfs_t *vfsp, cred_t *cr) 3815 { 3816 mntinfo4_t *mi = VFTOMI4(vfsp); 3817 nfs4_server_t *sp; 3818 callb_cpr_t cpr_info; 3819 kmutex_t cpr_lock; 3820 boolean_t async_thread; 3821 int removed; 3822 3823 /* 3824 * We need to participate in the CPR framework if this is a kernel 3825 * thread. 3826 */ 3827 async_thread = (curproc == nfs_zone()->zone_zsched); 3828 if (async_thread) { 3829 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 3830 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 3831 "nfsv4AsyncUnmount"); 3832 } 3833 3834 /* 3835 * We need to wait for all outstanding OTW calls 3836 * and recovery to finish before we remove the mi 3837 * from the nfs4_server_t, as current pending 3838 * calls might still need this linkage (in order 3839 * to find a nfs4_server_t from a mntinfo4_t). 3840 */ 3841 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 3842 sp = find_nfs4_server(mi); 3843 nfs_rw_exit(&mi->mi_recovlock); 3844 3845 if (sp) { 3846 while (sp->s_otw_call_count != 0) { 3847 if (async_thread) { 3848 mutex_enter(&cpr_lock); 3849 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3850 mutex_exit(&cpr_lock); 3851 } 3852 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 3853 if (async_thread) { 3854 mutex_enter(&cpr_lock); 3855 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3856 mutex_exit(&cpr_lock); 3857 } 3858 } 3859 mutex_exit(&sp->s_lock); 3860 nfs4_server_rele(sp); 3861 sp = NULL; 3862 } 3863 3864 3865 mutex_enter(&mi->mi_lock); 3866 while (mi->mi_in_recovery != 0) { 3867 if (async_thread) { 3868 mutex_enter(&cpr_lock); 3869 CALLB_CPR_SAFE_BEGIN(&cpr_info); 3870 mutex_exit(&cpr_lock); 3871 } 3872 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 3873 if (async_thread) { 3874 mutex_enter(&cpr_lock); 3875 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 3876 mutex_exit(&cpr_lock); 3877 } 3878 } 3879 mutex_exit(&mi->mi_lock); 3880 3881 /* 3882 * The original purge of the dnlc via 'dounmount' 3883 * doesn't guarantee that another dnlc entry was not 3884 * added while we waited for all outstanding OTW 3885 * and recovery calls to finish. So re-purge the 3886 * dnlc now.
3887 */ 3888 (void) dnlc_purge_vfsp(vfsp, 0); 3889 3890 /* 3891 * We need to explicitly stop the manager thread; the async worker 3892 * threads can time out and exit on their own. 3893 */ 3894 mutex_enter(&mi->mi_async_lock); 3895 mi->mi_max_threads = 0; 3896 cv_broadcast(&mi->mi_async_work_cv); 3897 mutex_exit(&mi->mi_async_lock); 3898 if (mi->mi_manager_thread) 3899 nfs4_async_manager_stop(vfsp); 3900 3901 destroy_rtable4(vfsp, cr); 3902 3903 nfs4_remove_mi_from_server(mi, NULL); 3904 3905 if (async_thread) { 3906 mutex_enter(&cpr_lock); 3907 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 3908 mutex_destroy(&cpr_lock); 3909 } 3910 3911 removed = nfs4_mi_zonelist_remove(mi); 3912 if (removed) 3913 zone_rele(mi->mi_zone); 3914 } 3915