/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
 * triggered from a "stub" rnode via a special set of vnodeops.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/dirent.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/swap.h>
#include <sys/errno.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/cmn_err.h>
#include <sys/pathconf.h>
#include <sys/utsname.h>
#include <sys/dnlc.h>
#include <sys/acl.h>
#include <sys/systeminfo.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/list.h>
#include <sys/stat.h>
#include <sys/mntent.h>
#include <sys/priv.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>
#include <nfs/lm.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_kprot.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfsid_map.h>
#include <nfs/nfs4_idmap_impl.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
#include <vm/seg_vn.h>

#include <fs/fs_subr.h>

#include <sys/ddi.h>
#include <sys/int_fmtio.h>

#include <sys/sunddi.h>

#include <sys/priv_names.h>

extern zone_key_t	nfs4clnt_zone_key;
extern zone_key_t	nfsidmap_zone_key;

/*
 * The automatic unmounter thread stuff!
 */
static int nfs4_trigger_thread_timer = 20;	/* in seconds */

/*
 * Just a default....
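 * (This appears to be the idle timeout, in seconds, applied to
 * ephemeral mounts: it seeds ntg_mount_to, which the harvester
 * thread consults when deciding whether an ephemeral mount has
 * gone unused long enough to be unmounted.)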
 */
static uint_t nfs4_trigger_mount_to = 240;

typedef struct nfs4_trigger_globals {
	kmutex_t		ntg_forest_lock;
	uint_t			ntg_mount_to;
	int			ntg_thread_started;
	nfs4_ephemeral_tree_t	*ntg_forest;
} nfs4_trigger_globals_t;

kmutex_t	nfs4_ephemeral_thread_lock;

zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;

static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);

/*
 * Used for ephemeral mounts; contains data either duplicated from
 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
 *
 * It's intended that this structure is used solely for ephemeral
 * mount-type specific data, for passing this data to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;
	int			esi_mount_flags;
	struct netbuf		*esi_addr;
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;

/*
 * Collect together the mount-type specific and generic data args.
 */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;
	char			*dma_hostlist;	/* comma-sep. for RO failover */
	struct nfs_args		*dma_nargs;
} domount_args_t;


/*
 * The vnode ops functions for a trigger stub vnode
 */
static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
    caller_context_t *);
static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
    int *, pathname_t *);
static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
    enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
    vsecattr_t *);
static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
    int);
static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
    vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);

/*
 * Regular NFSv4 vnodeops that we need to reference directly
 */
extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
		    caller_context_t *);
extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
		    struct pathname *, int, vnode_t *, cred_t *,
		    caller_context_t *, int *, pathname_t *);
extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
		    caller_context_t *);
extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
		    caller_context_t *);
extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);

static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
    cred_t *, vnode_t **);
static int	nfs4_trigger_domount_args_create(vnode_t *, cred_t *,
    domount_args_t **dmap);
static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
    vnode_t *vp);
static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
    cred_t *);
static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
    servinfo4_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
    cred_t *);
static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
    ephemeral_servinfo_t *);
static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
static char	*nfs4_trigger_create_mntopts(vfs_t *);
static void	nfs4_trigger_destroy_mntopts(char *);
static int	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
    struct netbuf *, int);

extern int	umount2_engine(vfs_t *, int, cred_t *, int);

vnodeops_t *nfs4_trigger_vnodeops;

/*
 * These are the vnodeops that we must define for stub vnodes.
 *
 *
 * Many of the VOPs defined for NFSv4 do not need to be defined here,
 * for various reasons. This will result in the VFS default function being
 * used:
 *
 * - These VOPs require a previous VOP_OPEN to have occurred. That will have
 *   lost the reference to the stub vnode, meaning these should not be called:
 *       close, read, write, ioctl, readdir, seek.
 *
 * - These VOPs are meaningless for vnodes without data pages. Since the
 *   stub vnode is of type VDIR, these should not be called:
 *       space, getpage, putpage, map, addmap, delmap, pageio, fsync.
 *
 * - These VOPs are otherwise not applicable, and should not be called:
 *       dump, setsecattr.
 *
 *
 * These VOPs we do not want to define, but nor do we want the VFS default
 * action. Instead, we specify the VFS error function, with fs_error(), but
 * note that fs_error() is not actually called. Instead it results in the
 * use of the error function defined for the particular VOP, in vn_ops_table[]:
 *
 * - frlock, dispose, shrlock.
 *
 *
 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
 * NOTE: if any of these ops involve an OTW call with the stub FH, then
 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
 * to protect the security data in the servinfo4_t for the "parent"
 * filesystem that contains the stub.
 *
 * - These VOPs should not trigger a mount, so that "ls -l" does not:
 *       pathconf, getsecattr.
 *
 * - These VOPs would not make sense to trigger:
 *       inactive, rwlock, rwunlock, fid, realvp.
 */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

static void
nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	net->net_refcnt++;
	ASSERT(net->net_refcnt != 0);
}

static void
nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_incr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * We need a safe way to decrement the refcnt whilst the
 * lock is being held.
 */
static void
nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	ASSERT(net->net_refcnt != 0);
	net->net_refcnt--;
}

static void
nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_decr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * Trigger ops for stub vnodes; for mirror mounts, etc.
 *
 * The general idea is that a "triggering" op will first call
 * nfs4_trigger_mount(), which will find out whether a mount has already
 * been triggered.
 *
 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
 * of the covering vfs.
 *
 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
 * and again set newvp, as above.
 *
 * The triggering op may then re-issue the VOP by calling it on newvp.
 *
 * Note that some ops may perform custom action, and may or may not need
 * to trigger a mount.
 *
 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
 * and that would just recurse. Instead, we call the v4 op directly,
 * by name. This is OK, since we know that the vnode is for NFSv4,
 * otherwise it couldn't be a stub.
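 *
 * The common shape of a triggering op, roughly (cf. the bodies below):
 *
 *	error = nfs4_trigger_mount(vp, cr, &newvp);
 *	if (error)
 *		return (error);
 *	error = VOP_WHATEVER(newvp, ...);
 *	VN_RELE(newvp);
 *	return (error);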
 *
 */

static int
nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(*vpp, cr, &newvp);
	if (error)
		return (error);

	/* Release the stub vnode, as we're losing the reference to it */
	VN_RELE(*vpp);

	/* Give the caller the root vnode of the newly-mounted fs */
	*vpp = newvp;

	/* return with VN_HELD(newvp) */
	return (VOP_OPEN(vpp, flag, cr, ct));
}

void
nfs4_fake_attrs(vnode_t *vp, struct vattr *vap)
{
	uint_t mask;
	timespec_t now;

	/*
	 * Set some attributes here for referrals.
	 */
	mask = vap->va_mask;
	bzero(vap, sizeof (struct vattr));
	vap->va_mask = mask;
	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_nlink = 1;
	vap->va_size = 1;
	gethrestime(&now);
	vap->va_atime = now;
	vap->va_mtime = now;
	vap->va_ctime = now;
	vap->va_type = VDIR;
	vap->va_mode = 0555;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_rdev = 0;
	vap->va_blksize = MAXBSIZE;
	vap->va_nblocks = 1;
	vap->va_seq = 0;
}

/*
 * For the majority of cases, nfs4_trigger_getattr() will not trigger
 * a mount. However, if ATTR_TRIGGER is set, we are being informed
 * that we need to force the mount before we attempt to determine
 * the attributes. The intent is an atomic operation for security
 * testing.
 *
 * If we're not triggering a mount, we can still inquire about the
 * actual attributes from the server in the mirror mount case,
 * and will return manufactured attributes for a referral (see
 * the 'create' branch of find_referral_stubvp()).
 */
static int
nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;

	if (flags & ATTR_TRIGGER || RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) {
		vnode_t *newvp;

		error = nfs4_trigger_mount(vp, cr, &newvp);
		if (error)
			return (error);

		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
		VN_RELE(newvp);
	} else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) {

		nfs4_fake_attrs(vp, vap);
		error = 0;
	}

	return (error);
}

static int
nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
    caller_context_t *ct, int *deflags, pathname_t *rpnp)
{
	int error;
	vnode_t *newdvp;
	rnode4_t *drp = VTOR4(dvp);

	ASSERT(RP_ISSTUB(drp));

	/*
	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
	 * that up. Instead, pass onto the regular op, regardless of whether
	 * we've triggered a mount.
	 */
	if (strcmp(nm, "..") == 0)
		if (RP_ISSTUB_MIRRORMOUNT(drp)) {
			return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
			    ct, deflags, rpnp));
		} else if (RP_ISSTUB_REFERRAL(drp)) {
			/* Return the parent vnode */
			return (vtodv(dvp, vpp, cr, TRUE));
		}

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
	    deflags, rpnp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
    int flags, caller_context_t *ct, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
	    flags, ct, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newtdvp;

	error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
	if (error)
		return (error);

	/*
	 * We don't check whether svp is a stub. Let the NFSv4 code
	 * detect that error, and return accordingly.
	 */
	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
	VN_RELE(newtdvp);

	return (error);
}

static int
nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newsdvp;
	rnode4_t *tdrp = VTOR4(tdvp);

	/*
	 * We know that sdvp is a stub, otherwise we would not be here.
	 *
	 * If tdvp is also a stub, there are two possibilities: it
	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
	 *
	 * In the former case, just trigger sdvp, and treat tdvp as
	 * though it were not a stub.
	 *
	 * In the latter case, it might be a different stub for the
	 * same server fs as sdvp, or for a different server fs.
	 * Regardless, from the client perspective this would still
	 * be a cross-filesystem rename, and should not be allowed,
	 * so return EXDEV, without triggering either mount.
	 */
	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
		return (EXDEV);

	error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
	if (error)
		return (error);

	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);

	VN_RELE(newsdvp);

	return (error);
}

/* ARGSUSED */
static int
nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
    cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_READLINK(newvp, uiop, cr, ct);
	VN_RELE(newvp);

	return (error);
}

/* end of trigger vnode ops */

/*
 * See if the mount has already been done by another caller.
 */
static int
nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
    bool_t *was_mounted, vfs_t **vfsp)
{
	int error;
	mntinfo4_t *mi = VTOMI4(vp);

	*was_mounted = FALSE;

	error = vn_vfsrlock_wait(vp);
	if (error)
		return (error);

	*vfsp = vn_mountedvfs(vp);
	if (*vfsp != NULL) {
		/* the mount has already occurred */
		error = VFS_ROOT(*vfsp, newvpp);
		if (!error) {
			/* need to update the reference time */
			mutex_enter(&mi->mi_lock);
			if (mi->mi_ephemeral)
				mi->mi_ephemeral->ne_ref_time =
				    gethrestime_sec();
			mutex_exit(&mi->mi_lock);

			*was_mounted = TRUE;
		}
	}

	vn_vfsunlock(vp);
	return (0);
}

/*
 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
 *
 * The mount may have already occurred, via another thread. If not,
 * assemble the location information - which may require fetching - and
 * perform the mount.
 *
 * Sets newvp to be the root of the fs that is now covering vp. Note
 * that we return with VN_HELD(*newvp).
 *
 * The caller is responsible for passing the VOP onto the covering fs.
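 *
 * In outline (a reading aid, not a contract):
 *
 *  1) nfs4_trigger_mounted_already() - bail out early if another
 *     thread has already completed the mount;
 *  2) create, or hold, this mntinfo4's ephemeral tree, and mark it
 *     NFS4_EPHEMERAL_TREE_MOUNTING;
 *  3) nfs4_trigger_domount_args_create() - assemble the nfs_args,
 *     hostlist and ephemeral_servinfo_t;
 *  4) nfs4_trigger_domount(), using a crdup()'d credential whose
 *     privileges have been extended with crset_zone_privall().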
 */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int error;
	vfs_t *vfsp;
	rnode4_t *rp = VTOR4(vp);
	mntinfo4_t *mi = VTOMI4(vp);
	domount_args_t *dma;

	nfs4_ephemeral_tree_t *net;

	bool_t must_unlock = FALSE;
	bool_t is_building = FALSE;
	bool_t was_mounted = FALSE;

	cred_t *mcred = NULL;

	nfs4_trigger_globals_t *ntg;

	zone_t *zone = curproc->p_zone;

	ASSERT(RP_ISSTUB(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);

		MI4_HOLD(mi);
		VFS_HOLD(mi->mi_vfsp);
	} else {
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only proceed if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	error = nfs4_trigger_domount_args_create(vp, cr, &dma);
	if (error)
		goto done;

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	if (mcred == NULL) {
		error = EINVAL;
		nfs4_trigger_domount_args_destroy(dma, vp);
		goto done;
	}

	crset_zone_privall(mcred);
	if (is_system_labeled())
		(void) setpflags(NET_MAC_AWARE, 1, mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	DTRACE_PROBE2(nfs4clnt__func__referral__mount,
	    vnode_t *, vp, int, error);

	crfree(mcred);

done:

	if (must_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;

		/*
		 * REFCNT: If we are the root of the tree, then we need
		 * to keep a reference because we malloced the tree and
		 * this is where we tied it to our mntinfo.
		 *
		 * If we are not the root of the tree, then our tie to
		 * the mntinfo occurred elsewhere and we need to
		 * decrement the reference to the tree.
		 */
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		else
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}

/*
 * Collect together both the generic & mount-type specific args.
 */
static int
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr, domount_args_t **dmap)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (EINVAL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			if (status == RPC_INTR) {
				kmem_free(hostlist, MAXPATHLEN);
				nfs4_trigger_esi_destroy(esi_first, vp);
				nargs = nargs_head;
				while (nargs != NULL) {
					next = nargs->nfs_ext_u.nfs_extB.next;
					nfs4_trigger_nargs_destroy(nargs);
					nargs = next;
				}
				return (EINTR);
			} else if (status != RPC_SUCCESS) {
				/* if the server did not respond, ignore it */
				continue;
			}

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;
	*dmap = dma;

	return (0);
}

static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			do {
				struct nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
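 *
 * For mirror mounts everything is copied from the parent mount's
 * servinfo4_t (see nfs4_trigger_esi_create_mirrormount()); for
 * referrals the data is fetched over the wire, via fs_locations and
 * an upcall to nfsmapid (see nfs4_trigger_esi_create_referral()).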
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr)
{
	ephemeral_servinfo_t *esi;
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Call the ephemeral type-specific routine */
	if (RP_ISSTUB_MIRRORMOUNT(rp))
		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
	else if (RP_ISSTUB_REFERRAL(rp))
		esi = nfs4_trigger_esi_create_referral(vp, cr);
	else
		esi = NULL;
	return (esi);
}

static void
nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
{
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Currently, no need for an ephemeral type-specific routine */

	/*
	 * The contents of ephemeral_servinfo_t go into nfs_args,
	 * and will be handled by nfs4_trigger_nargs_destroy().
	 * We need only free the structure itself.
	 */
	if (esi != NULL)
		kmem_free(esi, sizeof (ephemeral_servinfo_t));
}

/*
 * Some of this may turn out to be common with other ephemeral types,
 * in which case it should be moved to nfs4_trigger_esi_create(), or a
 * common function called.
 */

/*
 * Mirror mounts case - should have all data available
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid, as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(stubpath) + 1;
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Makes an upcall to the NFSMAPID daemon to resolve the hostname of the
 * NFS server, to get the network information required to do the mount
 * call.
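 *
 * In outline: XDR-encode the server name into a refd_door_args_t,
 * door_ki_upcall() to the daemon's door, then XDR-decode the
 * refd_door_res_t that comes back into the caller's nfs_fsl_info.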
 */
int
nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
{
	door_arg_t	door_args;
	door_handle_t	dh;
	XDR		xdr;
	refd_door_args_t *xdr_argsp;
	refd_door_res_t	*orig_resp;
	k_sigset_t	smask;
	int		xdr_len = 0;
	int		res_len = 16;	/* length of an IP address */
	int		orig_reslen = res_len;
	int		error = 0;
	struct nfsidmap_globals *nig;

	if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
		return (ECONNREFUSED);

	nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
	ASSERT(nig != NULL);

	mutex_enter(&nig->nfsidmap_daemon_lock);
	dh = nig->nfsidmap_daemon_dh;
	if (dh == NULL) {
		mutex_exit(&nig->nfsidmap_daemon_lock);
		cmn_err(CE_NOTE,
		    "nfs4_callmapid: nfsmapid daemon not running; "
		    "unable to resolve host name\n");
		return (EINVAL);
	}
	door_ki_hold(dh);
	mutex_exit(&nig->nfsidmap_daemon_lock);

	xdr_len = xdr_sizeof(&(xdr_utf8string), server);

	xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
	xdr_argsp->xdr_len = xdr_len;
	xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;

	xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
	    xdr_len, XDR_ENCODE);

	if (!xdr_utf8string(&xdr, server)) {
		kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
		door_ki_rele(dh);
		return (1);
	}

	if (orig_reslen)
		orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);

	door_args.data_ptr = (char *)xdr_argsp;
	door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
	door_args.rsize = res_len;

	sigintr(&smask, 1);
	error = door_ki_upcall(dh, &door_args);
	sigunintr(&smask);

	door_ki_rele(dh);

	kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
	if (error) {
		kmem_free(orig_resp, orig_reslen);
		/*
		 * There is no door to connect to. The referral daemon
		 * must not be running yet.
		 */
		cmn_err(CE_WARN,
		    "nfsmapid not running; cannot resolve host name");
		goto out;
	}

	/*
	 * If the results buffer passed back is not the same as
	 * what was sent, free the old buffer and use the new one.
	 */
	if (orig_resp && orig_reslen) {
		refd_door_res_t *door_resp;

		door_resp = (refd_door_res_t *)door_args.rbuf;
		if ((void *)door_args.rbuf != orig_resp)
			kmem_free(orig_resp, orig_reslen);
		if (door_resp->res_status == 0) {
			xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
			    door_resp->xdr_len, XDR_DECODE);
			bzero(resp, sizeof (struct nfs_fsl_info));
			if (!xdr_nfs_fsl_info(&xdr, resp)) {
				DTRACE_PROBE2(
				    nfs4clnt__debug__referral__upcall__xdrfail,
				    struct nfs_fsl_info *, resp,
				    char *, "nfs4_callmapid");
				error = EINVAL;
			}
		} else {
			DTRACE_PROBE2(
			    nfs4clnt__debug__referral__upcall__badstatus,
			    int, door_resp->res_status,
			    char *, "nfs4_callmapid");
			error = door_resp->res_status;
		}
		kmem_free(door_args.rbuf, door_args.rsize);
	}
out:
	DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
	    char *, server, int, error);
	return (error);
}

/*
 * Fetches the fs_locations attribute. Typically called
 * from a Replication/Migration/Referrals/Mirror-mount context.
 *
 * Fills in the attributes in garp. The caller is assumed
 * to have allocated memory for garp.
 *
 * lock: if TRUE, take s_recovlock and mi_recovlock around the
 *	 rfs4call(); otherwise the caller is assumed to already
 *	 hold them.
 *
 * Returns
 *	1 for success
 *	0 for failure
 */
int
nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
    cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	int argoplist_size = 3 * sizeof (nfs_argop4);
	nfs4_server_t *sp = NULL;
	int doqueue = 1;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int retval = 1;
	struct nfs4_clnt *nfscl;

	if (lock == TRUE)
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	else
		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	sp = find_nfs4_server(mi);
	if (lock == TRUE)
		nfs_rw_exit(&mi->mi_recovlock);

	if (sp != NULL)
		mutex_exit(&sp->s_lock);

	if (lock == TRUE) {
		if (sp != NULL)
			(void) nfs_rw_enter_sig(&sp->s_recovlock,
			    RW_WRITER, 0);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
	} else {
		if (sp != NULL) {
			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
		}
	}

	/*
	 * Do we want to do the setup for recovery here?
	 *
	 * We know that the server responded to a null ping a very
	 * short time ago, and we know that we intend to do a
	 * single stateless operation - we want to fetch attributes,
	 * so we know we can't encounter errors about state. If
	 * something goes wrong with the GETATTR, like not being
	 * able to get a response from the server or getting any
	 * kind of FH error, we should fail the mount.
	 *
	 * We may want to revisit this at a later time.
	 */
	argop = kmem_alloc(argoplist_size, KM_SLEEP);

	args.ctag = TAG_GETATTR_FSLOCATION;
	/* PUTFH LOOKUP GETATTR */
	args.array_len = 3;
	args.array = argop;

	/* 0. putfh file */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. lookup name, can't be dotdot */
	argop[1].argop = OP_CLOOKUP;
	argop[1].nfs_argop4_u.opclookup.cname = nm;

	/* 2. file attrs */
	argop[2].argop = OP_GETATTR;
	argop[2].nfs_argop4_u.opgetattr.attr_request =
	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
	    FATTR4_MOUNTED_ON_FILEID_MASK;
	argop[2].nfs_argop4_u.opgetattr.mi = mi;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	if (lock == TRUE) {
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs_rw_exit(&sp->s_recovlock);
	}

	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	nfscl->nfscl_stat.referrals.value.ui64++;
	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);

	if (e.error != 0) {
		if (sp != NULL)
			nfs4_server_rele(sp);
		kmem_free(argop, argoplist_size);
		return (0);
	}

	/*
	 * Check for all possible error conditions.
	 * For valid replies without an ops array or for illegal
	 * replies, return a failure.
	 */
	if (res.status != NFS4_OK || res.array_len < 3 ||
	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
		retval = 0;
		goto exit;
	}

	/*
	 * There isn't much value in putting the attributes
	 * in the attr cache since fs_locations4 aren't
	 * encountered very frequently, so just make them
	 * available to the caller.
	 */
	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;

	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");

	/* No fs_locations? -- return a failure */
	if (garp->n4g_ext_res == NULL ||
	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
		retval = 0;
		goto exit;
	}

	if (!garp->n4g_fsid_valid)
		retval = 0;

exit:
	if (retval == 0) {
		/* the call was ok but failed validating the call results */
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	} else {
		ASSERT(callres != NULL);
		*callres = res;
	}

	if (sp != NULL)
		nfs4_server_rele(sp);
	kmem_free(argop, argoplist_size);
	return (retval);
}

/* tunable to disable referral mounts */
int nfs4_no_referrals = 0;

/*
 * Returns NULL if the vnode cannot be created or found.
 */
vnode_t *
find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
{
	nfs_fh4 *stub_fh, *dfh;
	nfs4_sharedfh_t *sfhp;
	char *newfhval;
	vnode_t *vp = NULL;
	fattr4_mounted_on_fileid mnt_on_fileid;
	nfs4_ga_res_t garp;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;
	hrtime_t t;

	if (nfs4_no_referrals)
		return (NULL);

	/*
	 * Get the mounted_on_fileid, unique on that server::fsid
	 */
	mi = VTOMI4(dvp);
	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
	    &garp, &callres, FALSE) == 0)
		return (NULL);
	mnt_on_fileid = garp.n4g_mon_fid;
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	/*
	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
	 */
	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
	newfhval = stub_fh->nfs_fh4_val;

	/* copy directory's file handle */
	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
	newfhval = newfhval + dfh->nfs_fh4_len;

	/* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
	bcopy((char *)&mnt_on_fileid, newfhval,
	    sizeof (fattr4_mounted_on_fileid));
	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);

	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid));
	kmem_free(stub_fh, sizeof (nfs_fh4));
	if (sfhp == NULL)
		return (NULL);

	t = gethrtime();
	garp.n4g_va.va_type = VDIR;
	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));

	if (vp != NULL)
		vp->v_type = VDIR;

	sfh4_rele(&sfhp);
	return (vp);
}

int
nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
{
	vnode_t *nvp;
	rnode4_t *rp;

	if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
		return (EINVAL);

	rp = VTOR4(nvp);
	mutex_enter(&rp->r_statelock);
	r4_stub_referral(rp);
	mutex_exit(&rp->r_statelock);
	dnlc_enter(dvp, nm, nvp);

	if (*vpp != NULL)
		VN_RELE(*vpp);	/* no longer need this vnode */

	*vpp = nvp;

	return (0);
}

/*
 * Fetch the location information and resolve the new server.
 * The caller needs to free the XDR data which is returned.
 * Input: mount info, shared filehandle, nodename
 * Return: index of the result, or -1 on error
 * Output: FsLocations Info, Resolved Server Info.
 */
int
nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh,
    char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res,
    struct nfs_fsl_info *fsloc)
{
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	int ret, i, error;
	nfs4_ga_res_t garp;
	COMPOUND4res_clnt callres;
	struct knetconfig *knc;

	ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE);
	if (ret == 0)
		return (-1);

	/*
	 * As a lame attempt at figuring out if we're
	 * handling a migration event or a referral,
	 * look for rnodes with this fsid in the rnode
	 * cache.
	 *
	 * If we can find one or more such rnodes, it
	 * means we're handling a migration event and
	 * we want to bail out in that case.
	 */
	if (r4find_by_fsid(mi, &garp.n4g_fsid)) {
		DTRACE_PROBE3(nfs4clnt__debug__referral__migration,
		    mntinfo4_t *, mi, nfs4_ga_res_t *, &garp,
		    char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/*
	 * Find the first responsive server to mount. When we find
	 * one, fsp will point to it.
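	 *
	 * For each candidate location we resolve the server via an
	 * nfs4_callmapid() upcall and then ping it with
	 * nfs4_ping_server_common(), freeing the resolved info again
	 * whenever the ping fails.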
	 */
	for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) {

		fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i];
		if (fsp->server_len == 0 || fsp->server_val == NULL)
			continue;

		error = nfs4_callmapid(fsp->server_val, &nfsfsloc);
		if (error != 0)
			continue;

		error = nfs4_ping_server_common(nfsfsloc.knconf,
		    nfsfsloc.addr, !(mi->mi_flags & MI4_INT));
		if (error == RPC_SUCCESS)
			break;

		DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr,
		    sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf,
		    char *, "nfs4_process_referral");

		(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	}
	knc = nfsfsloc.knconf;
	if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) ||
	    (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) {
		DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc,
		    nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/* Send the results back */
	*fsloc = nfsfsloc;
	*grp = garp;
	*res = callres;
	return (i);
}

/*
 * Referrals case - need to fetch referral data and then upcall to
 * user-level to get complete mount data.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
{
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;
	vnode_t *dvp;
	rnode4_t *drp;
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	nfs4_ga_res_t garp;
	char *p;
	char fn[MAXNAMELEN];
	int i, index = -1;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;

	/*
	 * If we're passed in a stub vnode that
	 * isn't a "referral" stub, bail out
	 * and return a failure
	 */
	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
		return (NULL);

	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
		return (NULL);

	drp = VTOR4(dvp);
	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
		VN_RELE(dvp);
		return (NULL);
	}

	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
		nfs_rw_exit(&drp->r_rwlock);
		VN_RELE(dvp);
		return (NULL);
	}

	mi = VTOMI4(dvp);
	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
	    &garp, &callres, &nfsfsloc);
	nfs_rw_exit(&drp->r_rwlock);
	VN_RELE(dvp);
	if (index < 0)
		return (NULL);

	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_REFERRAL;

	esi->esi_hostname =
	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
	    fsp->server_val->utf8string_len);
	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';

	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	bufp->len = nfsfsloc.addr->len;
	bufp->maxlen = nfsfsloc.addr->maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
	esi->esi_addr = bufp;

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;

	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
	    struct nfs_fsl_info *, &nfsfsloc,
	    char *, "nfs4_trigger_esi_create_referral");
"nfs4_trigger_esi_create_referral"); 1768 1769 svkncp = nfsfsloc.knconf; 1770 sikncp->knc_semantics = svkncp->knc_semantics; 1771 sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1772 (void) strlcat((char *)sikncp->knc_protofmly, 1773 (char *)svkncp->knc_protofmly, KNC_STRSIZE); 1774 sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1775 (void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto, 1776 KNC_STRSIZE); 1777 sikncp->knc_rdev = svkncp->knc_rdev; 1778 1779 DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf, 1780 struct knetconfig *, sikncp, 1781 char *, "nfs4_trigger_esi_create_referral"); 1782 1783 esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP); 1784 bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len); 1785 esi->esi_syncaddr = NULL; 1786 1787 esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1788 esi->esi_path_len = MAXPATHLEN; 1789 *p++ = '/'; 1790 for (i = 0; i < fsp->rootpath.pathname4_len; i++) { 1791 component4 *comp; 1792 1793 comp = &fsp->rootpath.pathname4_val[i]; 1794 /* If no space, null the string and bail */ 1795 if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN) 1796 goto err; 1797 bcopy(comp->utf8string_val, p, comp->utf8string_len); 1798 p += comp->utf8string_len; 1799 *p++ = '/'; 1800 } 1801 if (fsp->rootpath.pathname4_len != 0) 1802 *(p - 1) = '\0'; 1803 else 1804 *p = '\0'; 1805 p = esi->esi_path; 1806 esi->esi_path = strdup(p); 1807 esi->esi_path_len = strlen(p) + 1; 1808 kmem_free(p, MAXPATHLEN); 1809 1810 /* Allocated in nfs4_process_referral() */ 1811 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1812 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1813 1814 return (esi); 1815 err: 1816 kmem_free(esi->esi_path, esi->esi_path_len); 1817 kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1); 1818 kmem_free(esi->esi_addr->buf, esi->esi_addr->len); 1819 kmem_free(esi->esi_addr, sizeof (struct netbuf)); 1820 kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE); 1821 kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE); 1822 kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf)); 1823 kmem_free(esi->esi_netname, nfsfsloc.netnm_len); 1824 kmem_free(esi, sizeof (ephemeral_servinfo_t)); 1825 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1826 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1827 return (NULL); 1828 } 1829 1830 /* 1831 * Assemble the args, and call the generic VFS mount function to 1832 * finally perform the ephemeral mount. 1833 */ 1834 static int 1835 nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp, 1836 cred_t *cr, vnode_t **newvpp) 1837 { 1838 struct mounta *uap; 1839 char *mntpt, *orig_path, *path; 1840 const char *orig_mntpt; 1841 int retval; 1842 int mntpt_len; 1843 int spec_len; 1844 zone_t *zone = curproc->p_zone; 1845 bool_t has_leading_slash; 1846 int i; 1847 1848 vfs_t *stubvfsp = stubvp->v_vfsp; 1849 ephemeral_servinfo_t *esi = dma->dma_esi; 1850 struct nfs_args *nargs = dma->dma_nargs; 1851 1852 /* first, construct the mount point for the ephemeral mount */ 1853 orig_path = path = fn_path(VTOSV(stubvp)->sv_name); 1854 orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt); 1855 1856 if (*orig_path == '.') 1857 orig_path++; 1858 1859 /* 1860 * Get rid of zone's root path 1861 */ 1862 if (zone != global_zone) { 1863 /* 1864 * -1 for trailing '/' and -1 for EOS. 

/*
 * Assemble the args, and call the generic VFS mount function to
 * finally perform the ephemeral mount.
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr, vnode_t **newvpp)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;
	int i;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			retval = VFS_ROOT(*vfsp, newvpp);
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}
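
/*
 * Example (hypothetical values, for illustration only): given a
 * dma_hostlist of "srv1,srv2" and an esi_path of "export/home"
 * (no leading slash), the code above produces
 *
 *	uap->spec = "srv1,srv2:/export/home"
 *
 * while an esi_path of "/export/home" yields the same string without
 * the inserted '/'. uap->dir is the parent mount's mountpoint with the
 * stub's pathname appended, e.g. "/mnt" + "/a/b" -> "/mnt/a/b".
 */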

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT)
		nargs->flags |= NFSMNT_PUBLIC;

	/* Do some referral-specific option tweaking */
	if (nargs->flags & NFSMNT_REFERRAL) {
		nargs->flags &= ~NFSMNT_DORDMA;
		nargs->flags |= NFSMNT_TRYRDMA;
	}

	mutex_exit(&mi->mi_lock);

	/*
	 * Security data & negotiation policy.
	 *
	 * For mirror mounts, we need to preserve the parent mount's
	 * preference for security negotiation, translating SV4_TRYSECDEFAULT
	 * to NFSMNT_SECDEFAULT if present.
	 *
	 * For referrals, we always want security negotiation and will
	 * set NFSMNT_SECDEFAULT and we will not copy current secdata.
	 * The reason is that we can't negotiate down from a parent's
	 * Kerberos flavor to AUTH_SYS.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * the currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (nargs->flags & NFSMNT_REFERRAL) {
		/* enable negotiation for referral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;
		secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	} else if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for mirror mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}
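
/*
 * Summary of the secdata choice above (for illustration only); it
 * reduces to three cases:
 *
 *	referral:			fresh AUTH_SYS secdata;
 *					NFSMNT_SECDEFAULT set (negotiate)
 *	mirror + SV4_TRYSECDEFAULT:	copy sv_currsec, else sv_secdata;
 *					NFSMNT_SECDEFAULT set (negotiate)
 *	mirror, flavour forced:		copy sv_secdata only;
 *					no NFSMNT_SECDEFAULT (no negotiation)
 */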

static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether to
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	MI4_HOLD(mi);
	VFS_HOLD(mi->mi_vfsp);
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			VFS_RELE(mi->mi_vfsp);
			MI4_RELE(mi);
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}

/*
 * Commit the changes to the ephemeral tree for removing this node.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
	 */
	if (prior) {
		if (prior->ne_child == e) {
			prior->ne_child = peer;
		} else {
			prior->ne_peer = peer;
		}

		if (peer)
			peer->ne_prior = prior;
	} else if (peer) {
		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
		peer->ne_prior = NULL;
	} else {
		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
	}
}
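
/*
 * Sketch of the tree shape (illustrative only): each nfs4_ephemeral_t
 * is woven into a left-child/right-sibling tree. New nodes are pushed
 * onto the head of the child (or root peer) list, so after mounting B
 * then C under ephemeral node A, the links look like:
 *
 *	A --ne_child--> C --ne_peer--> B
 *	    (C->ne_prior == A, B->ne_prior == C)
 *
 * nfs4_ephemeral_umount_cleanup() above is the inverse: it splices a
 * node out of whichever of these lists it is on and repairs ne_prior.
 */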

/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior is NULL if and only
		 * if e is the root of the ephemeral
		 * tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;
		ASSERT(vfsp != NULL);

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		e = prior;
	}

	/* NOTREACHED */
}
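
/*
 * Worked example (illustrative only): the loop above is an iterative
 * post-order traversal. For the tree
 *
 *	eph --ne_child--> B --ne_peer--> C
 *
 * the engine descends eph -> B, finds B has no child, follows B's peer
 * to C, unmounts leaf C, backs up (e = prior == B), unmounts B, backs
 * up to eph, and stops there unless isTreeRoot says eph itself must
 * also be unmounted.
 */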

/*
 * Common code to safely release net_cnt_lock and net_tree_lock
 */
void
nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	nfs4_ephemeral_tree_t *net = *pnet;

	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);

		*pmust_unlock = FALSE;
	}
}

/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we can not nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
		VFS_RELE(mi->mi_vfsp);
		MI4_RELE(mi);
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}
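
/*
 * Sketch of the expected caller pattern (illustrative, not a verbatim
 * caller; follows the locking rules stated for nfs4_ephemeral_umount()
 * below). A caller in the unmount path would do roughly:
 *
 *	error = nfs4_ephemeral_umount(mi, flag, cr, &must_unlock, &net);
 *	if (error)
 *		return (error);		(it has already unlocked)
 *	... proceed with the unmount proper ...
 *	nfs4_ephemeral_umount_activate(mi, &must_unlock, &net);
 *
 * so a tree lock taken on success is always paired with the release in
 * nfs4_ephemeral_umount_activate() or nfs4_ephemeral_umount_unlock().
 */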

/*
 * Unmount an ephemeral node.
 *
 * Note that if this code fails, then it must unlock.
 *
 * If it succeeds, then the caller must be prepared to do so.
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate between an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}
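
/*
 * Summary of the locking decision above (illustrative only):
 *
 *	recursive call:			proceed without net_tree_lock
 *	tree already TREE_LOCKED:	the harvester (MS_SYSSPACE) is let
 *					through; anyone else gets EBUSY
 *	otherwise:			tryenter, or block on
 *					net_tree_lock and then re-validate
 *					both the tree and the node
 */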

/*
 * Do the umount and record any error in the parent.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int error;

	/*
	 * Only act if the fs is still mounted.
	 */
	if (vfsp == NULL)
		return;

	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}

/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is to force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;

				vfsp = mi->mi_vfsp;
				ASSERT(vfsp != NULL);

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				if (vfsp != NULL)
					VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up from the
				 * leaf nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;

			VFS_RELE(net->net_mount->mi_vfsp);
			MI4_RELE(net->net_mount);

			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}
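
/*
 * Summary of the per-node state machine driven by the while loop above
 * (illustrative only); it replaces recursion with explicit states:
 *
 *	VISIT_CHILD   -> mark VISIT_SIBLING, descend to ne_child
 *	VISIT_SIBLING -> mark PROCESS_ME, step to ne_peer
 *	PROCESS_ME    -> attempt the unmount, back up to ne_prior
 *	CHILD_ERROR / PEER_ERROR -> propagate the error to ne_prior and
 *			skip unmounting the affected node
 *
 * so each node is only unmounted after its children and later siblings
 * have been visited.
 */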

/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t timeleft;
	zone_t *zone = curproc->p_zone;

	for (;;) {
		timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}

/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex any longer than
 * necessary, hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t *ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default....
	 */
	ntg->ntg_mount_to = nfs4_trigger_mount_to;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);

	mutex_destroy(&ntg->ntg_forest_lock);
	kmem_free(ntg, sizeof (*ntg));
}

/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_fini(void)
{
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}

/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_init(void)
{
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}

/*
 * nfssys() calls this function to set the per-zone
 * value of mount_to to drive when an ephemeral mount is
 * timed out. Each mount will grab a copy of this value
 * when mounted.
 */
void
nfs4_ephemeral_set_mount_to(uint_t mount_to)
{
	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);

	ntg->ntg_mount_to = mount_to;
}
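
/*
 * Summary of the per-zone lifecycle given by the three ZSD callbacks
 * above (illustrative only):
 *
 *	create:		allocate the nfs4_trigger_globals_t and set the
 *			default ntg_mount_to
 *	shutdown:	harvest_forest(ntg, FALSE, FALSE) - graceful,
 *			ignoring the mount_to expiry check
 *	destroy:	harvest_forest(ntg, TRUE, FALSE) - forced
 *			(MS_FORCE), then free the globals
 */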

/*
 * Walk the list of v4 mount options; if they are currently set in vfsp,
 * append them to a new comma-separated mount option string, and return it.
 *
 * Caller should free by calling nfs4_trigger_destroy_mntopts().
 */
static char *
nfs4_trigger_create_mntopts(vfs_t *vfsp)
{
	uint_t i;
	char *mntopts;
	struct vfssw *vswp;
	mntopts_t *optproto;

	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);

	/* get the list of applicable mount options for v4; locks *vswp */
	vswp = vfs_getvfssw(MNTTYPE_NFS4);
	optproto = &vswp->vsw_optproto;

	for (i = 0; i < optproto->mo_count; i++) {
		struct mntopt *mop = &optproto->mo_list[i];

		if (mop->mo_flags & MO_EMPTY)
			continue;

		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
			kmem_free(mntopts, MAX_MNTOPT_STR);
			vfs_unrefvfssw(vswp);
			return (NULL);
		}
	}

	vfs_unrefvfssw(vswp);

	/*
	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
	 * and it may only be passed via MS_OPTIONSTR, so we
	 * must handle it here.
	 *
	 * Ideally, it would be in the list, but NFS does not specify its
	 * own opt proto list; it instead uses the default one. Since
	 * not all filesystems support extended attrs, it would not be
	 * appropriate to add it there.
	 */
	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
		kmem_free(mntopts, MAX_MNTOPT_STR);
		return (NULL);
	}

	return (mntopts);
}

static void
nfs4_trigger_destroy_mntopts(char *mntopts)
{
	if (mntopts)
		kmem_free(mntopts, MAX_MNTOPT_STR);
}

/*
 * Check a single mount option (optname). Add to mntopts if it is set in VFS.
 */
static int
nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
{
	if (mntopts == NULL || optname == NULL || vfsp == NULL)
		return (EINVAL);

	if (vfs_optionisset(vfsp, optname, NULL)) {
		size_t mntoptslen = strlen(mntopts);
		size_t optnamelen = strlen(optname);

		/* +1 for ',', +1 for NUL */
		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
			return (EOVERFLOW);

		/* first or subsequent mount option? */
		if (*mntopts != '\0')
			(void) strcat(mntopts, ",");

		(void) strcat(mntopts, optname);
	}

	return (0);
}

static enum clnt_stat
nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr)
{
	int retries;
	uint_t max_msgsize;
	enum clnt_stat status;
	CLIENT *cl;
	struct timeval timeout;

	/* as per recov_newserver() */
	max_msgsize = 0;
	retries = 1;
	timeout.tv_sec = 2;
	timeout.tv_usec = 0;

	if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4,
	    max_msgsize, retries, CRED(), &cl) != 0)
		return (RPC_FAILED);

	if (nointr)
		cl->cl_nosignal = TRUE;
	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
	    timeout);
	if (nointr)
		cl->cl_nosignal = FALSE;

	AUTH_DESTROY(cl->cl_auth);
	CLNT_DESTROY(cl);

	return (status);
}

static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
{
	return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr));
}
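
/*
 * Note on the ping (illustrative only): this is a NULL-procedure RPC
 * (RFS_NULL) with a single two-second try, so an unreachable referral
 * location is skipped quickly. A hypothetical caller checking a
 * server before using it might do, roughly:
 *
 *	if (nfs4_trigger_ping_server(svp, INTR4(vp) == 0) != RPC_SUCCESS)
 *		return (EIO);
 */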