1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are 29 * triggered from a "stub" rnode via a special set of vnodeops. 
30 */ 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/time.h> 37 #include <sys/vnode.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/file.h> 41 #include <sys/filio.h> 42 #include <sys/uio.h> 43 #include <sys/buf.h> 44 #include <sys/mman.h> 45 #include <sys/pathname.h> 46 #include <sys/dirent.h> 47 #include <sys/debug.h> 48 #include <sys/vmsystm.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/swap.h> 52 #include <sys/errno.h> 53 #include <sys/strsubr.h> 54 #include <sys/sysmacros.h> 55 #include <sys/kmem.h> 56 #include <sys/mount.h> 57 #include <sys/cmn_err.h> 58 #include <sys/pathconf.h> 59 #include <sys/utsname.h> 60 #include <sys/dnlc.h> 61 #include <sys/acl.h> 62 #include <sys/systeminfo.h> 63 #include <sys/policy.h> 64 #include <sys/sdt.h> 65 #include <sys/list.h> 66 #include <sys/stat.h> 67 #include <sys/mntent.h> 68 #include <sys/priv.h> 69 70 #include <rpc/types.h> 71 #include <rpc/auth.h> 72 #include <rpc/clnt.h> 73 74 #include <nfs/nfs.h> 75 #include <nfs/nfs_clnt.h> 76 #include <nfs/nfs_acl.h> 77 #include <nfs/lm.h> 78 #include <nfs/nfs4.h> 79 #include <nfs/nfs4_kprot.h> 80 #include <nfs/rnode4.h> 81 #include <nfs/nfs4_clnt.h> 82 #include <nfs/nfsid_map.h> 83 #include <nfs/nfs4_idmap_impl.h> 84 85 #include <vm/hat.h> 86 #include <vm/as.h> 87 #include <vm/page.h> 88 #include <vm/pvn.h> 89 #include <vm/seg.h> 90 #include <vm/seg_map.h> 91 #include <vm/seg_kpm.h> 92 #include <vm/seg_vn.h> 93 94 #include <fs/fs_subr.h> 95 96 #include <sys/ddi.h> 97 #include <sys/int_fmtio.h> 98 99 #include <sys/sunddi.h> 100 101 #include <sys/priv_names.h> 102 103 extern zone_key_t nfs4clnt_zone_key; 104 extern zone_key_t nfsidmap_zone_key; 105 106 /* 107 * The automatic unmounter thread stuff! 108 */ 109 static int nfs4_trigger_thread_timer = 20; /* in seconds */ 110 111 /* 112 * Just a default.... 
 */
static uint_t nfs4_trigger_mount_to = 240;

/*
 * Per-zone state for ephemeral mounts: the forest of ephemeral trees,
 * the inactivity timeout handed to new mounts, and whether the
 * harvester (automatic unmounter) thread has been started.
 */
typedef struct nfs4_trigger_globals {
	kmutex_t		ntg_forest_lock;	/* protects ntg_forest */
	uint_t			ntg_mount_to;		/* inactivity timeout */
	int			ntg_thread_started;	/* harvester running? */
	nfs4_ephemeral_tree_t	*ntg_forest;		/* list of trees */
} nfs4_trigger_globals_t;

kmutex_t	nfs4_ephemeral_thread_lock;

zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;

static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);

/*
 * Used for ephemeral mounts; contains data either duplicated from
 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
 *
 * It's intended that this structure is used solely for ephemeral
 * mount-type specific data, for passing this data to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;
	int			esi_mount_flags;
	struct netbuf		*esi_addr;
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;

/*
 * Collect together the mount-type specific and generic data args,
 * as assembled by nfs4_trigger_domount_args_create().
 */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;
	char			*dma_hostlist;	/* comma-sep., for RO failover */
	struct nfs_args		*dma_nargs;
} domount_args_t;


/*
 * The vnode ops functions for a trigger stub vnode.
 *
 * NOTE(review): nfs4_trigger_cmp is declared here but no definition is
 * visible in this file chunk — confirm it is defined (or remove the
 * declaration) elsewhere.
 */
static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
    caller_context_t *);
static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
    int *, pathname_t *);
static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
    enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
    vsecattr_t *);
static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
    int);
static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
    vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);

/*
 * Regular NFSv4 vnodeops that we need to reference directly
 * (we cannot go through VOP_*() on a stub vnode — that would recurse
 * back into the trigger ops).
 */
extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
		    caller_context_t *);
extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
		    struct pathname *, int, vnode_t *, cred_t *,
		    caller_context_t *, int *, pathname_t *);
extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
		    caller_context_t *);
extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
		    caller_context_t *);
extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);

/* Internal helpers for performing (and tearing down) the ephemeral mount */
static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
    cred_t *, vnode_t **);
static int	nfs4_trigger_domount_args_create(vnode_t *, cred_t *,
    domount_args_t **dmap);
static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
    vnode_t *vp);
static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
    cred_t *);
static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
    servinfo4_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
    cred_t *);
static struct nfs_args	*nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
    ephemeral_servinfo_t *);
static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
static char	*nfs4_trigger_create_mntopts(vfs_t *);
static void	nfs4_trigger_destroy_mntopts(char *);
static int	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
    struct netbuf *, int);

extern int	umount2_engine(vfs_t *, int, cred_t *, int);

vnodeops_t *nfs4_trigger_vnodeops;

/*
 * These are the vnodeops that we must define for stub vnodes.
238 * 239 * 240 * Many of the VOPs defined for NFSv4 do not need to be defined here, 241 * for various reasons. This will result in the VFS default function being 242 * used: 243 * 244 * - These VOPs require a previous VOP_OPEN to have occurred. That will have 245 * lost the reference to the stub vnode, meaning these should not be called: 246 * close, read, write, ioctl, readdir, seek. 247 * 248 * - These VOPs are meaningless for vnodes without data pages. Since the 249 * stub vnode is of type VDIR, these should not be called: 250 * space, getpage, putpage, map, addmap, delmap, pageio, fsync. 251 * 252 * - These VOPs are otherwise not applicable, and should not be called: 253 * dump, setsecattr. 254 * 255 * 256 * These VOPs we do not want to define, but nor do we want the VFS default 257 * action. Instead, we specify the VFS error function, with fs_error(), but 258 * note that fs_error() is not actually called. Instead it results in the 259 * use of the error function defined for the particular VOP, in vn_ops_table[]: 260 * 261 * - frlock, dispose, shrlock. 262 * 263 * 264 * These VOPs we define to use the corresponding regular NFSv4 vnodeop. 265 * NOTE: if any of these ops involve an OTW call with the stub FH, then 266 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo() 267 * to protect the security data in the servinfo4_t for the "parent" 268 * filesystem that contains the stub. 269 * 270 * - These VOPs should not trigger a mount, so that "ls -l" does not: 271 * pathconf, getsecattr. 272 * 273 * - These VOPs would not make sense to trigger: 274 * inactive, rwlock, rwunlock, fid, realvp. 
 */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

/*
 * Bump the reference count on an ephemeral tree. The caller must
 * already hold net_cnt_lock; the second ASSERT catches wrap-around.
 */
static void
nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	net->net_refcnt++;
	ASSERT(net->net_refcnt != 0);
}

/*
 * Take a reference on an ephemeral tree, acquiring net_cnt_lock
 * on the caller's behalf.
 */
static void
nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_incr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * We need a safe way to decrement the refcnt whilst the
 * lock is being held -- i.e. the caller already owns net_cnt_lock.
 */
static void
nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	ASSERT(net->net_refcnt != 0);
	net->net_refcnt--;
}

/*
 * Drop a reference on an ephemeral tree, acquiring net_cnt_lock
 * on the caller's behalf. Note that this does not free the tree;
 * teardown is handled elsewhere (by the harvester/unmount paths).
 */
static void
nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_decr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * Trigger ops for stub vnodes; for mirror mounts, etc.
 *
 * The general idea is that a "triggering" op will first call
 * nfs4_trigger_mount(), which will find out whether a mount has already
 * been triggered.
 *
 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
 * of the covering vfs.
 *
 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
 * and again set newvp, as above.
 *
 * The triggering op may then re-issue the VOP by calling it on newvp.
 *
 * Note that some ops may perform custom action, and may or may not need
 * to trigger a mount.
 *
 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
 * and that would just recurse. Instead, we call the v4 op directly,
 * by name. This is OK, since we know that the vnode is for NFSv4,
 * otherwise it couldn't be a stub.
363 * 364 */ 365 366 static int 367 nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 368 { 369 int error; 370 vnode_t *newvp; 371 372 error = nfs4_trigger_mount(*vpp, cr, &newvp); 373 if (error) 374 return (error); 375 376 /* Release the stub vnode, as we're losing the reference to it */ 377 VN_RELE(*vpp); 378 379 /* Give the caller the root vnode of the newly-mounted fs */ 380 *vpp = newvp; 381 382 /* return with VN_HELD(newvp) */ 383 return (VOP_OPEN(vpp, flag, cr, ct)); 384 } 385 386 void 387 nfs4_fake_attrs(vnode_t *vp, struct vattr *vap) 388 { 389 uint_t mask; 390 timespec_t now; 391 392 /* 393 * Set some attributes here for referrals. 394 */ 395 mask = vap->va_mask; 396 bzero(vap, sizeof (struct vattr)); 397 vap->va_mask = mask; 398 vap->va_uid = 0; 399 vap->va_gid = 0; 400 vap->va_nlink = 1; 401 vap->va_size = 1; 402 gethrestime(&now); 403 vap->va_atime = now; 404 vap->va_mtime = now; 405 vap->va_ctime = now; 406 vap->va_type = VDIR; 407 vap->va_mode = 0555; 408 vap->va_fsid = vp->v_vfsp->vfs_dev; 409 vap->va_rdev = 0; 410 vap->va_blksize = MAXBSIZE; 411 vap->va_nblocks = 1; 412 vap->va_seq = 0; 413 } 414 415 /* 416 * For the majority of cases, nfs4_trigger_getattr() will not trigger 417 * a mount. However, if ATTR_TRIGGER is set, we are being informed 418 * that we need to force the mount before we attempt to determine 419 * the attributes. The intent is an atomic operation for security 420 * testing. 421 * 422 * If we're not triggering a mount, we can still inquire about the 423 * actual attributes from the server in the mirror mount case, 424 * and will return manufactured attributes for a referral (see 425 * the 'create' branch of find_referral_stubvp()). 
426 */ 427 static int 428 nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 429 caller_context_t *ct) 430 { 431 int error; 432 433 if (flags & ATTR_TRIGGER) { 434 vnode_t *newvp; 435 436 error = nfs4_trigger_mount(vp, cr, &newvp); 437 if (error) 438 return (error); 439 440 error = VOP_GETATTR(newvp, vap, flags, cr, ct); 441 VN_RELE(newvp); 442 443 } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) { 444 445 error = nfs4_getattr(vp, vap, flags, cr, ct); 446 447 } else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) { 448 449 nfs4_fake_attrs(vp, vap); 450 error = 0; 451 } 452 453 return (error); 454 } 455 456 static int 457 nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 458 caller_context_t *ct) 459 { 460 int error; 461 vnode_t *newvp; 462 463 error = nfs4_trigger_mount(vp, cr, &newvp); 464 if (error) 465 return (error); 466 467 error = VOP_SETATTR(newvp, vap, flags, cr, ct); 468 VN_RELE(newvp); 469 470 return (error); 471 } 472 473 static int 474 nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr, 475 caller_context_t *ct) 476 { 477 int error; 478 vnode_t *newvp; 479 480 error = nfs4_trigger_mount(vp, cr, &newvp); 481 if (error) 482 return (error); 483 484 error = VOP_ACCESS(newvp, mode, flags, cr, ct); 485 VN_RELE(newvp); 486 487 return (error); 488 } 489 490 static int 491 nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, 492 struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr, 493 caller_context_t *ct, int *deflags, pathname_t *rpnp) 494 { 495 int error; 496 vnode_t *newdvp; 497 rnode4_t *drp = VTOR4(dvp); 498 499 ASSERT(RP_ISSTUB(drp)); 500 501 /* 502 * It's not legal to lookup ".." for an fs root, so we mustn't pass 503 * that up. Instead, pass onto the regular op, regardless of whether 504 * we've triggered a mount. 
505 */ 506 if (strcmp(nm, "..") == 0) 507 if (RP_ISSTUB_MIRRORMOUNT(drp)) { 508 return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, 509 ct, deflags, rpnp)); 510 } else if (RP_ISSTUB_REFERRAL(drp)) { 511 /* Return the parent vnode */ 512 return (vtodv(dvp, vpp, cr, TRUE)); 513 } 514 515 error = nfs4_trigger_mount(dvp, cr, &newdvp); 516 if (error) 517 return (error); 518 519 error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct, 520 deflags, rpnp); 521 VN_RELE(newdvp); 522 523 return (error); 524 } 525 526 static int 527 nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va, 528 enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr, 529 int flags, caller_context_t *ct, vsecattr_t *vsecp) 530 { 531 int error; 532 vnode_t *newdvp; 533 534 error = nfs4_trigger_mount(dvp, cr, &newdvp); 535 if (error) 536 return (error); 537 538 error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr, 539 flags, ct, vsecp); 540 VN_RELE(newdvp); 541 542 return (error); 543 } 544 545 static int 546 nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, 547 int flags) 548 { 549 int error; 550 vnode_t *newdvp; 551 552 error = nfs4_trigger_mount(dvp, cr, &newdvp); 553 if (error) 554 return (error); 555 556 error = VOP_REMOVE(newdvp, nm, cr, ct, flags); 557 VN_RELE(newdvp); 558 559 return (error); 560 } 561 562 static int 563 nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr, 564 caller_context_t *ct, int flags) 565 { 566 int error; 567 vnode_t *newtdvp; 568 569 error = nfs4_trigger_mount(tdvp, cr, &newtdvp); 570 if (error) 571 return (error); 572 573 /* 574 * We don't check whether svp is a stub. Let the NFSv4 code 575 * detect that error, and return accordingly. 
576 */ 577 error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags); 578 VN_RELE(newtdvp); 579 580 return (error); 581 } 582 583 static int 584 nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 585 cred_t *cr, caller_context_t *ct, int flags) 586 { 587 int error; 588 vnode_t *newsdvp; 589 rnode4_t *tdrp = VTOR4(tdvp); 590 591 /* 592 * We know that sdvp is a stub, otherwise we would not be here. 593 * 594 * If tdvp is also be a stub, there are two possibilities: it 595 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)] 596 * or it is a different stub [!VN_CMP(sdvp, tdvp)]. 597 * 598 * In the former case, just trigger sdvp, and treat tdvp as 599 * though it were not a stub. 600 * 601 * In the latter case, it might be a different stub for the 602 * same server fs as sdvp, or for a different server fs. 603 * Regardless, from the client perspective this would still 604 * be a cross-filesystem rename, and should not be allowed, 605 * so return EXDEV, without triggering either mount. 
606 */ 607 if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp)) 608 return (EXDEV); 609 610 error = nfs4_trigger_mount(sdvp, cr, &newsdvp); 611 if (error) 612 return (error); 613 614 error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags); 615 616 VN_RELE(newsdvp); 617 618 return (error); 619 } 620 621 /* ARGSUSED */ 622 static int 623 nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, 624 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) 625 { 626 int error; 627 vnode_t *newdvp; 628 629 error = nfs4_trigger_mount(dvp, cr, &newdvp); 630 if (error) 631 return (error); 632 633 error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp); 634 VN_RELE(newdvp); 635 636 return (error); 637 } 638 639 static int 640 nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, 641 caller_context_t *ct, int flags) 642 { 643 int error; 644 vnode_t *newdvp; 645 646 error = nfs4_trigger_mount(dvp, cr, &newdvp); 647 if (error) 648 return (error); 649 650 error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags); 651 VN_RELE(newdvp); 652 653 return (error); 654 } 655 656 static int 657 nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, 658 cred_t *cr, caller_context_t *ct, int flags) 659 { 660 int error; 661 vnode_t *newdvp; 662 663 error = nfs4_trigger_mount(dvp, cr, &newdvp); 664 if (error) 665 return (error); 666 667 error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags); 668 VN_RELE(newdvp); 669 670 return (error); 671 } 672 673 static int 674 nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, 675 caller_context_t *ct) 676 { 677 int error; 678 vnode_t *newvp; 679 680 error = nfs4_trigger_mount(vp, cr, &newvp); 681 if (error) 682 return (error); 683 684 error = VOP_READLINK(newvp, uiop, cr, ct); 685 VN_RELE(newvp); 686 687 return (error); 688 } 689 690 /* end of trigger vnode ops */ 691 692 /* 693 * See if the mount has already been done by another caller. 
 */
static int
nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
    bool_t *was_mounted, vfs_t **vfsp)
{
	int error;
	mntinfo4_t *mi = VTOMI4(vp);

	*was_mounted = FALSE;

	/* Serialize against concurrent mount/unmount of this vnode. */
	error = vn_vfsrlock_wait(vp);
	if (error)
		return (error);

	*vfsp = vn_mountedvfs(vp);
	if (*vfsp != NULL) {
		/* the mount has already occurred */
		error = VFS_ROOT(*vfsp, newvpp);
		if (!error) {
			/* need to update the reference time */
			mutex_enter(&mi->mi_lock);
			if (mi->mi_ephemeral)
				mi->mi_ephemeral->ne_ref_time =
				    gethrestime_sec();
			mutex_exit(&mi->mi_lock);

			*was_mounted = TRUE;
		}
	}

	/*
	 * Note: a VFS_ROOT() failure is deliberately not returned;
	 * *was_mounted stays FALSE and the caller proceeds to attempt
	 * the mount itself.
	 */
	vn_vfsunlock(vp);
	return (0);
}

/*
 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
 *
 * The mount may have already occurred, via another thread. If not,
 * assemble the location information - which may require fetching - and
 * perform the mount.
 *
 * Sets newvp to be the root of the fs that is now covering vp. Note
 * that we return with VN_HELD(*newvp).
 *
 * The caller is responsible for passing the VOP onto the covering fs.
 *
 * Lock ordering here is mi_lock -> net_tree_lock -> net_cnt_lock;
 * see the inline comments for where each is taken and dropped.
 */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int error;
	vfs_t *vfsp;
	rnode4_t *rp = VTOR4(vp);
	mntinfo4_t *mi = VTOMI4(vp);
	domount_args_t *dma;

	nfs4_ephemeral_tree_t *net;

	bool_t must_unlock = FALSE;	/* do we hold net_tree_lock? */
	bool_t is_building = FALSE;	/* did we create the tree here? */
	bool_t was_mounted = FALSE;

	cred_t *mcred = NULL;

	nfs4_trigger_globals_t *ntg;

	zone_t *zone = curproc->p_zone;

	ASSERT(RP_ISSTUB(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		/* First ephemeral mount under this fs: build the tree. */
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);

		/* The tree holds the parent mntinfo4 and vfs alive. */
		MI4_HOLD(mi);
		VFS_HOLD(mi->mi_vfsp);
	} else {
		/* Tree already exists: take a reference on it. */
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only procede if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	/* Flag the tree as having a mount in progress. */
	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	error = nfs4_trigger_domount_args_create(vp, cr, &dma);
	if (error)
		goto done;

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	if (mcred == NULL) {
		error = EINVAL;
		nfs4_trigger_domount_args_destroy(dma, vp);
		goto done;
	}

	crset_zone_privall(mcred);
	if (is_system_labeled())
		(void) setpflags(NET_MAC_AWARE, 1, mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	DTRACE_PROBE2(nfs4clnt__func__referral__mount,
	    vnode_t *, vp, int, error);

	crfree(mcred);

done:

	if (must_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;

		/*
		 * REFCNT: If we are the root of the tree, then we need
		 * to keep a reference because we malloced the tree and
		 * this is where we tied it to our mntinfo.
		 *
		 * If we are not the root of the tree, then our tie to
		 * the mntinfo occured elsewhere and we need to
		 * decrement the reference to the tree.
		 */
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		else
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	/*
	 * NOTE(review): newvpp can never be NULL here (we dereference
	 * it above); the guard is belt-and-braces only.
	 */
	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}

/*
 * Collect together both the generic & mount-type specific args.
 */
static int
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr, domount_args_t **dmap)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (EINVAL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			if (status == RPC_INTR) {
				/*
				 * Interrupted: unwind everything built
				 * so far and bail out.
				 * (nfs4_trigger_esi_destroy tolerates a
				 * NULL esi_first.)
				 */
				kmem_free(hostlist, MAXPATHLEN);
				nfs4_trigger_esi_destroy(esi_first, vp);
				nargs = nargs_head;
				while (nargs != NULL) {
					next = nargs->nfs_ext_u.nfs_extB.next;
					nfs4_trigger_nargs_destroy(nargs);
					nargs = next;
				}
				return (EINTR);
			} else if (status != RPC_SUCCESS) {
				/* if the server did not respond, ignore it */
				continue;
			}

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;
	*dmap = dma;

	return (0);
}

/*
 * Free a domount_args_t and everything hanging off it: the chosen esi,
 * the hostlist buffer, and the whole chain of nfs_args.
 */
static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			/* walk the chain linked via nfs_ext_u.nfs_extB.next */
			do {
				struct nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
1088 */ 1089 static ephemeral_servinfo_t * 1090 nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr) 1091 { 1092 ephemeral_servinfo_t *esi; 1093 rnode4_t *rp = VTOR4(vp); 1094 1095 ASSERT(RP_ISSTUB(rp)); 1096 1097 /* Call the ephemeral type-specific routine */ 1098 if (RP_ISSTUB_MIRRORMOUNT(rp)) 1099 esi = nfs4_trigger_esi_create_mirrormount(vp, svp); 1100 else if (RP_ISSTUB_REFERRAL(rp)) 1101 esi = nfs4_trigger_esi_create_referral(vp, cr); 1102 else 1103 esi = NULL; 1104 return (esi); 1105 } 1106 1107 static void 1108 nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp) 1109 { 1110 rnode4_t *rp = VTOR4(vp); 1111 1112 ASSERT(RP_ISSTUB(rp)); 1113 1114 /* Currently, no need for an ephemeral type-specific routine */ 1115 1116 /* 1117 * The contents of ephemeral_servinfo_t goes into nfs_args, 1118 * and will be handled by nfs4_trigger_nargs_destroy(). 1119 * We need only free the structure itself. 1120 */ 1121 if (esi != NULL) 1122 kmem_free(esi, sizeof (ephemeral_servinfo_t)); 1123 } 1124 1125 /* 1126 * Some of this may turn out to be common with other ephemeral types, 1127 * in which case it should be moved to nfs4_trigger_esi_create(), or a 1128 * common function called. 
 */

/*
 * Mirror mounts case - should have all data available
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* NUL-terminated copy of the parent's server hostname */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	/* deep copy of the parent's transport address (sv_addr) */
	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	/* deep copy of the parent's knetconfig (sv_knconf) */
	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		/* deep copy of the server's time-sync address */
		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh: server-side path is sv_path + stub's path */
	esi->esi_path_len = strlen(stubpath) + 1;
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* back up to the original pointer so the free size is right */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to
 * get network information required to do the mount call.
 */
int
nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
{
	door_arg_t door_args;
	door_handle_t dh;
	XDR xdr;
	refd_door_args_t *xdr_argsp;
	refd_door_res_t *orig_resp;
	k_sigset_t smask;
	int xdr_len = 0;
	int res_len = 16; /* length of an IP address */
	int orig_reslen = res_len;
	int error = 0;
	struct nfsidmap_globals *nig;

	/* no upcalls once the zone is going away */
	if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
		return (ECONNREFUSED);

	nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
	ASSERT(nig != NULL);

	/* grab a hold on the daemon's door while we still have the lock */
	mutex_enter(&nig->nfsidmap_daemon_lock);
	dh = nig->nfsidmap_daemon_dh;
	if (dh == NULL) {
		mutex_exit(&nig->nfsidmap_daemon_lock);
		cmn_err(CE_NOTE,
		    "nfs4_callmapid: nfsmapid daemon not " \
		    "running unable to resolve host name\n");
		return (EINVAL);
	}
	door_ki_hold(dh);
	mutex_exit(&nig->nfsidmap_daemon_lock);

	/* XDR-encode the server name into the door request buffer */
	xdr_len = xdr_sizeof(&(xdr_utf8string), server);

	xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
	xdr_argsp->xdr_len = xdr_len;
	xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;

	xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
	    xdr_len, XDR_ENCODE);

	if (!xdr_utf8string(&xdr, server)) {
		kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
		door_ki_rele(dh);
		return (1);
	}

	/* orig_reslen is the constant res_len, so this always allocates */
	if (orig_reslen)
		orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);

	door_args.data_ptr = (char *)xdr_argsp;
	door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
	door_args.rsize = res_len;

	/* allow signals to interrupt the upcall */
	sigintr(&smask, 1);
	error = door_ki_upcall(dh, &door_args);
	sigunintr(&smask);

	door_ki_rele(dh);

	kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
	if (error) {
		kmem_free(orig_resp, orig_reslen);
		/*
		 * There is no door to connect to. The referral daemon
		 * must not be running yet.
		 */
		cmn_err(CE_WARN,
		    "nfsmapid not running cannot resolve host name");
		goto out;
	}

	/*
	 * If the results buffer passed back is not the same as
	 * what was sent, free the old buffer and use the new one
	 * (the door may have returned a larger, door-allocated rbuf).
	 */
	if (orig_resp && orig_reslen) {
		refd_door_res_t *door_resp;

		door_resp = (refd_door_res_t *)door_args.rbuf;
		if ((void *)door_args.rbuf != orig_resp)
			kmem_free(orig_resp, orig_reslen);
		if (door_resp->res_status == 0) {
			/* decode the daemon's reply into caller's resp */
			xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
			    door_resp->xdr_len, XDR_DECODE);
			bzero(resp, sizeof (struct nfs_fsl_info));
			if (!xdr_nfs_fsl_info(&xdr, resp)) {
				DTRACE_PROBE2(
				    nfs4clnt__debug__referral__upcall__xdrfail,
				    struct nfs_fsl_info *, resp,
				    char *, "nfs4_callmapid");
				error = EINVAL;
			}
		} else {
			DTRACE_PROBE2(
			    nfs4clnt__debug__referral__upcall__badstatus,
			    int, door_resp->res_status,
			    char *, "nfs4_callmapid");
			error = door_resp->res_status;
		}
		kmem_free(door_args.rbuf, door_args.rsize);
	}
out:
	DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
	    char *, server, int, error);
	return (error);
}

/*
 * Fetches the fs_locations attribute. Typically called
 * from a Replication/Migration/Referrals/Mirror-mount context
 *
 * Fills in the attributes in garp. The caller is assumed
 * to have allocated memory for garp.
 *
 * lock: if set do not lock s_recovlock and mi_recovlock mutex,
 * it's already done by caller.
 * Otherwise lock these mutexes
 * before doing the rfs4call().
 *
 * Returns
 *	1 for success
 *	0 for failure
 */
int
nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
    cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	int argoplist_size = 3 * sizeof (nfs_argop4);
	nfs4_server_t *sp = NULL;
	int doqueue = 1;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int retval = 1;
	struct nfs4_clnt *nfscl;

	/* hold mi_recovlock (or assert the caller already does) */
	if (lock == TRUE)
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	else
		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	sp = find_nfs4_server(mi);
	if (lock == TRUE)
		nfs_rw_exit(&mi->mi_recovlock);

	/* find_nfs4_server() returned with s_lock held; we don't need it */
	if (sp != NULL)
		mutex_exit(&sp->s_lock);

	if (lock == TRUE) {
		if (sp != NULL)
			(void) nfs_rw_enter_sig(&sp->s_recovlock,
			    RW_WRITER, 0);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
	} else {
		if (sp != NULL) {
			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
		}
	}

	/*
	 * Do we want to do the setup for recovery here?
	 *
	 * We know that the server responded to a null ping a very
	 * short time ago, and we know that we intend to do a
	 * single stateless operation - we want to fetch attributes,
	 * so we know we can't encounter errors about state. If
	 * something goes wrong with the GETATTR, like not being
	 * able to get a response from the server or getting any
	 * kind of FH error, we should fail the mount.
	 *
	 * We may want to re-visit this at a later time.
	 */
	argop = kmem_alloc(argoplist_size, KM_SLEEP);

	args.ctag = TAG_GETATTR_FSLOCATION;
	/* PUTFH LOOKUP GETATTR */
	args.array_len = 3;
	args.array = argop;

	/* 0. putfh file */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. lookup name, can't be dotdot */
	argop[1].argop = OP_CLOOKUP;
	argop[1].nfs_argop4_u.opclookup.cname = nm;

	/* 2. file attrs */
	argop[2].argop = OP_GETATTR;
	argop[2].nfs_argop4_u.opgetattr.attr_request =
	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
	    FATTR4_MOUNTED_ON_FILEID_MASK;
	argop[2].nfs_argop4_u.opgetattr.mi = mi;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	if (lock == TRUE) {
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs_rw_exit(&sp->s_recovlock);
	}

	/* bump the per-zone referral statistics counter */
	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	nfscl->nfscl_stat.referrals.value.ui64++;
	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);

	if (e.error != 0) {
		if (sp != NULL)
			nfs4_server_rele(sp);
		kmem_free(argop, argoplist_size);
		return (0);
	}

	/*
	 * Check for all possible error conditions.
	 * For valid replies without an ops array or for illegal
	 * replies, return a failure.
	 */
	if (res.status != NFS4_OK || res.array_len < 3 ||
	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
		retval = 0;
		goto exit;
	}

	/*
	 * There isn't much value in putting the attributes
	 * in the attr cache since fs_locations4 aren't
	 * encountered very frequently, so just make them
	 * available to the caller.
	 */
	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;

	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");

	/* No fs_locations? -- return a failure */
	if (garp->n4g_ext_res == NULL ||
	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
		retval = 0;
		goto exit;
	}

	if (!garp->n4g_fsid_valid)
		retval = 0;

exit:
	if (retval == 0) {
		/* the call was ok but failed validating the call results */
		xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	} else {
		/* success: caller takes ownership of res and must xdr_free */
		ASSERT(callres != NULL);
		*callres = res;
	}

	if (sp != NULL)
		nfs4_server_rele(sp);
	kmem_free(argop, argoplist_size);
	return (retval);
}

/* tunable to disable referral mounts */
int nfs4_no_referrals = 0;

/*
 * Build (or find) a stub vnode for a referral: fetch the referral's
 * mounted_on_fileid, synthesize a fake filehandle from the parent
 * directory's FH plus that fileid, and create the rnode/vnode for it.
 *
 * Returns NULL if the vnode cannot be created or found.
 */
vnode_t *
find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
{
	nfs_fh4 *stub_fh, *dfh;
	nfs4_sharedfh_t *sfhp;
	char *newfhval;
	vnode_t *vp = NULL;
	fattr4_mounted_on_fileid mnt_on_fileid;
	nfs4_ga_res_t garp;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;
	hrtime_t t;

	if (nfs4_no_referrals)
		return (NULL);

	/*
	 * Get the mounted_on_fileid, unique on that server::fsid
	 */
	mi = VTOMI4(dvp);
	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
	    &garp, &callres, FALSE) == 0)
		return (NULL);
	mnt_on_fileid = garp.n4g_mon_fid;
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	/*
	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
	 */
	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
	newfhval = stub_fh->nfs_fh4_val;

	/* copy directory's file handle */
	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
	newfhval = newfhval + dfh->nfs_fh4_len;

	/* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
	bcopy((char *)&mnt_on_fileid, newfhval,
	    sizeof (fattr4_mounted_on_fileid));
	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);

	/* sfh4_put() copies the FH, so the temporary can be freed now */
	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid));
	kmem_free(stub_fh, sizeof (nfs_fh4));
	if (sfhp == NULL)
		return (NULL);

	t = gethrtime();
	garp.n4g_va.va_type = VDIR;
	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));

	if (vp != NULL)
		vp->v_type = VDIR;

	sfh4_rele(&sfhp);
	return (vp);
}

/*
 * Create the referral stub vnode for nm under dvp, mark its rnode as a
 * referral stub, enter it into the DNLC and return it via *vpp (releasing
 * any vnode previously held there). Returns 0 on success, EINVAL if the
 * stub vnode cannot be created.
 */
int
nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
{
	vnode_t *nvp;
	rnode4_t *rp;

	if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
		return (EINVAL);

	/* flag the new rnode as a referral stub */
	rp = VTOR4(nvp);
	mutex_enter(&rp->r_statelock);
	r4_stub_referral(rp);
	mutex_exit(&rp->r_statelock);
	dnlc_enter(dvp, nm, nvp);

	if (*vpp != NULL)
		VN_RELE(*vpp); /* no longer need this vnode */

	*vpp = nvp;

	return (0);
}

/*
 * Fetch the location information and resolve the new server.
 * Caller needs to free up the XDR data which is returned.
 * Input: mount info, shared filehandle, nodename
 * Return: Index to the result or Error(-1)
 * Output: FsLocations Info, Resolved Server Info.
1620 */ 1621 int 1622 nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, 1623 char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res, 1624 struct nfs_fsl_info *fsloc) 1625 { 1626 fs_location4 *fsp; 1627 struct nfs_fsl_info nfsfsloc; 1628 int ret, i, error; 1629 nfs4_ga_res_t garp; 1630 COMPOUND4res_clnt callres; 1631 struct knetconfig *knc; 1632 1633 ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE); 1634 if (ret == 0) 1635 return (-1); 1636 1637 /* 1638 * As a lame attempt to figuring out if we're 1639 * handling a migration event or a referral, 1640 * look for rnodes with this fsid in the rnode 1641 * cache. 1642 * 1643 * If we can find one or more such rnodes, it 1644 * means we're handling a migration event and 1645 * we want to bail out in that case. 1646 */ 1647 if (r4find_by_fsid(mi, &garp.n4g_fsid)) { 1648 DTRACE_PROBE3(nfs4clnt__debug__referral__migration, 1649 mntinfo4_t *, mi, nfs4_ga_res_t *, &garp, 1650 char *, "nfs4_process_referral"); 1651 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1652 return (-1); 1653 } 1654 1655 /* 1656 * Find the first responsive server to mount. When we find 1657 * one, fsp will point to it. 
1658 */ 1659 for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) { 1660 1661 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i]; 1662 if (fsp->server_len == 0 || fsp->server_val == NULL) 1663 continue; 1664 1665 error = nfs4_callmapid(fsp->server_val, &nfsfsloc); 1666 if (error != 0) 1667 continue; 1668 1669 error = nfs4_ping_server_common(nfsfsloc.knconf, 1670 nfsfsloc.addr, !(mi->mi_flags & MI4_INT)); 1671 if (error == RPC_SUCCESS) 1672 break; 1673 1674 DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr, 1675 sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf, 1676 char *, "nfs4_process_referral"); 1677 1678 xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1679 } 1680 knc = nfsfsloc.knconf; 1681 if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) || 1682 (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) { 1683 DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc, 1684 nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral"); 1685 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1686 return (-1); 1687 } 1688 1689 /* Send the results back */ 1690 *fsloc = nfsfsloc; 1691 *grp = garp; 1692 *res = callres; 1693 return (i); 1694 } 1695 1696 /* 1697 * Referrals case - need to fetch referral data and then upcall to 1698 * user-level to get complete mount data. 
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
{
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;
	vnode_t *dvp;
	rnode4_t *drp;
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	nfs4_ga_res_t garp;
	char *p;
	char fn[MAXNAMELEN];
	int i, index = -1;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;

	/*
	 * If we're passed in a stub vnode that
	 * isn't a "referral" stub, bail out
	 * and return a failure
	 */
	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
		return (NULL);

	/* need the parent directory and our component name for the LOOKUP */
	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
		return (NULL);

	drp = VTOR4(dvp);
	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
		VN_RELE(dvp);
		return (NULL);
	}

	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
		nfs_rw_exit(&drp->r_rwlock);
		VN_RELE(dvp);
		return (NULL);
	}

	/* fetch fs_locations and resolve a responsive server */
	mi = VTOMI4(dvp);
	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
	    &garp, &callres, &nfsfsloc);
	nfs_rw_exit(&drp->r_rwlock);
	VN_RELE(dvp);
	if (index < 0)
		return (NULL);

	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_REFERRAL;

	/* NUL-terminated copy of the chosen server's name */
	esi->esi_hostname =
	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
	    fsp->server_val->utf8string_len);
	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';

	/* deep copy of the resolved transport address */
	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	bufp->len = nfsfsloc.addr->len;
	bufp->maxlen = nfsfsloc.addr->maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
	esi->esi_addr = bufp;

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;

	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
	    struct nfs_fsl_info *, &nfsfsloc,
	    char *, "nfs4_trigger_esi_create_referral");

	/* deep copy of the knetconfig returned by the mapid daemon */
	svkncp = nfsfsloc.knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly, KNC_STRSIZE);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto,
	    KNC_STRSIZE);
	sikncp->knc_rdev = svkncp->knc_rdev;

	DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf,
	    struct knetconfig *, sikncp,
	    char *, "nfs4_trigger_esi_create_referral");

	esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP);
	bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len);
	esi->esi_syncaddr = NULL;

	/*
	 * Assemble the server-side path ("/comp1/comp2/...") from the
	 * referral's rootpath components into a MAXPATHLEN scratch buffer;
	 * it is trimmed to exact size further below.
	 */
	esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	esi->esi_path_len = MAXPATHLEN;
	*p++ = '/';
	for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
		component4 *comp;

		comp = &fsp->rootpath.pathname4_val[i];
		/* If no space, null the string and bail */
		if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN)
			goto err;
		bcopy(comp->utf8string_val, p, comp->utf8string_len);
		p += comp->utf8string_len;
		*p++ = '/';
	}
	/* overwrite the trailing '/' (or terminate the bare "/" root) */
	if (fsp->rootpath.pathname4_len != 0)
		*(p - 1) = '\0';
	else
		*p = '\0';
	/* shrink the path to its exact length */
	p = esi->esi_path;
	esi->esi_path = strdup(p);
	esi->esi_path_len = strlen(p) + 1;
	kmem_free(p, MAXPATHLEN);

	/* Allocated in nfs4_process_referral() */
	xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	return (esi);
err:
	/* undo every allocation made above, using the matching sizes */
	kmem_free(esi->esi_path, esi->esi_path_len);
	kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1);
	kmem_free(esi->esi_addr->buf, esi->esi_addr->len);
	kmem_free(esi->esi_addr, sizeof (struct netbuf));
	kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE);
	kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE);
	kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf));
	kmem_free(esi->esi_netname, nfsfsloc.netnm_len);
	kmem_free(esi, sizeof (ephemeral_servinfo_t));
	xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
	return (NULL);
}

/*
 * Assemble the args, and call the generic VFS mount function to
 * finally perform the ephemeral mount.
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr, vnode_t **newvpp)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;
	int i;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	/* fn_path() paths start with '.'; skip it */
	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	/* mount point is parent's mount point + stub's path */
	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	/* done with the fn_path() string; free with its full length */
	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	/* size the "host1,host2:/path" special device string */
	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			retval = VFS_ROOT(*vfsp, newvpp);
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* the esi's fields are handed over by pointer, not copied */
	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	/* attribute cache timeouts, converted back to seconds */
	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT)
		nargs->flags |= NFSMNT_PUBLIC;

	/* Do some referral-specific option tweaking */
	if (nargs->flags & NFSMNT_REFERRAL) {
		nargs->flags &= ~NFSMNT_DORDMA;
		nargs->flags |= NFSMNT_TRYRDMA;
	}

	mutex_exit(&mi->mi_lock);

	/*
	 * Security data & negotiation policy.
	 *
	 * For mirror mounts, we need to preserve the parent mount's
	 * preference for security negotiation, translating SV4_TRYSECDEFAULT
	 * to NFSMNT_SECDEFAULT if present.
	 *
	 * For referrals, we always want security negotiation and will
	 * set NFSMNT_SECDEFAULT and we will not copy current secdata.
	 * The reason is that we can't negotiate down from a parent's
	 * Kerberos flavor to AUTH_SYS.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (nargs->flags & NFSMNT_REFERRAL) {
		/* enable negotiation for referral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;
		secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	} else if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for mirror mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}

/*
 * Free an nfs_args built by nfs4_trigger_nargs_create(), including
 * whatever esi-derived data it still owns.
 */
static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 *
	 * The harvester must be running before we publish any node it
	 * might later need to reap.
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	/* Parent's lock is always taken before the child's. */
	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether they
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	MI4_HOLD(mi);
	VFS_HOLD(mi->mi_vfsp);
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			VFS_RELE(mi->mi_vfsp);
			MI4_RELE(mi);
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			/* New node becomes the head of prior's child list. */
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* Push onto the head of the root's peer list. */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}

/*
 * Commit the changes to the ephemeral tree for removing this node.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
	 */
	if (prior) {
		if (prior->ne_child == e) {
			prior->ne_child = peer;
		} else {
			prior->ne_peer = peer;
		}

		if (peer)
			peer->ne_prior = prior;
	} else if (peer) {
		/* e was the tree root; promote its first peer. */
		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
		peer->ne_prior = NULL;
	} else {
		/* e was the only node in the tree. */
		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
	}
}

/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 *
		 * NOTE(review): the "prior = e" stores in the two descent
		 * branches below are never read; prior is re-derived from
		 * e->ne_prior before it is used at the leaf.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior can only be NULL if
		 * and only if it is the root of the
		 * ephemeral tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;
		ASSERT(vfsp != NULL);

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		/* Back up and continue pruning from the parent/sibling. */
		e = prior;
	}

	/* NOTREACHED */
}

/*
 * Common code to safely release net_cnt_lock and net_tree_lock
 */
void
nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	nfs4_ephemeral_tree_t *net = *pnet;

	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);

		/* Make the operation idempotent for the caller. */
		*pmust_unlock = FALSE;
	}
}

/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we can not nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		/* Drop the hold taken in nfs4_record_ephemeral_mount(). */
		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
		VFS_RELE(mi->mi_vfsp);
		MI4_RELE(mi);
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}

/*
 * Unmount an ephemeral node.
 *
 * Note that if this code fails, then it must unlock.
 *
 * If it succeeds, then the caller must be prepared to do so.
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	/* Derooting: unmounting the enclosing (non-ephemeral) root. */
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}

/*
 * Do the umount and record any error in the parent.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int error;

	/*
	 * Only act on if the fs is still mounted.
	 */
	if (vfsp == NULL)
		return;

	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		/* Propagate the failure into the prior node's state. */
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}

/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	/* Local list of dead trees, freed after the forest walk. */
	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;

				vfsp = mi->mi_vfsp;
				ASSERT(vfsp != NULL);

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Walk the tree iteratively with a per-node state
		 * machine: visit children, then peers, then process
		 * the node itself.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				/*
				 * NOTE(review): unlike the CHILD_ERROR
				 * branch above, this uses "=" rather than
				 * "|=" on prior->ne_state — confirm this
				 * asymmetry is intentional.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				if (vfsp != NULL)
					VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up into the leaf
				 * nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			/* Unlink from the forest. */
			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;

			VFS_RELE(net->net_mount->mi_vfsp);
			MI4_RELE(net->net_mount);

			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/* Free the dead trees outside of the forest lock. */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}

/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t timeleft;
	zone_t *zone = curproc->p_zone;

	for (;;) {
		/* Wake up periodically, or when the zone shuts down. */
		timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}

/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 * (unlocked fast-path check; re-checked under the lock below)
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*
 * ZSD constructor: allocate the per-zone trigger globals.
 */
/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t *ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default....
	 */
	ntg->ntg_mount_to = nfs4_trigger_mount_to;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	/* Non-forced, no time check: unmount whatever will go quietly. */
	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
3187 */ 3188 /*ARGSUSED*/ 3189 static void 3190 nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg) 3191 { 3192 nfs4_trigger_globals_t *ntg = arg; 3193 3194 if (!ntg) 3195 return; 3196 3197 nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE); 3198 3199 mutex_destroy(&ntg->ntg_forest_lock); 3200 kmem_free(ntg, sizeof (*ntg)); 3201 } 3202 3203 /* 3204 * This is the zone independent cleanup needed for 3205 * emphemeral mount processing. 3206 */ 3207 void 3208 nfs4_ephemeral_fini(void) 3209 { 3210 (void) zone_key_delete(nfs4_ephemeral_key); 3211 mutex_destroy(&nfs4_ephemeral_thread_lock); 3212 } 3213 3214 /* 3215 * This is the zone independent initialization needed for 3216 * emphemeral mount processing. 3217 */ 3218 void 3219 nfs4_ephemeral_init(void) 3220 { 3221 mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT, 3222 NULL); 3223 3224 zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create, 3225 nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy); 3226 } 3227 3228 /* 3229 * nfssys() calls this function to set the per-zone 3230 * value of mount_to to drive when an ephemeral mount is 3231 * timed out. Each mount will grab a copy of this value 3232 * when mounted. 3233 */ 3234 void 3235 nfs4_ephemeral_set_mount_to(uint_t mount_to) 3236 { 3237 nfs4_trigger_globals_t *ntg; 3238 zone_t *zone = curproc->p_zone; 3239 3240 ntg = zone_getspecific(nfs4_ephemeral_key, zone); 3241 3242 ntg->ntg_mount_to = mount_to; 3243 } 3244 3245 /* 3246 * Walk the list of v4 mount options; if they are currently set in vfsp, 3247 * append them to a new comma-separated mount option string, and return it. 3248 * 3249 * Caller should free by calling nfs4_trigger_destroy_mntopts(). 
3250 */ 3251 static char * 3252 nfs4_trigger_create_mntopts(vfs_t *vfsp) 3253 { 3254 uint_t i; 3255 char *mntopts; 3256 struct vfssw *vswp; 3257 mntopts_t *optproto; 3258 3259 mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP); 3260 3261 /* get the list of applicable mount options for v4; locks *vswp */ 3262 vswp = vfs_getvfssw(MNTTYPE_NFS4); 3263 optproto = &vswp->vsw_optproto; 3264 3265 for (i = 0; i < optproto->mo_count; i++) { 3266 struct mntopt *mop = &optproto->mo_list[i]; 3267 3268 if (mop->mo_flags & MO_EMPTY) 3269 continue; 3270 3271 if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) { 3272 kmem_free(mntopts, MAX_MNTOPT_STR); 3273 vfs_unrefvfssw(vswp); 3274 return (NULL); 3275 } 3276 } 3277 3278 vfs_unrefvfssw(vswp); 3279 3280 /* 3281 * MNTOPT_XATTR is not in the v4 mount opt proto list, 3282 * and it may only be passed via MS_OPTIONSTR, so we 3283 * must handle it here. 3284 * 3285 * Ideally, it would be in the list, but NFS does not specify its 3286 * own opt proto list, it uses instead the default one. Since 3287 * not all filesystems support extended attrs, it would not be 3288 * appropriate to add it there. 3289 */ 3290 if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) || 3291 nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) { 3292 kmem_free(mntopts, MAX_MNTOPT_STR); 3293 return (NULL); 3294 } 3295 3296 return (mntopts); 3297 } 3298 3299 static void 3300 nfs4_trigger_destroy_mntopts(char *mntopts) 3301 { 3302 if (mntopts) 3303 kmem_free(mntopts, MAX_MNTOPT_STR); 3304 } 3305 3306 /* 3307 * Check a single mount option (optname). Add to mntopts if it is set in VFS. 
3308 */ 3309 static int 3310 nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp) 3311 { 3312 if (mntopts == NULL || optname == NULL || vfsp == NULL) 3313 return (EINVAL); 3314 3315 if (vfs_optionisset(vfsp, optname, NULL)) { 3316 size_t mntoptslen = strlen(mntopts); 3317 size_t optnamelen = strlen(optname); 3318 3319 /* +1 for ',', +1 for NUL */ 3320 if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR) 3321 return (EOVERFLOW); 3322 3323 /* first or subsequent mount option? */ 3324 if (*mntopts != '\0') 3325 (void) strcat(mntopts, ","); 3326 3327 (void) strcat(mntopts, optname); 3328 } 3329 3330 return (0); 3331 } 3332 3333 static enum clnt_stat 3334 nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr) 3335 { 3336 int retries; 3337 uint_t max_msgsize; 3338 enum clnt_stat status; 3339 CLIENT *cl; 3340 struct timeval timeout; 3341 3342 /* as per recov_newserver() */ 3343 max_msgsize = 0; 3344 retries = 1; 3345 timeout.tv_sec = 2; 3346 timeout.tv_usec = 0; 3347 3348 if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4, 3349 max_msgsize, retries, CRED(), &cl) != 0) 3350 return (RPC_FAILED); 3351 3352 if (nointr) 3353 cl->cl_nosignal = TRUE; 3354 status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL, 3355 timeout); 3356 if (nointr) 3357 cl->cl_nosignal = FALSE; 3358 3359 AUTH_DESTROY(cl->cl_auth); 3360 CLNT_DESTROY(cl); 3361 3362 return (status); 3363 } 3364 3365 static enum clnt_stat 3366 nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) 3367 { 3368 return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr)); 3369 } 3370