/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
 * triggered from a "stub" rnode via a special set of vnodeops.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/dirent.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/swap.h>
#include <sys/errno.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/cmn_err.h>
#include <sys/pathconf.h>
#include <sys/utsname.h>
#include <sys/dnlc.h>
#include <sys/acl.h>
#include <sys/systeminfo.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/list.h>
#include <sys/stat.h>
#include <sys/mntent.h>
#include <sys/priv.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>
#include <nfs/lm.h>
#include <nfs/nfs4.h>
#include <nfs/nfs4_kprot.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfsid_map.h>
#include <nfs/nfs4_idmap_impl.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
#include <vm/seg_vn.h>

#include <fs/fs_subr.h>

#include <sys/ddi.h>
#include <sys/int_fmtio.h>

#include <sys/sunddi.h>

#include <sys/priv_names.h>

extern zone_key_t nfs4clnt_zone_key;
extern zone_key_t nfsidmap_zone_key;

/*
 * The automatic unmounter thread stuff!
 */
static int nfs4_trigger_thread_timer = 20;	/* in seconds */

/*
 * Just a default....
 */
static uint_t nfs4_trigger_mount_to = 240;

typedef struct nfs4_trigger_globals {
	kmutex_t		ntg_forest_lock;
	uint_t			ntg_mount_to;
	int			ntg_thread_started;
	nfs4_ephemeral_tree_t	*ntg_forest;
} nfs4_trigger_globals_t;

kmutex_t	nfs4_ephemeral_thread_lock;

zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;

static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);

/*
 * Used for ephemeral mounts; contains data either duplicated from
 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
 *
 * It's intended that this structure is used solely for ephemeral
 * mount-type specific data, for passing this data to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;
	int			esi_mount_flags;
	struct netbuf		*esi_addr;
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;

/*
 * Collect together the mount-type specific and generic data args.
 */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;
	char			*dma_hostlist;	/* comma-sep. for RO failover */
	struct nfs_args		*dma_nargs;
} domount_args_t;


/*
 * The vnode ops functions for a trigger stub vnode
 */
static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
    caller_context_t *);
static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
    int *, pathname_t *);
static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
    enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
    vsecattr_t *);
static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
    int);
static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
    vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);

/*
 * Regular NFSv4 vnodeops that we need to reference directly
 */
extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *,
    caller_context_t *, int *, pathname_t *);
extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
    caller_context_t *);
extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
    caller_context_t *);
extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);

static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
    cred_t *, vnode_t **);
static domount_args_t  *nfs4_trigger_domount_args_create(vnode_t *, cred_t *);
static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
    vnode_t *vp);
static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
    cred_t *);
static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
    servinfo4_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
    cred_t *);
static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
    ephemeral_servinfo_t *);
static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
static char	*nfs4_trigger_create_mntopts(vfs_t *);
static void	nfs4_trigger_destroy_mntopts(char *);
static int	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
    struct netbuf *, int);

extern int	umount2_engine(vfs_t *, int, cred_t *, int);

vnodeops_t *nfs4_trigger_vnodeops;

/*
 * These are the vnodeops that we must define for stub vnodes.
 *
 *
 * Many of the VOPs defined for NFSv4 do not need to be defined here,
 * for various reasons. This will result in the VFS default function being
 * used:
 *
 * - These VOPs require a previous VOP_OPEN to have occurred. That will have
 *   lost the reference to the stub vnode, meaning these should not be called:
 *	close, read, write, ioctl, readdir, seek.
 *
 * - These VOPs are meaningless for vnodes without data pages. Since the
 *   stub vnode is of type VDIR, these should not be called:
 *	space, getpage, putpage, map, addmap, delmap, pageio, fsync.
 *
 * - These VOPs are otherwise not applicable, and should not be called:
 *	dump, setsecattr.
 *
 *
 * These VOPs we do not want to define, but nor do we want the VFS default
 * action. Instead, we specify the VFS error function, with fs_error(), but
 * note that fs_error() is not actually called. Instead it results in the
 * use of the error function defined for the particular VOP, in vn_ops_table[]:
 *
 * - frlock, dispose, shrlock.
 *
 *
 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
 * NOTE: if any of these ops involve an OTW call with the stub FH, then
 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
 * to protect the security data in the servinfo4_t for the "parent"
 * filesystem that contains the stub.
 *
 * - These VOPs should not trigger a mount, so that "ls -l" does not:
 *	pathconf, getsecattr.
 *
 * - These VOPs would not make sense to trigger:
 *	inactive, rwlock, rwunlock, fid, realvp.
 */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

static void
nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	net->net_refcnt++;
	ASSERT(net->net_refcnt != 0);
}

static void
nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_incr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * We need a safe way to decrement the refcnt whilst the
 * lock is being held.
 */
static void
nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	ASSERT(net->net_refcnt != 0);
	net->net_refcnt--;
}

static void
nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_decr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * Trigger ops for stub vnodes; for mirror mounts, etc.
 *
 * The general idea is that a "triggering" op will first call
 * nfs4_trigger_mount(), which will find out whether a mount has already
 * been triggered.
 *
 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
 * of the covering vfs.
 *
 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
 * and again set newvp, as above.
 *
 * The triggering op may then re-issue the VOP by calling it on newvp.
 *
 * Note that some ops may perform custom action, and may or may not need
 * to trigger a mount.
 *
 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
 * and that would just recurse. Instead, we call the v4 op directly,
 * by name. This is OK, since we know that the vnode is for NFSv4,
 * otherwise it couldn't be a stub.
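 *
 * As a sketch, the re-issuing ops below share this basic shape
 * (VOP_XXX standing in for whichever operation is re-issued):
 *
 *	error = nfs4_trigger_mount(vp, cr, &newvp);
 *	if (error)
 *		return (error);
 *	error = VOP_XXX(newvp, ...);
 *	VN_RELE(newvp);
 *	return (error);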
 *
 */

static int
nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(*vpp, cr, &newvp);
	if (error)
		return (error);

	/* Release the stub vnode, as we're losing the reference to it */
	VN_RELE(*vpp);

	/* Give the caller the root vnode of the newly-mounted fs */
	*vpp = newvp;

	/* return with VN_HELD(newvp) */
	return (VOP_OPEN(vpp, flag, cr, ct));
}

void
nfs4_fake_attrs(vnode_t *vp, struct vattr *vap)
{
	uint_t mask;
	timespec_t now;

	/*
	 * Set some attributes here for referrals.
	 */
	mask = vap->va_mask;
	bzero(vap, sizeof (struct vattr));
	vap->va_mask = mask;
	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_nlink = 1;
	vap->va_size = 1;
	gethrestime(&now);
	vap->va_atime = now;
	vap->va_mtime = now;
	vap->va_ctime = now;
	vap->va_type = VDIR;
	vap->va_mode = 0555;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_rdev = 0;
	vap->va_blksize = MAXBSIZE;
	vap->va_nblocks = 1;
	vap->va_seq = 0;
}

/*
 * For the majority of cases, nfs4_trigger_getattr() will not trigger
 * a mount. However, if ATTR_TRIGGER is set, we are being informed
 * that we need to force the mount before we attempt to determine
 * the attributes. The intent is an atomic operation for security
 * testing.
 *
 * If we're not triggering a mount, we can still inquire about the
 * actual attributes from the server in the mirror mount case,
 * and will return manufactured attributes for a referral (see
 * the 'create' branch of find_referral_stubvp()).
 */
static int
nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;

	if (flags & ATTR_TRIGGER) {
		vnode_t	*newvp;

		error = nfs4_trigger_mount(vp, cr, &newvp);
		if (error)
			return (error);

		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
		VN_RELE(newvp);

	} else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) {

		error = nfs4_getattr(vp, vap, flags, cr, ct);

	} else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) {

		nfs4_fake_attrs(vp, vap);
		error = 0;
	}

	return (error);
}

static int
nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
    caller_context_t *ct, int *deflags, pathname_t *rpnp)
{
	int error;
	vnode_t *newdvp;
	rnode4_t *drp = VTOR4(dvp);

	ASSERT(RP_ISSTUB(drp));

	/*
	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
	 * that up. Instead, pass onto the regular op, regardless of whether
	 * we've triggered a mount.
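	 *
	 * A referral stub's filehandle is manufactured (see
	 * find_referral_stubvp()), so there is no regular op to pass
	 * the lookup on to; instead we hand back the parent
	 * directory's vnode via vtodv().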
	 */
	if (strcmp(nm, "..") == 0) {
		if (RP_ISSTUB_MIRRORMOUNT(drp)) {
			return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
			    ct, deflags, rpnp));
		} else if (RP_ISSTUB_REFERRAL(drp)) {
			/* Return the parent vnode */
			return (vtodv(dvp, vpp, cr, TRUE));
		}
	}

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
	    deflags, rpnp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
    int flags, caller_context_t *ct, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
	    flags, ct, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newtdvp;

	error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
	if (error)
		return (error);

	/*
	 * We don't check whether svp is a stub. Let the NFSv4 code
	 * detect that error, and return accordingly.
	 */
	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
	VN_RELE(newtdvp);

	return (error);
}

static int
nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newsdvp;
	rnode4_t *tdrp = VTOR4(tdvp);

	/*
	 * We know that sdvp is a stub, otherwise we would not be here.
	 *
	 * If tdvp is also a stub, there are two possibilities: it
	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
	 *
	 * In the former case, just trigger sdvp, and treat tdvp as
	 * though it were not a stub.
	 *
	 * In the latter case, it might be a different stub for the
	 * same server fs as sdvp, or for a different server fs.
	 * Regardless, from the client perspective this would still
	 * be a cross-filesystem rename, and should not be allowed,
	 * so return EXDEV, without triggering either mount.
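	 *
	 * (EXDEV is the same error rename(2) reports for any other
	 * cross-device rename attempt.)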
	 */
	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
		return (EXDEV);

	error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
	if (error)
		return (error);

	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);

	VN_RELE(newsdvp);

	return (error);
}

/* ARGSUSED */
static int
nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
    cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_READLINK(newvp, uiop, cr, ct);
	VN_RELE(newvp);

	return (error);
}

/* end of trigger vnode ops */

/*
 * See if the mount has already been done by another caller.
 */
static int
nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
    bool_t *was_mounted, vfs_t **vfsp)
{
	int		error;
	mntinfo4_t	*mi = VTOMI4(vp);

	*was_mounted = FALSE;

	error = vn_vfsrlock_wait(vp);
	if (error)
		return (error);

	*vfsp = vn_mountedvfs(vp);
	if (*vfsp != NULL) {
		/* the mount has already occurred */
		error = VFS_ROOT(*vfsp, newvpp);
		if (!error) {
			/* need to update the reference time */
			mutex_enter(&mi->mi_lock);
			if (mi->mi_ephemeral)
				mi->mi_ephemeral->ne_ref_time =
				    gethrestime_sec();
			mutex_exit(&mi->mi_lock);

			*was_mounted = TRUE;
		}
	}

	vn_vfsunlock(vp);
	return (0);
}

/*
 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
 *
 * The mount may have already occurred, via another thread. If not,
 * assemble the location information - which may require fetching - and
 * perform the mount.
 *
 * Sets newvp to be the root of the fs that is now covering vp. Note
 * that we return with VN_HELD(*newvp).
 *
 * The caller is responsible for passing the VOP onto the covering fs.
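 *
 * In outline: first check whether another thread has already
 * completed the mount; if not, take a hold on the mntinfo4's
 * ephemeral tree (creating it if this is the first ephemeral mount
 * below this fs), flag it as MOUNTING under net_cnt_lock, build the
 * domount args, and call nfs4_trigger_domount() with a copy of the
 * caller's credentials extended to all zone privileges.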
 */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int			error;
	vfs_t			*vfsp;
	rnode4_t		*rp = VTOR4(vp);
	mntinfo4_t		*mi = VTOMI4(vp);
	domount_args_t		*dma;

	nfs4_ephemeral_tree_t	*net;

	bool_t			must_unlock = FALSE;
	bool_t			is_building = FALSE;
	bool_t			was_mounted = FALSE;

	cred_t			*mcred = NULL;

	nfs4_trigger_globals_t	*ntg;

	zone_t			*zone = curproc->p_zone;

	ASSERT(RP_ISSTUB(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);
	} else {
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only proceed if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	dma = nfs4_trigger_domount_args_create(vp, cr);
	if (dma == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	if (mcred == NULL) {
		error = EINVAL;
		goto done;
	}

	crset_zone_privall(mcred);
	if (is_system_labeled())
		(void) setpflags(NET_MAC_AWARE, 1, mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	DTRACE_PROBE2(nfs4clnt__func__referral__mount,
	    vnode_t *, vp, int, error);

	crfree(mcred);

done:

	if (must_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;

		/*
		 * REFCNT: If we are the root of the tree, then we need
		 * to keep a reference because we malloced the tree and
		 * this is where we tied it to our mntinfo.
		 *
		 * If we are not the root of the tree, then our tie to
		 * the mntinfo occurred elsewhere and we need to
		 * decrement the reference to the tree.
		 */
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		else
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}

/*
 * Collect together both the generic & mount-type specific args.
 */
static domount_args_t *
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (NULL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
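			 *
			 * Each responsive server contributes one
			 * nfs_args element, and its hostname is
			 * appended to the comma-separated hostlist
			 * (e.g. "serverA,serverB") used for RO
			 * failover.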
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			/* if the server did not respond, ignore it */
			if (status != RPC_SUCCESS)
				continue;

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;

	return (dma);
}

static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			do {
				struct nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
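 *
 * Mirror mounts copy what they need from the parent mount's
 * servinfo4_t; referrals must fetch the fs_locations attribute from
 * the server and then upcall to nfsmapid to resolve the new server's
 * address.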
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr)
{
	ephemeral_servinfo_t *esi;
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Call the ephemeral type-specific routine */
	if (RP_ISSTUB_MIRRORMOUNT(rp))
		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
	else if (RP_ISSTUB_REFERRAL(rp))
		esi = nfs4_trigger_esi_create_referral(vp, cr);
	else
		esi = NULL;
	return (esi);
}

static void
nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
{
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Currently, no need for an ephemeral type-specific routine */

	/*
	 * The contents of ephemeral_servinfo_t goes into nfs_args,
	 * and will be handled by nfs4_trigger_nargs_destroy().
	 * We need only free the structure itself.
	 */
	if (esi != NULL)
		kmem_free(esi, sizeof (ephemeral_servinfo_t));
}

/*
 * Some of this may turn out to be common with other ephemeral types,
 * in which case it should be moved to nfs4_trigger_esi_create(), or a
 * common function called.
 */

/*
 * Mirror mounts case - should have all data available
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char			*stubpath;
	struct knetconfig	*sikncp, *svkncp;
	struct netbuf		*bufp;
	ephemeral_servinfo_t	*esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across the kmem_zalloc() calls that may sleep,
	 * but this is difficult to avoid, as we need to read svp to
	 * calculate the sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf		*bufp;
		sec_data_t		*sdata;
		dh_k4_clntdata_t	*data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(stubpath) + 1;
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Makes an upcall to the NFSMAPID daemon to resolve the hostname of the
 * NFS server and get the network information required to do the mount call.
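 *
 * The exchange is a door upcall: the server name is XDR-encoded into
 * a refd_door_args_t with cmd NFSMAPID_SRV_NETINFO, and a successful
 * reply carries an XDR-encoded nfs_fsl_info in a refd_door_res_t.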
 */
int
nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
{
	door_arg_t	door_args;
	door_handle_t	dh;
	XDR		xdr;
	refd_door_args_t *xdr_argsp;
	refd_door_res_t	*orig_resp;
	k_sigset_t	smask;
	int		xdr_len = 0;
	int		res_len = 16;	/* length of an IP address */
	int		orig_reslen = res_len;
	int		error = 0;
	struct nfsidmap_globals *nig;

	if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
		return (ECONNREFUSED);

	nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
	ASSERT(nig != NULL);

	mutex_enter(&nig->nfsidmap_daemon_lock);
	dh = nig->nfsidmap_daemon_dh;
	if (dh == NULL) {
		mutex_exit(&nig->nfsidmap_daemon_lock);
		cmn_err(CE_NOTE,
		    "nfs4_callmapid: nfsmapid daemon not " \
		    "running unable to resolve host name\n");
		return (EINVAL);
	}
	door_ki_hold(dh);
	mutex_exit(&nig->nfsidmap_daemon_lock);

	xdr_len = xdr_sizeof(&(xdr_utf8string), server);

	xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
	xdr_argsp->xdr_len = xdr_len;
	xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;

	xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
	    xdr_len, XDR_ENCODE);

	if (!xdr_utf8string(&xdr, server)) {
		kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
		door_ki_rele(dh);
		return (1);
	}

	if (orig_reslen)
		orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);

	door_args.data_ptr = (char *)xdr_argsp;
	door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
	door_args.rsize = res_len;

	sigintr(&smask, 1);
	error = door_ki_upcall(dh, &door_args);
	sigunintr(&smask);

	door_ki_rele(dh);

	kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
	if (error) {
		kmem_free(orig_resp, orig_reslen);
		/*
		 * There is no door to connect to. The referral daemon
		 * must not be running yet.
		 */
		cmn_err(CE_WARN,
		    "nfsmapid not running cannot resolve host name");
		goto out;
	}

	/*
	 * If the results buffer passed back is not the same as
	 * what was sent, free the old buffer and use the new one.
	 */
	if (orig_resp && orig_reslen) {
		refd_door_res_t *door_resp;

		door_resp = (refd_door_res_t *)door_args.rbuf;
		if ((void *)door_args.rbuf != orig_resp)
			kmem_free(orig_resp, orig_reslen);
		if (door_resp->res_status == 0) {
			xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
			    door_resp->xdr_len, XDR_DECODE);
			bzero(resp, sizeof (struct nfs_fsl_info));
			if (!xdr_nfs_fsl_info(&xdr, resp)) {
				DTRACE_PROBE2(
				    nfs4clnt__debug__referral__upcall__xdrfail,
				    struct nfs_fsl_info *, resp,
				    char *, "nfs4_callmapid");
				error = EINVAL;
			}
		} else {
			DTRACE_PROBE2(
			    nfs4clnt__debug__referral__upcall__badstatus,
			    int, door_resp->res_status,
			    char *, "nfs4_callmapid");
			error = door_resp->res_status;
		}
		kmem_free(door_args.rbuf, door_args.rsize);
	}
out:
	DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
	    char *, server, int, error);
	return (error);
}

/*
 * Fetches the fs_locations attribute. Typically called
 * from a Replication/Migration/Referrals/Mirror-mount context
 *
 * Fills in the attributes in garp.
 * The caller is assumed to have allocated memory for garp.
 *
 * lock: if set, this routine acquires s_recovlock and mi_recovlock
 *	around the rfs4call(); if not set, the caller has already
 *	acquired those locks.
 *
 * Returns
 *	1 for success
 *	0 for failure
 */
int
nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
    cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	int argoplist_size = 3 * sizeof (nfs_argop4);
	nfs4_server_t *sp = NULL;
	int doqueue = 1;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int retval = 1;
	struct nfs4_clnt *nfscl;

	if (lock == TRUE)
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	else
		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	sp = find_nfs4_server(mi);
	if (lock == TRUE)
		nfs_rw_exit(&mi->mi_recovlock);

	if (sp != NULL)
		mutex_exit(&sp->s_lock);

	if (lock == TRUE) {
		if (sp != NULL)
			(void) nfs_rw_enter_sig(&sp->s_recovlock,
			    RW_WRITER, 0);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
	} else {
		if (sp != NULL) {
			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
		}
	}

	/*
	 * Do we want to do the setup for recovery here?
	 *
	 * We know that the server responded to a null ping a very
	 * short time ago, and we know that we intend to do a
	 * single stateless operation - we want to fetch attributes,
	 * so we know we can't encounter errors about state. If
	 * something goes wrong with the GETATTR, like not being
	 * able to get a response from the server or getting any
	 * kind of FH error, we should fail the mount.
	 *
	 * We may want to revisit this at a later time.
	 */
	argop = kmem_alloc(argoplist_size, KM_SLEEP);

	args.ctag = TAG_GETATTR_FSLOCATION;
	/* PUTFH LOOKUP GETATTR */
	args.array_len = 3;
	args.array = argop;

	/* 0. putfh file */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. lookup name, can't be dotdot */
	argop[1].argop = OP_CLOOKUP;
	argop[1].nfs_argop4_u.opclookup.cname = nm;

	/* 2. file attrs */
	argop[2].argop = OP_GETATTR;
	argop[2].nfs_argop4_u.opgetattr.attr_request =
	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
	    FATTR4_MOUNTED_ON_FILEID_MASK;
	argop[2].nfs_argop4_u.opgetattr.mi = mi;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	if (lock == TRUE) {
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs_rw_exit(&sp->s_recovlock);
	}

	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	nfscl->nfscl_stat.referrals.value.ui64++;
	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);

	if (e.error != 0) {
		if (sp != NULL)
			nfs4_server_rele(sp);
		kmem_free(argop, argoplist_size);
		return (0);
	}

	/*
	 * Check for all possible error conditions.
	 * For valid replies without an ops array or for illegal
	 * replies, return a failure.
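	 *
	 * (The compound was PUTFH/LOOKUP/GETATTR, so a usable reply
	 * must carry at least three results, with the GETATTR result
	 * at index 2.)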
	 */
	if (res.status != NFS4_OK || res.array_len < 3 ||
	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
		retval = 0;
		goto exit;
	}

	/*
	 * There isn't much value in putting the attributes
	 * in the attr cache since fs_locations4 aren't
	 * encountered very frequently, so just make them
	 * available to the caller.
	 */
	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;

	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");

	/* No fs_locations? -- return a failure */
	if (garp->n4g_ext_res == NULL ||
	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
		retval = 0;
		goto exit;
	}

	if (!garp->n4g_fsid_valid)
		retval = 0;

exit:
	if (retval == 0) {
		/* the call was ok but failed validating the call results */
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	} else {
		ASSERT(callres != NULL);
		*callres = res;
	}

	if (sp != NULL)
		nfs4_server_rele(sp);
	kmem_free(argop, argoplist_size);
	return (retval);
}

/* tunable to disable referral mounts */
int nfs4_no_referrals = 0;

/*
 * Returns NULL if the vnode cannot be created or found.
 */
vnode_t *
find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
{
	nfs_fh4 *stub_fh, *dfh;
	nfs4_sharedfh_t *sfhp;
	char *newfhval;
	vnode_t *vp = NULL;
	fattr4_mounted_on_fileid mnt_on_fileid;
	nfs4_ga_res_t garp;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;
	hrtime_t t;

	if (nfs4_no_referrals)
		return (NULL);

	/*
	 * Get the mounted_on_fileid, unique on that server::fsid
	 */
	mi = VTOMI4(dvp);
	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
	    &garp, &callres, FALSE) == 0)
		return (NULL);
	mnt_on_fileid = garp.n4g_mon_fid;
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	/*
	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
	 */
	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
	newfhval = stub_fh->nfs_fh4_val;

	/* copy directory's file handle */
	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
	newfhval = newfhval + dfh->nfs_fh4_len;

	/* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
	bcopy((char *)&mnt_on_fileid, newfhval,
	    sizeof (fattr4_mounted_on_fileid));
	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);

	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid));
	kmem_free(stub_fh, sizeof (nfs_fh4));
	if (sfhp == NULL)
		return (NULL);

	t = gethrtime();
	garp.n4g_va.va_type = VDIR;
	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));

	if (vp != NULL)
		vp->v_type = VDIR;

	sfh4_rele(&sfhp);
	return (vp);
}

int
nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
{
	vnode_t	*nvp;
	rnode4_t *rp;

	if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
		return (EINVAL);

	rp = VTOR4(nvp);
	mutex_enter(&rp->r_statelock);
	r4_stub_referral(rp);
	mutex_exit(&rp->r_statelock);
	dnlc_enter(dvp, nm, nvp);

	if (*vpp != NULL)
		VN_RELE(*vpp);	/* no longer need this vnode */

	*vpp = nvp;

	return (0);
}

/*
 * Fetch the location information and resolve the new server.
 * Caller needs to free up the XDR data which is returned.
 * Input: mount info, shared filehandle, nodename
 * Return: Index to the result or Error(-1)
 * Output: FsLocations Info, Resolved Server Info.
 */
int
nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh,
    char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res,
    struct nfs_fsl_info *fsloc)
{
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	int ret, i, error;
	nfs4_ga_res_t garp;
	COMPOUND4res_clnt callres;
	struct knetconfig *knc;

	ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE);
	if (ret == 0)
		return (-1);

	/*
	 * As a lame attempt at figuring out if we're
	 * handling a migration event or a referral,
	 * look for rnodes with this fsid in the rnode
	 * cache.
	 *
	 * If we can find one or more such rnodes, it
	 * means we're handling a migration event and
	 * we want to bail out in that case.
	 */
	if (r4find_by_fsid(mi, &garp.n4g_fsid)) {
		DTRACE_PROBE3(nfs4clnt__debug__referral__migration,
		    mntinfo4_t *, mi, nfs4_ga_res_t *, &garp,
		    char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/*
	 * Find the first responsive server to mount. When we find
	 * one, fsp will point to it.
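	 *
	 * For each candidate location we upcall to nfsmapid for its
	 * address information and then NULL-ping that server; the
	 * first one to answer is used.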
	 */
	for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) {

		fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i];
		if (fsp->server_len == 0 || fsp->server_val == NULL)
			continue;

		error = nfs4_callmapid(fsp->server_val, &nfsfsloc);
		if (error != 0)
			continue;

		error = nfs4_ping_server_common(nfsfsloc.knconf,
		    nfsfsloc.addr, !(mi->mi_flags & MI4_INT));
		if (error == RPC_SUCCESS)
			break;

		DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr,
		    sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf,
		    char *, "nfs4_process_referral");

		(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	}
	knc = nfsfsloc.knconf;
	if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) ||
	    (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) {
		DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc,
		    nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/* Send the results back */
	*fsloc = nfsfsloc;
	*grp = garp;
	*res = callres;
	return (i);
}

/*
 * Referrals case - need to fetch referral data and then upcall to
 * user-level to get complete mount data.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
{
	struct knetconfig	*sikncp, *svkncp;
	struct netbuf		*bufp;
	ephemeral_servinfo_t	*esi;
	vnode_t			*dvp;
	rnode4_t		*drp;
	fs_location4		*fsp;
	struct nfs_fsl_info	nfsfsloc;
	nfs4_ga_res_t		garp;
	char			*p;
	char			fn[MAXNAMELEN];
	int			i, index = -1;
	mntinfo4_t		*mi;
	COMPOUND4res_clnt	callres;

	/*
	 * If we're passed in a stub vnode that
	 * isn't a "referral" stub, bail out
	 * and return a failure
	 */
	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
		return (NULL);

	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
		return (NULL);

	drp = VTOR4(dvp);
	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
		VN_RELE(dvp);
		return (NULL);
	}

	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
		nfs_rw_exit(&drp->r_rwlock);
		VN_RELE(dvp);
		return (NULL);
	}

	mi = VTOMI4(dvp);
	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
	    &garp, &callres, &nfsfsloc);
	nfs_rw_exit(&drp->r_rwlock);
	VN_RELE(dvp);
	if (index < 0)
		return (NULL);

	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_REFERRAL;

	esi->esi_hostname =
	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
	    fsp->server_val->utf8string_len);
	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';

	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	bufp->len = nfsfsloc.addr->len;
	bufp->maxlen = nfsfsloc.addr->maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
	esi->esi_addr = bufp;

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;

	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
	    struct nfs_fsl_info *, &nfsfsloc,
"nfs4_trigger_esi_create_referral"); 1758 1759 svkncp = nfsfsloc.knconf; 1760 sikncp->knc_semantics = svkncp->knc_semantics; 1761 sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1762 (void) strlcat((char *)sikncp->knc_protofmly, 1763 (char *)svkncp->knc_protofmly, KNC_STRSIZE); 1764 sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1765 (void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto, 1766 KNC_STRSIZE); 1767 sikncp->knc_rdev = svkncp->knc_rdev; 1768 1769 DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf, 1770 struct knetconfig *, sikncp, 1771 char *, "nfs4_trigger_esi_create_referral"); 1772 1773 esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP); 1774 bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len); 1775 esi->esi_syncaddr = NULL; 1776 1777 esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1778 esi->esi_path_len = MAXPATHLEN; 1779 *p++ = '/'; 1780 for (i = 0; i < fsp->rootpath.pathname4_len; i++) { 1781 component4 *comp; 1782 1783 comp = &fsp->rootpath.pathname4_val[i]; 1784 /* If no space, null the string and bail */ 1785 if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN) 1786 goto err; 1787 bcopy(comp->utf8string_val, p, comp->utf8string_len); 1788 p += comp->utf8string_len; 1789 *p++ = '/'; 1790 } 1791 if (fsp->rootpath.pathname4_len != 0) 1792 *(p - 1) = '\0'; 1793 else 1794 *p = '\0'; 1795 p = esi->esi_path; 1796 esi->esi_path = strdup(p); 1797 esi->esi_path_len = strlen(p) + 1; 1798 kmem_free(p, MAXPATHLEN); 1799 1800 /* Allocated in nfs4_process_referral() */ 1801 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1802 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1803 1804 return (esi); 1805 err: 1806 kmem_free(esi->esi_path, esi->esi_path_len); 1807 kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1); 1808 kmem_free(esi->esi_addr->buf, esi->esi_addr->len); 1809 kmem_free(esi->esi_addr, sizeof (struct netbuf)); 1810 kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE); 1811 kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE); 1812 kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf)); 1813 kmem_free(esi->esi_netname, nfsfsloc.netnm_len); 1814 kmem_free(esi, sizeof (ephemeral_servinfo_t)); 1815 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1816 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1817 return (NULL); 1818 } 1819 1820 /* 1821 * Assemble the args, and call the generic VFS mount function to 1822 * finally perform the ephemeral mount. 1823 */ 1824 static int 1825 nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp, 1826 cred_t *cr, vnode_t **newvpp) 1827 { 1828 struct mounta *uap; 1829 char *mntpt, *orig_path, *path; 1830 const char *orig_mntpt; 1831 int retval; 1832 int mntpt_len; 1833 int spec_len; 1834 zone_t *zone = curproc->p_zone; 1835 bool_t has_leading_slash; 1836 int i; 1837 1838 vfs_t *stubvfsp = stubvp->v_vfsp; 1839 ephemeral_servinfo_t *esi = dma->dma_esi; 1840 struct nfs_args *nargs = dma->dma_nargs; 1841 1842 /* first, construct the mount point for the ephemeral mount */ 1843 orig_path = path = fn_path(VTOSV(stubvp)->sv_name); 1844 orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt); 1845 1846 if (*orig_path == '.') 1847 orig_path++; 1848 1849 /* 1850 * Get rid of zone's root path 1851 */ 1852 if (zone != global_zone) { 1853 /* 1854 * -1 for trailing '/' and -1 for EOS. 
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			retval = VFS_ROOT(*vfsp, newvpp);
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
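 *
 * The structure built here is later released via
 * nfs4_trigger_nargs_destroy(), once domount() has taken what it
 * needs from it.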
/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT)
		nargs->flags |= NFSMNT_PUBLIC;

	/* Do some referral-specific option tweaking */
	if (nargs->flags & NFSMNT_REFERRAL) {
		nargs->flags &= ~NFSMNT_DORDMA;
		nargs->flags |= NFSMNT_TRYRDMA;
	}

	mutex_exit(&mi->mi_lock);

	/*
	 * Security data & negotiation policy.
	 *
	 * For mirror mounts, we need to preserve the parent mount's
	 * preference for security negotiation, translating SV4_TRYSECDEFAULT
	 * to NFSMNT_SECDEFAULT if present.
	 *
	 * For referrals, we always want security negotiation and will
	 * set NFSMNT_SECDEFAULT, and we will not copy current secdata.
	 * The reason is that we can't negotiate down from a parent's
	 * Kerberos flavor to AUTH_SYS.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * the currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (nargs->flags & NFSMNT_REFERRAL) {
		/* enable negotiation for referral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;
		secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	} else if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for mirror mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}
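/*
 * Illustrative sketch (not part of this file): the security-data
 * decision made above, distilled into one hypothetical helper.
 * "want_negotiation" stands in for setting NFSMNT_SECDEFAULT;
 * copy_sec_data(), sv_currsec, sv_secdata and SV4_TRYSECDEFAULT are
 * the real names used above. The caller is assumed to hold sv_lock
 * as a reader, as nfs4_trigger_nargs_create() does.
 */
static sec_data_t *
example_pick_secdata(servinfo4_t *svp, int is_referral,
    int *want_negotiation)
{
	if (is_referral) {
		/* always negotiate; start from a fresh AUTH_SYS flavour */
		sec_data_t *sd = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);

		sd->secmod = sd->rpcflavor = AUTH_SYS;
		sd->data = NULL;
		*want_negotiation = 1;
		return (sd);
	}

	if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* negotiate, starting from the parent's current flavour */
		*want_negotiation = 1;
		if (svp->sv_currsec != NULL)
			return (copy_sec_data(svp->sv_currsec));
		return (svp->sv_secdata != NULL ?
		    copy_sec_data(svp->sv_secdata) : NULL);
	}

	/* a specific flavour was requested; no negotiation */
	*want_negotiation = 0;
	return (svp->sv_secdata != NULL ?
	    copy_sec_data(svp->sv_secdata) : NULL);
}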
static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether to
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}
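/*
 * Illustrative sketch (not part of this file): the head insertion
 * nfs4_record_ephemeral_mount() performs above when weaving a node
 * under an ephemeral parent. The new node becomes the first child;
 * any old first child becomes its peer, with ne_prior back pointers
 * kept consistent. example_link_child() is a hypothetical name.
 */
static void
example_link_child(nfs4_ephemeral_t *prior, nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *child = prior->ne_child;

	prior->ne_child = eph;
	if (child != NULL) {
		eph->ne_peer = child;
		child->ne_prior = eph;
	}
	eph->ne_prior = prior;
}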
/*
 * Commit the changes to the ephemeral tree for removing this node.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
	 */
	if (prior) {
		if (prior->ne_child == e) {
			prior->ne_child = peer;
		} else {
			prior->ne_peer = peer;
		}

		if (peer)
			peer->ne_prior = prior;
	} else if (peer) {
		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
		peer->ne_prior = NULL;
	} else {
		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
	}
}
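/*
 * Worked example of the splice above (illustrative only). With
 * A -child-> B -peer-> C, removing B gives A -child-> C and
 * C.ne_prior == A; removing C instead gives B.ne_peer == NULL.
 * In the second case it is B, not A, whose forward pointer names C,
 * which is why the code checks prior->ne_child == e before deciding
 * which of ne_child or ne_peer to overwrite.
 */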
/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior is NULL if and
		 * only if e is the root of the
		 * ephemeral tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		e = prior;
	}

	/* NOTREACHED */
}
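/*
 * Illustrative sketch (not part of this file): the recursion-free
 * traversal shape used above. Walk to a leaf (children first, then
 * peers), process it, detach it from its prior node, and step back
 * up. "visit" is a hypothetical callback standing in for the umount
 * work; the special-casing of the passed-in root is omitted.
 */
static void
example_unroll(nfs4_ephemeral_t *root, void (*visit)(nfs4_ephemeral_t *))
{
	nfs4_ephemeral_t *e = root;
	nfs4_ephemeral_t *prior;

	while (e != NULL) {
		if (e->ne_child != NULL) {	/* go deeper first */
			e = e->ne_child;
			continue;
		}
		if (e->ne_peer != NULL) {	/* then across */
			e = e->ne_peer;
			continue;
		}

		/*
		 * A leaf: process it, unhook it from its prior node
		 * (as the error-free path above does), and back up.
		 */
		prior = e->ne_prior;
		visit(e);
		if (prior != NULL) {
			if (prior->ne_child == e)
				prior->ne_child = NULL;
			else
				prior->ne_peer = NULL;
		}
		e = prior;
	}
}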
/*
 * Common code to safely release net_cnt_lock and net_tree_lock
 */
void
nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	nfs4_ephemeral_tree_t *net = *pnet;

	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);

		*pmust_unlock = FALSE;
	}
}

/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we cannot nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}
/*
 * Unmount an ephemeral node.
 *
 * Note that if this code fails, then it must unlock.
 *
 * If it succeeds, then the caller must be prepared to do so.
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any cannot be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so it is safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}
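/*
 * Illustrative sketch (not part of this file): the lock-acquisition
 * shape nfs4_ephemeral_umount() uses above. Try the tree lock first;
 * if that fails and we are not a recursive caller, block on it and
 * then re-validate state that may have changed while we slept.
 * tree_is_dead is a hypothetical predicate standing in for the
 * NFS4_EPHEMERAL_TREE_INVALID and eph == NULL checks above.
 */
static int
example_grab_tree_lock(nfs4_ephemeral_tree_t *net, int is_recursed,
    bool_t *pmust_unlock, int (*tree_is_dead)(nfs4_ephemeral_tree_t *))
{
	if (mutex_tryenter(&net->net_tree_lock)) {
		*pmust_unlock = TRUE;
		return (0);
	}

	if (is_recursed)
		return (0);	/* our caller already holds the lock */

	/* blocking wait; no other locks may be held across this */
	mutex_enter(&net->net_tree_lock);

	/* the world may have changed while we slept */
	if (tree_is_dead(net)) {
		mutex_exit(&net->net_tree_lock);
		return (EBUSY);
	}

	*pmust_unlock = TRUE;
	return (0);
}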
/*
 * Do the umount and record any error in the parent.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int error;

	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}
/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is to force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up the tree.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;
			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}
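/*
 * Illustrative sketch (not part of this file): the three-state walk
 * the harvester above drives with ne_state, with the two error
 * states omitted. Each node is marked on the way down (child pass,
 * then sibling pass) and processed on the way back up via ne_prior,
 * so leaves are always processed before their parents. "process" is
 * a hypothetical callback standing in for the unmount attempt.
 */
static void
example_state_walk(nfs4_ephemeral_t *root,
    void (*process)(nfs4_ephemeral_t *))
{
	nfs4_ephemeral_t *e = root;

	if (e != NULL)
		e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

	while (e != NULL) {
		if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
			e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
			if (e->ne_child != NULL) {
				e = e->ne_child;
				e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;
			}
			continue;
		}
		if (e->ne_state == NFS4_EPHEMERAL_VISIT_SIBLING) {
			e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
			if (e->ne_peer != NULL) {
				e = e->ne_peer;
				e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;
			}
			continue;
		}

		/* NFS4_EPHEMERAL_PROCESS_ME: leaves first, then parents */
		process(e);
		e = e->ne_prior;
	}
}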
/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t timeleft;
	zone_t *zone = curproc->p_zone;

	for (;;) {
		timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}
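/*
 * Worked timing example (illustrative): with the defaults above, the
 * harvester wakes every nfs4_trigger_thread_timer (20) seconds and,
 * via the time_check pass, unmounts any ephemeral node whose last
 * reference (ne_ref_time) is more than its ne_mount_to (default
 * nfs4_trigger_mount_to, 240 seconds) in the past. An idle
 * mirror-mount is therefore reaped between 240 and roughly 260
 * seconds after its last use, while zone shutdown (the zsd hooks
 * below) harvests with time_check == FALSE and ignores the age
 * entirely.
 */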
/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmount harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t *ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default....
	 */
	ntg->ntg_mount_to = nfs4_trigger_mount_to;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);

	mutex_destroy(&ntg->ntg_forest_lock);
	kmem_free(ntg, sizeof (*ntg));
}

/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_fini(void)
{
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}

/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_init(void)
{
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}

/*
 * nfssys() calls this function to set the per-zone
 * value of mount_to to drive when an ephemeral mount is
 * timed out. Each mount will grab a copy of this value
 * when mounted.
 */
void
nfs4_ephemeral_set_mount_to(uint_t mount_to)
{
	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);

	ntg->ntg_mount_to = mount_to;
}
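/*
 * Illustrative sketch (not part of this file): the zone-specific-data
 * pattern used by nfs4_ephemeral_init() above, reduced to its core.
 * example_key and example_data_t are hypothetical; the real code
 * hangs an nfs4_trigger_globals_t off nfs4_ephemeral_key and fetches
 * it with zone_getspecific() the same way.
 */
typedef struct example_data {
	int ed_counter;
} example_data_t;

static zone_key_t example_key;

static void *
example_zsd_create(zoneid_t zoneid)
{
	return (kmem_zalloc(sizeof (example_data_t), KM_SLEEP));
}

static void
example_zsd_destroy(zoneid_t zoneid, void *arg)
{
	kmem_free(arg, sizeof (example_data_t));
}

static void
example_zsd_init(void)
{
	/* a NULL shutdown callback is permitted */
	zone_key_create(&example_key, example_zsd_create, NULL,
	    example_zsd_destroy);
}

static example_data_t *
example_zsd_get(void)
{
	return (zone_getspecific(example_key, curproc->p_zone));
}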
/*
 * Walk the list of v4 mount options; if they are currently set in vfsp,
 * append them to a new comma-separated mount option string, and return it.
 *
 * Caller should free by calling nfs4_trigger_destroy_mntopts().
 */
static char *
nfs4_trigger_create_mntopts(vfs_t *vfsp)
{
	uint_t i;
	char *mntopts;
	struct vfssw *vswp;
	mntopts_t *optproto;

	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);

	/* get the list of applicable mount options for v4; locks *vswp */
	vswp = vfs_getvfssw(MNTTYPE_NFS4);
	optproto = &vswp->vsw_optproto;

	for (i = 0; i < optproto->mo_count; i++) {
		struct mntopt *mop = &optproto->mo_list[i];

		if (mop->mo_flags & MO_EMPTY)
			continue;

		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
			kmem_free(mntopts, MAX_MNTOPT_STR);
			vfs_unrefvfssw(vswp);
			return (NULL);
		}
	}

	vfs_unrefvfssw(vswp);

	/*
	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
	 * and it may only be passed via MS_OPTIONSTR, so we
	 * must handle it here.
	 *
	 * Ideally, it would be in the list, but NFS does not specify
	 * its own opt proto list; it uses the default one instead.
	 * Since not all filesystems support extended attrs, it would
	 * not be appropriate to add it there.
	 */
	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
		kmem_free(mntopts, MAX_MNTOPT_STR);
		return (NULL);
	}

	return (mntopts);
}

static void
nfs4_trigger_destroy_mntopts(char *mntopts)
{
	if (mntopts)
		kmem_free(mntopts, MAX_MNTOPT_STR);
}

/*
 * Check a single mount option (optname). Add to mntopts if it is set in VFS.
 */
static int
nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
{
	if (mntopts == NULL || optname == NULL || vfsp == NULL)
		return (EINVAL);

	if (vfs_optionisset(vfsp, optname, NULL)) {
		size_t mntoptslen = strlen(mntopts);
		size_t optnamelen = strlen(optname);

		/* +1 for ',', +1 for NUL */
		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
			return (EOVERFLOW);

		/* first or subsequent mount option? */
		if (*mntopts != '\0')
			(void) strcat(mntopts, ",");

		(void) strcat(mntopts, optname);
	}

	return (0);
}

static enum clnt_stat
nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr)
{
	int retries;
	uint_t max_msgsize;
	enum clnt_stat status;
	CLIENT *cl;
	struct timeval timeout;

	/* as per recov_newserver() */
	max_msgsize = 0;
	retries = 1;
	timeout.tv_sec = 2;
	timeout.tv_usec = 0;

	if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4,
	    max_msgsize, retries, CRED(), &cl) != 0)
		return (RPC_FAILED);

	if (nointr)
		cl->cl_nosignal = TRUE;
	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
	    timeout);
	if (nointr)
		cl->cl_nosignal = FALSE;

	AUTH_DESTROY(cl->cl_auth);
	CLNT_DESTROY(cl);

	return (status);
}

static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
{
	return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr));
}
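/*
 * Illustrative sketch (not part of this file): how the comma-joined
 * option string built above grows. Appending "xattr" and then
 * "noatime" (example values only) to an empty buffer yields
 * "xattr,noatime"; an append that would not fit returns EOVERFLOW
 * and leaves the buffer untouched, as nfs4_trigger_add_mntopt()
 * does. example_append_opt() is a hypothetical name.
 */
static int
example_append_opt(char *opts, size_t optslen, const char *opt)
{
	/* +1 for the ',', +1 for the terminating NUL */
	if (strlen(opts) + strlen(opt) + 2 > optslen)
		return (EOVERFLOW);

	if (*opts != '\0')
		(void) strlcat(opts, ",", optslen);
	(void) strlcat(opts, opt, optslen);

	return (0);
}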