1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are 29 * triggered from a "stub" rnode via a special set of vnodeops. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/time.h> 37 #include <sys/vnode.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/file.h> 41 #include <sys/filio.h> 42 #include <sys/uio.h> 43 #include <sys/buf.h> 44 #include <sys/mman.h> 45 #include <sys/pathname.h> 46 #include <sys/dirent.h> 47 #include <sys/debug.h> 48 #include <sys/vmsystm.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/swap.h> 52 #include <sys/errno.h> 53 #include <sys/strsubr.h> 54 #include <sys/sysmacros.h> 55 #include <sys/kmem.h> 56 #include <sys/mount.h> 57 #include <sys/cmn_err.h> 58 #include <sys/pathconf.h> 59 #include <sys/utsname.h> 60 #include <sys/dnlc.h> 61 #include <sys/acl.h> 62 #include <sys/systeminfo.h> 63 #include <sys/policy.h> 64 #include <sys/sdt.h> 65 #include <sys/list.h> 66 #include <sys/stat.h> 67 #include <sys/mntent.h> 68 #include <sys/priv.h> 69 70 #include <rpc/types.h> 71 #include <rpc/auth.h> 72 #include <rpc/clnt.h> 73 74 #include <nfs/nfs.h> 75 #include <nfs/nfs_clnt.h> 76 #include <nfs/nfs_acl.h> 77 #include <nfs/lm.h> 78 #include <nfs/nfs4.h> 79 #include <nfs/nfs4_kprot.h> 80 #include <nfs/rnode4.h> 81 #include <nfs/nfs4_clnt.h> 82 #include <nfs/nfsid_map.h> 83 #include <nfs/nfs4_idmap_impl.h> 84 85 #include <vm/hat.h> 86 #include <vm/as.h> 87 #include <vm/page.h> 88 #include <vm/pvn.h> 89 #include <vm/seg.h> 90 #include <vm/seg_map.h> 91 #include <vm/seg_kpm.h> 92 #include <vm/seg_vn.h> 93 94 #include <fs/fs_subr.h> 95 96 #include <sys/ddi.h> 97 #include <sys/int_fmtio.h> 98 99 #include <sys/sunddi.h> 100 101 #include <sys/priv_names.h> 102 103 extern zone_key_t nfs4clnt_zone_key; 104 extern zone_key_t nfsidmap_zone_key; 105 106 /* 107 * The automatic unmounter thread stuff! 108 */ 109 static int nfs4_trigger_thread_timer = 20; /* in seconds */ 110 111 /* 112 * Just a default.... 113 */ 114 static uint_t nfs4_trigger_mount_to = 240; 115 116 typedef struct nfs4_trigger_globals { 117 kmutex_t ntg_forest_lock; 118 uint_t ntg_mount_to; 119 int ntg_thread_started; 120 nfs4_ephemeral_tree_t *ntg_forest; 121 } nfs4_trigger_globals_t; 122 123 kmutex_t nfs4_ephemeral_thread_lock; 124 125 zone_key_t nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED; 126 127 static void nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *); 128 129 /* 130 * Used for ephemeral mounts; contains data either duplicated from 131 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount. 132 * 133 * It's intended that this structure is used solely for ephemeral 134 * mount-type specific data, for passing this data to 135 * nfs4_trigger_nargs_create(). 136 */ 137 typedef struct ephemeral_servinfo { 138 char *esi_hostname; 139 char *esi_netname; 140 char *esi_path; 141 int esi_path_len; 142 int esi_mount_flags; 143 struct netbuf *esi_addr; 144 struct netbuf *esi_syncaddr; 145 struct knetconfig *esi_knconf; 146 } ephemeral_servinfo_t; 147 148 /* 149 * Collect together the mount-type specific and generic data args. 150 */ 151 typedef struct domount_args { 152 ephemeral_servinfo_t *dma_esi; 153 char *dma_hostlist; /* comma-sep. for RO failover */ 154 struct nfs_args *dma_nargs; 155 } domount_args_t; 156 157 158 /* 159 * The vnode ops functions for a trigger stub vnode 160 */ 161 static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *); 162 static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *, 163 caller_context_t *); 164 static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *, 165 caller_context_t *); 166 static int nfs4_trigger_access(vnode_t *, int, int, cred_t *, 167 caller_context_t *); 168 static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *, 169 caller_context_t *); 170 static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **, 171 struct pathname *, int, vnode_t *, cred_t *, caller_context_t *, 172 int *, pathname_t *); 173 static int nfs4_trigger_create(vnode_t *, char *, struct vattr *, 174 enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *, 175 vsecattr_t *); 176 static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *, 177 int); 178 static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *, 179 caller_context_t *, int); 180 static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *, 181 cred_t *, caller_context_t *, int); 182 static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *, 183 vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp); 184 static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *, 185 caller_context_t *, int); 186 static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *, 187 cred_t *, caller_context_t *, int); 188 static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *); 189 190 /* 191 * Regular NFSv4 vnodeops that we need to reference directly 192 */ 193 extern int nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *, 194 caller_context_t *); 195 extern void nfs4_inactive(vnode_t *, cred_t *, caller_context_t *); 196 extern int nfs4_rwlock(vnode_t *, int, caller_context_t *); 197 extern void nfs4_rwunlock(vnode_t *, int, caller_context_t *); 198 extern int nfs4_lookup(vnode_t *, char *, vnode_t **, 199 struct pathname *, int, vnode_t *, cred_t *, 200 caller_context_t *, int *, pathname_t *); 201 extern int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *, 202 caller_context_t *); 203 extern int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *, 204 caller_context_t *); 205 extern int nfs4_fid(vnode_t *, fid_t *, caller_context_t *); 206 extern int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *); 207 208 static int nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **); 209 static int nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **, 210 cred_t *, vnode_t **); 211 static domount_args_t *nfs4_trigger_domount_args_create(vnode_t *, cred_t *); 212 static void nfs4_trigger_domount_args_destroy(domount_args_t *dma, 213 vnode_t *vp); 214 static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *, 215 cred_t *); 216 static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *); 217 static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *, 218 servinfo4_t *); 219 static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *, 220 cred_t *); 221 static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *, 222 ephemeral_servinfo_t *); 223 static void nfs4_trigger_nargs_destroy(struct nfs_args *); 224 static char *nfs4_trigger_create_mntopts(vfs_t *); 225 static void nfs4_trigger_destroy_mntopts(char *); 226 static int nfs4_trigger_add_mntopt(char *, char *, vfs_t *); 227 static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int); 228 static enum clnt_stat nfs4_ping_server_common(struct knetconfig *, 229 struct netbuf *, int); 230 231 extern int umount2_engine(vfs_t *, int, cred_t *, int); 232 233 vnodeops_t *nfs4_trigger_vnodeops; 234 235 /* 236 * These are the vnodeops that we must define for stub vnodes. 237 * 238 * 239 * Many of the VOPs defined for NFSv4 do not need to be defined here, 240 * for various reasons. This will result in the VFS default function being 241 * used: 242 * 243 * - These VOPs require a previous VOP_OPEN to have occurred. That will have 244 * lost the reference to the stub vnode, meaning these should not be called: 245 * close, read, write, ioctl, readdir, seek. 246 * 247 * - These VOPs are meaningless for vnodes without data pages. Since the 248 * stub vnode is of type VDIR, these should not be called: 249 * space, getpage, putpage, map, addmap, delmap, pageio, fsync. 250 * 251 * - These VOPs are otherwise not applicable, and should not be called: 252 * dump, setsecattr. 253 * 254 * 255 * These VOPs we do not want to define, but nor do we want the VFS default 256 * action. Instead, we specify the VFS error function, with fs_error(), but 257 * note that fs_error() is not actually called. Instead it results in the 258 * use of the error function defined for the particular VOP, in vn_ops_table[]: 259 * 260 * - frlock, dispose, shrlock. 261 * 262 * 263 * These VOPs we define to use the corresponding regular NFSv4 vnodeop. 264 * NOTE: if any of these ops involve an OTW call with the stub FH, then 265 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo() 266 * to protect the security data in the servinfo4_t for the "parent" 267 * filesystem that contains the stub. 268 * 269 * - These VOPs should not trigger a mount, so that "ls -l" does not: 270 * pathconf, getsecattr. 271 * 272 * - These VOPs would not make sense to trigger: 273 * inactive, rwlock, rwunlock, fid, realvp. 274 */ 275 const fs_operation_def_t nfs4_trigger_vnodeops_template[] = { 276 VOPNAME_OPEN, { .vop_open = nfs4_trigger_open }, 277 VOPNAME_GETATTR, { .vop_getattr = nfs4_trigger_getattr }, 278 VOPNAME_SETATTR, { .vop_setattr = nfs4_trigger_setattr }, 279 VOPNAME_ACCESS, { .vop_access = nfs4_trigger_access }, 280 VOPNAME_LOOKUP, { .vop_lookup = nfs4_trigger_lookup }, 281 VOPNAME_CREATE, { .vop_create = nfs4_trigger_create }, 282 VOPNAME_REMOVE, { .vop_remove = nfs4_trigger_remove }, 283 VOPNAME_LINK, { .vop_link = nfs4_trigger_link }, 284 VOPNAME_RENAME, { .vop_rename = nfs4_trigger_rename }, 285 VOPNAME_MKDIR, { .vop_mkdir = nfs4_trigger_mkdir }, 286 VOPNAME_RMDIR, { .vop_rmdir = nfs4_trigger_rmdir }, 287 VOPNAME_SYMLINK, { .vop_symlink = nfs4_trigger_symlink }, 288 VOPNAME_READLINK, { .vop_readlink = nfs4_trigger_readlink }, 289 VOPNAME_INACTIVE, { .vop_inactive = nfs4_inactive }, 290 VOPNAME_FID, { .vop_fid = nfs4_fid }, 291 VOPNAME_RWLOCK, { .vop_rwlock = nfs4_rwlock }, 292 VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs4_rwunlock }, 293 VOPNAME_REALVP, { .vop_realvp = nfs4_realvp }, 294 VOPNAME_GETSECATTR, { .vop_getsecattr = nfs4_getsecattr }, 295 VOPNAME_PATHCONF, { .vop_pathconf = nfs4_pathconf }, 296 VOPNAME_FRLOCK, { .error = fs_error }, 297 VOPNAME_DISPOSE, { .error = fs_error }, 298 VOPNAME_SHRLOCK, { .error = fs_error }, 299 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 300 NULL, NULL 301 }; 302 303 static void 304 nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net) 305 { 306 ASSERT(mutex_owned(&net->net_cnt_lock)); 307 net->net_refcnt++; 308 ASSERT(net->net_refcnt != 0); 309 } 310 311 static void 312 nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net) 313 { 314 mutex_enter(&net->net_cnt_lock); 315 nfs4_ephemeral_tree_incr(net); 316 mutex_exit(&net->net_cnt_lock); 317 } 318 319 /* 320 * We need a safe way to decrement the refcnt whilst the 321 * lock is being held. 322 */ 323 static void 324 nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net) 325 { 326 ASSERT(mutex_owned(&net->net_cnt_lock)); 327 ASSERT(net->net_refcnt != 0); 328 net->net_refcnt--; 329 } 330 331 static void 332 nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net) 333 { 334 mutex_enter(&net->net_cnt_lock); 335 nfs4_ephemeral_tree_decr(net); 336 mutex_exit(&net->net_cnt_lock); 337 } 338 339 /* 340 * Trigger ops for stub vnodes; for mirror mounts, etc. 341 * 342 * The general idea is that a "triggering" op will first call 343 * nfs4_trigger_mount(), which will find out whether a mount has already 344 * been triggered. 345 * 346 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode 347 * of the covering vfs. 348 * 349 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so, 350 * and again set newvp, as above. 351 * 352 * The triggering op may then re-issue the VOP by calling it on newvp. 353 * 354 * Note that some ops may perform custom action, and may or may not need 355 * to trigger a mount. 356 * 357 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We 358 * obviously can't do this with VOP_<whatever>, since it's a stub vnode 359 * and that would just recurse. Instead, we call the v4 op directly, 360 * by name. This is OK, since we know that the vnode is for NFSv4, 361 * otherwise it couldn't be a stub. 362 * 363 */ 364 365 static int 366 nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 367 { 368 int error; 369 vnode_t *newvp; 370 371 error = nfs4_trigger_mount(*vpp, cr, &newvp); 372 if (error) 373 return (error); 374 375 /* Release the stub vnode, as we're losing the reference to it */ 376 VN_RELE(*vpp); 377 378 /* Give the caller the root vnode of the newly-mounted fs */ 379 *vpp = newvp; 380 381 /* return with VN_HELD(newvp) */ 382 return (VOP_OPEN(vpp, flag, cr, ct)); 383 } 384 385 void 386 nfs4_fake_attrs(vnode_t *vp, struct vattr *vap) 387 { 388 uint_t mask; 389 timespec_t now; 390 391 /* 392 * Set some attributes here for referrals. 393 */ 394 mask = vap->va_mask; 395 bzero(vap, sizeof (struct vattr)); 396 vap->va_mask = mask; 397 vap->va_uid = 0; 398 vap->va_gid = 0; 399 vap->va_nlink = 1; 400 vap->va_size = 1; 401 gethrestime(&now); 402 vap->va_atime = now; 403 vap->va_mtime = now; 404 vap->va_ctime = now; 405 vap->va_type = VDIR; 406 vap->va_mode = 0555; 407 vap->va_fsid = vp->v_vfsp->vfs_dev; 408 vap->va_rdev = 0; 409 vap->va_blksize = MAXBSIZE; 410 vap->va_nblocks = 1; 411 vap->va_seq = 0; 412 } 413 414 /* 415 * For the majority of cases, nfs4_trigger_getattr() will not trigger 416 * a mount. However, if ATTR_TRIGGER is set, we are being informed 417 * that we need to force the mount before we attempt to determine 418 * the attributes. The intent is an atomic operation for security 419 * testing. 420 * 421 * If we're not triggering a mount, we can still inquire about the 422 * actual attributes from the server in the mirror mount case, 423 * and will return manufactured attributes for a referral (see 424 * the 'create' branch of find_referral_stubvp()). 425 */ 426 static int 427 nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 428 caller_context_t *ct) 429 { 430 int error; 431 432 if (flags & ATTR_TRIGGER) { 433 vnode_t *newvp; 434 435 error = nfs4_trigger_mount(vp, cr, &newvp); 436 if (error) 437 return (error); 438 439 error = VOP_GETATTR(newvp, vap, flags, cr, ct); 440 VN_RELE(newvp); 441 442 } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) { 443 444 error = nfs4_getattr(vp, vap, flags, cr, ct); 445 446 } else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) { 447 448 nfs4_fake_attrs(vp, vap); 449 error = 0; 450 } 451 452 return (error); 453 } 454 455 static int 456 nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 457 caller_context_t *ct) 458 { 459 int error; 460 vnode_t *newvp; 461 462 error = nfs4_trigger_mount(vp, cr, &newvp); 463 if (error) 464 return (error); 465 466 error = VOP_SETATTR(newvp, vap, flags, cr, ct); 467 VN_RELE(newvp); 468 469 return (error); 470 } 471 472 static int 473 nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr, 474 caller_context_t *ct) 475 { 476 int error; 477 vnode_t *newvp; 478 479 error = nfs4_trigger_mount(vp, cr, &newvp); 480 if (error) 481 return (error); 482 483 error = VOP_ACCESS(newvp, mode, flags, cr, ct); 484 VN_RELE(newvp); 485 486 return (error); 487 } 488 489 static int 490 nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, 491 struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr, 492 caller_context_t *ct, int *deflags, pathname_t *rpnp) 493 { 494 int error; 495 vnode_t *newdvp; 496 rnode4_t *drp = VTOR4(dvp); 497 498 ASSERT(RP_ISSTUB(drp)); 499 500 /* 501 * It's not legal to lookup ".." for an fs root, so we mustn't pass 502 * that up. Instead, pass onto the regular op, regardless of whether 503 * we've triggered a mount. 504 */ 505 if (strcmp(nm, "..") == 0) 506 if (RP_ISSTUB_MIRRORMOUNT(drp)) { 507 return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, 508 ct, deflags, rpnp)); 509 } else if (RP_ISSTUB_REFERRAL(drp)) { 510 /* Return the parent vnode */ 511 return (vtodv(dvp, vpp, cr, TRUE)); 512 } 513 514 error = nfs4_trigger_mount(dvp, cr, &newdvp); 515 if (error) 516 return (error); 517 518 error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct, 519 deflags, rpnp); 520 VN_RELE(newdvp); 521 522 return (error); 523 } 524 525 static int 526 nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va, 527 enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr, 528 int flags, caller_context_t *ct, vsecattr_t *vsecp) 529 { 530 int error; 531 vnode_t *newdvp; 532 533 error = nfs4_trigger_mount(dvp, cr, &newdvp); 534 if (error) 535 return (error); 536 537 error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr, 538 flags, ct, vsecp); 539 VN_RELE(newdvp); 540 541 return (error); 542 } 543 544 static int 545 nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, 546 int flags) 547 { 548 int error; 549 vnode_t *newdvp; 550 551 error = nfs4_trigger_mount(dvp, cr, &newdvp); 552 if (error) 553 return (error); 554 555 error = VOP_REMOVE(newdvp, nm, cr, ct, flags); 556 VN_RELE(newdvp); 557 558 return (error); 559 } 560 561 static int 562 nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr, 563 caller_context_t *ct, int flags) 564 { 565 int error; 566 vnode_t *newtdvp; 567 568 error = nfs4_trigger_mount(tdvp, cr, &newtdvp); 569 if (error) 570 return (error); 571 572 /* 573 * We don't check whether svp is a stub. Let the NFSv4 code 574 * detect that error, and return accordingly. 575 */ 576 error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags); 577 VN_RELE(newtdvp); 578 579 return (error); 580 } 581 582 static int 583 nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 584 cred_t *cr, caller_context_t *ct, int flags) 585 { 586 int error; 587 vnode_t *newsdvp; 588 rnode4_t *tdrp = VTOR4(tdvp); 589 590 /* 591 * We know that sdvp is a stub, otherwise we would not be here. 592 * 593 * If tdvp is also be a stub, there are two possibilities: it 594 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)] 595 * or it is a different stub [!VN_CMP(sdvp, tdvp)]. 596 * 597 * In the former case, just trigger sdvp, and treat tdvp as 598 * though it were not a stub. 599 * 600 * In the latter case, it might be a different stub for the 601 * same server fs as sdvp, or for a different server fs. 602 * Regardless, from the client perspective this would still 603 * be a cross-filesystem rename, and should not be allowed, 604 * so return EXDEV, without triggering either mount. 605 */ 606 if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp)) 607 return (EXDEV); 608 609 error = nfs4_trigger_mount(sdvp, cr, &newsdvp); 610 if (error) 611 return (error); 612 613 error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags); 614 615 VN_RELE(newsdvp); 616 617 return (error); 618 } 619 620 /* ARGSUSED */ 621 static int 622 nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, 623 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) 624 { 625 int error; 626 vnode_t *newdvp; 627 628 error = nfs4_trigger_mount(dvp, cr, &newdvp); 629 if (error) 630 return (error); 631 632 error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp); 633 VN_RELE(newdvp); 634 635 return (error); 636 } 637 638 static int 639 nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, 640 caller_context_t *ct, int flags) 641 { 642 int error; 643 vnode_t *newdvp; 644 645 error = nfs4_trigger_mount(dvp, cr, &newdvp); 646 if (error) 647 return (error); 648 649 error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags); 650 VN_RELE(newdvp); 651 652 return (error); 653 } 654 655 static int 656 nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, 657 cred_t *cr, caller_context_t *ct, int flags) 658 { 659 int error; 660 vnode_t *newdvp; 661 662 error = nfs4_trigger_mount(dvp, cr, &newdvp); 663 if (error) 664 return (error); 665 666 error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags); 667 VN_RELE(newdvp); 668 669 return (error); 670 } 671 672 static int 673 nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, 674 caller_context_t *ct) 675 { 676 int error; 677 vnode_t *newvp; 678 679 error = nfs4_trigger_mount(vp, cr, &newvp); 680 if (error) 681 return (error); 682 683 error = VOP_READLINK(newvp, uiop, cr, ct); 684 VN_RELE(newvp); 685 686 return (error); 687 } 688 689 /* end of trigger vnode ops */ 690 691 /* 692 * See if the mount has already been done by another caller. 693 */ 694 static int 695 nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp, 696 bool_t *was_mounted, vfs_t **vfsp) 697 { 698 int error; 699 mntinfo4_t *mi = VTOMI4(vp); 700 701 *was_mounted = FALSE; 702 703 error = vn_vfsrlock_wait(vp); 704 if (error) 705 return (error); 706 707 *vfsp = vn_mountedvfs(vp); 708 if (*vfsp != NULL) { 709 /* the mount has already occurred */ 710 error = VFS_ROOT(*vfsp, newvpp); 711 if (!error) { 712 /* need to update the reference time */ 713 mutex_enter(&mi->mi_lock); 714 if (mi->mi_ephemeral) 715 mi->mi_ephemeral->ne_ref_time = 716 gethrestime_sec(); 717 mutex_exit(&mi->mi_lock); 718 719 *was_mounted = TRUE; 720 } 721 } 722 723 vn_vfsunlock(vp); 724 return (0); 725 } 726 727 /* 728 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc. 729 * 730 * The mount may have already occurred, via another thread. If not, 731 * assemble the location information - which may require fetching - and 732 * perform the mount. 733 * 734 * Sets newvp to be the root of the fs that is now covering vp. Note 735 * that we return with VN_HELD(*newvp). 736 * 737 * The caller is responsible for passing the VOP onto the covering fs. 738 */ 739 static int 740 nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp) 741 { 742 int error; 743 vfs_t *vfsp; 744 rnode4_t *rp = VTOR4(vp); 745 mntinfo4_t *mi = VTOMI4(vp); 746 domount_args_t *dma; 747 748 nfs4_ephemeral_tree_t *net; 749 750 bool_t must_unlock = FALSE; 751 bool_t is_building = FALSE; 752 bool_t was_mounted = FALSE; 753 754 cred_t *mcred = NULL; 755 756 nfs4_trigger_globals_t *ntg; 757 758 zone_t *zone = curproc->p_zone; 759 760 ASSERT(RP_ISSTUB(rp)); 761 762 *newvpp = NULL; 763 764 /* 765 * Has the mount already occurred? 766 */ 767 error = nfs4_trigger_mounted_already(vp, newvpp, 768 &was_mounted, &vfsp); 769 if (error || was_mounted) 770 goto done; 771 772 ntg = zone_getspecific(nfs4_ephemeral_key, zone); 773 ASSERT(ntg != NULL); 774 775 mutex_enter(&mi->mi_lock); 776 777 /* 778 * We need to lock down the ephemeral tree. 779 */ 780 if (mi->mi_ephemeral_tree == NULL) { 781 net = kmem_zalloc(sizeof (*net), KM_SLEEP); 782 mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL); 783 mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL); 784 net->net_refcnt = 1; 785 net->net_status = NFS4_EPHEMERAL_TREE_BUILDING; 786 is_building = TRUE; 787 788 /* 789 * We need to add it to the zone specific list for 790 * automatic unmounting and harvesting of deadwood. 791 */ 792 mutex_enter(&ntg->ntg_forest_lock); 793 if (ntg->ntg_forest != NULL) 794 net->net_next = ntg->ntg_forest; 795 ntg->ntg_forest = net; 796 mutex_exit(&ntg->ntg_forest_lock); 797 798 /* 799 * No lock order confusion with mi_lock because no 800 * other node could have grabbed net_tree_lock. 801 */ 802 mutex_enter(&net->net_tree_lock); 803 mi->mi_ephemeral_tree = net; 804 net->net_mount = mi; 805 mutex_exit(&mi->mi_lock); 806 807 MI4_HOLD(mi); 808 VFS_HOLD(mi->mi_vfsp); 809 } else { 810 net = mi->mi_ephemeral_tree; 811 nfs4_ephemeral_tree_hold(net); 812 813 mutex_exit(&mi->mi_lock); 814 815 mutex_enter(&net->net_tree_lock); 816 817 /* 818 * We can only procede if the tree is neither locked 819 * nor being torn down. 820 */ 821 mutex_enter(&net->net_cnt_lock); 822 if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) { 823 nfs4_ephemeral_tree_decr(net); 824 mutex_exit(&net->net_cnt_lock); 825 mutex_exit(&net->net_tree_lock); 826 827 return (EIO); 828 } 829 mutex_exit(&net->net_cnt_lock); 830 } 831 832 mutex_enter(&net->net_cnt_lock); 833 net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING; 834 mutex_exit(&net->net_cnt_lock); 835 836 must_unlock = TRUE; 837 838 dma = nfs4_trigger_domount_args_create(vp, cr); 839 if (dma == NULL) { 840 error = EINVAL; 841 goto done; 842 } 843 844 /* 845 * Note that since we define mirror mounts to work 846 * for any user, we simply extend the privileges of 847 * the user's credentials to allow the mount to 848 * proceed. 849 */ 850 mcred = crdup(cr); 851 if (mcred == NULL) { 852 error = EINVAL; 853 goto done; 854 } 855 856 crset_zone_privall(mcred); 857 if (is_system_labeled()) 858 (void) setpflags(NET_MAC_AWARE, 1, mcred); 859 860 error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp); 861 nfs4_trigger_domount_args_destroy(dma, vp); 862 863 DTRACE_PROBE2(nfs4clnt__func__referral__mount, 864 vnode_t *, vp, int, error); 865 866 crfree(mcred); 867 868 done: 869 870 if (must_unlock) { 871 mutex_enter(&net->net_cnt_lock); 872 net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING; 873 874 /* 875 * REFCNT: If we are the root of the tree, then we need 876 * to keep a reference because we malloced the tree and 877 * this is where we tied it to our mntinfo. 878 * 879 * If we are not the root of the tree, then our tie to 880 * the mntinfo occured elsewhere and we need to 881 * decrement the reference to the tree. 882 */ 883 if (is_building) 884 net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING; 885 else 886 nfs4_ephemeral_tree_decr(net); 887 mutex_exit(&net->net_cnt_lock); 888 889 mutex_exit(&net->net_tree_lock); 890 } 891 892 if (!error && (newvpp == NULL || *newvpp == NULL)) 893 error = ENOSYS; 894 895 return (error); 896 } 897 898 /* 899 * Collect together both the generic & mount-type specific args. 900 */ 901 static domount_args_t * 902 nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr) 903 { 904 int nointr; 905 char *hostlist; 906 servinfo4_t *svp; 907 struct nfs_args *nargs, *nargs_head; 908 enum clnt_stat status; 909 ephemeral_servinfo_t *esi, *esi_first; 910 domount_args_t *dma; 911 mntinfo4_t *mi = VTOMI4(vp); 912 913 nointr = !(mi->mi_flags & MI4_INT); 914 hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 915 916 svp = mi->mi_curr_serv; 917 /* check if the current server is responding */ 918 status = nfs4_trigger_ping_server(svp, nointr); 919 if (status == RPC_SUCCESS) { 920 esi_first = nfs4_trigger_esi_create(vp, svp, cr); 921 if (esi_first == NULL) { 922 kmem_free(hostlist, MAXPATHLEN); 923 return (NULL); 924 } 925 926 (void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN); 927 928 nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first); 929 } else { 930 /* current server did not respond */ 931 esi_first = NULL; 932 nargs_head = NULL; 933 } 934 nargs = nargs_head; 935 936 /* 937 * NFS RO failover. 938 * 939 * If we have multiple servinfo4 structures, linked via sv_next, 940 * we must create one nfs_args for each, linking the nfs_args via 941 * nfs_ext_u.nfs_extB.next. 942 * 943 * We need to build a corresponding esi for each, too, but that is 944 * used solely for building nfs_args, and may be immediately 945 * discarded, as domount() requires the info from just one esi, 946 * but all the nfs_args. 947 * 948 * Currently, the NFS mount code will hang if not all servers 949 * requested are available. To avoid that, we need to ping each 950 * server, here, and remove it from the list if it is not 951 * responding. This has the side-effect of that server then 952 * being permanently unavailable for this failover mount, even if 953 * it recovers. That's unfortunate, but the best we can do until 954 * the mount code path is fixed. 955 */ 956 957 /* 958 * If the current server was down, loop indefinitely until we find 959 * at least one responsive server. 960 */ 961 do { 962 /* no locking needed for sv_next; it is only set at fs mount */ 963 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 964 struct nfs_args *next; 965 966 /* 967 * nargs_head: the head of the nfs_args list 968 * nargs: the current tail of the list 969 * next: the newly-created element to be added 970 */ 971 972 /* 973 * We've already tried the current server, above; 974 * if it was responding, we have already included it 975 * and it may now be ignored. 976 * 977 * Otherwise, try it again, since it may now have 978 * recovered. 979 */ 980 if (svp == mi->mi_curr_serv && esi_first != NULL) 981 continue; 982 983 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 984 if (svp->sv_flags & SV4_NOTINUSE) { 985 nfs_rw_exit(&svp->sv_lock); 986 continue; 987 } 988 nfs_rw_exit(&svp->sv_lock); 989 990 /* check if the server is responding */ 991 status = nfs4_trigger_ping_server(svp, nointr); 992 /* if the server did not respond, ignore it */ 993 if (status != RPC_SUCCESS) 994 continue; 995 996 esi = nfs4_trigger_esi_create(vp, svp, cr); 997 if (esi == NULL) 998 continue; 999 1000 /* 1001 * If the original current server (mi_curr_serv) 1002 * was down when when we first tried it, 1003 * (i.e. esi_first == NULL), 1004 * we select this new server (svp) to be the server 1005 * that we will actually contact (esi_first). 1006 * 1007 * Note that it's possible that mi_curr_serv == svp, 1008 * if that mi_curr_serv was down but has now recovered. 1009 */ 1010 next = nfs4_trigger_nargs_create(mi, svp, esi); 1011 if (esi_first == NULL) { 1012 ASSERT(nargs == NULL); 1013 ASSERT(nargs_head == NULL); 1014 nargs_head = next; 1015 esi_first = esi; 1016 (void) strlcpy(hostlist, 1017 esi_first->esi_hostname, MAXPATHLEN); 1018 } else { 1019 ASSERT(nargs_head != NULL); 1020 nargs->nfs_ext_u.nfs_extB.next = next; 1021 (void) strlcat(hostlist, ",", MAXPATHLEN); 1022 (void) strlcat(hostlist, esi->esi_hostname, 1023 MAXPATHLEN); 1024 /* esi was only needed for hostname & nargs */ 1025 nfs4_trigger_esi_destroy(esi, vp); 1026 } 1027 1028 nargs = next; 1029 } 1030 1031 /* if we've had no response at all, wait a second */ 1032 if (esi_first == NULL) 1033 delay(drv_usectohz(1000000)); 1034 1035 } while (esi_first == NULL); 1036 ASSERT(nargs_head != NULL); 1037 1038 dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP); 1039 dma->dma_esi = esi_first; 1040 dma->dma_hostlist = hostlist; 1041 dma->dma_nargs = nargs_head; 1042 1043 return (dma); 1044 } 1045 1046 static void 1047 nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp) 1048 { 1049 if (dma != NULL) { 1050 if (dma->dma_esi != NULL && vp != NULL) 1051 nfs4_trigger_esi_destroy(dma->dma_esi, vp); 1052 1053 if (dma->dma_hostlist != NULL) 1054 kmem_free(dma->dma_hostlist, MAXPATHLEN); 1055 1056 if (dma->dma_nargs != NULL) { 1057 struct nfs_args *nargs = dma->dma_nargs; 1058 1059 do { 1060 struct nfs_args *next = 1061 nargs->nfs_ext_u.nfs_extB.next; 1062 1063 nfs4_trigger_nargs_destroy(nargs); 1064 nargs = next; 1065 } while (nargs != NULL); 1066 } 1067 1068 kmem_free(dma, sizeof (domount_args_t)); 1069 } 1070 } 1071 1072 /* 1073 * The ephemeral_servinfo_t struct contains basic information we will need to 1074 * perform the mount. Whilst the structure is generic across different 1075 * types of ephemeral mount, the way we gather its contents differs. 1076 */ 1077 static ephemeral_servinfo_t * 1078 nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr) 1079 { 1080 ephemeral_servinfo_t *esi; 1081 rnode4_t *rp = VTOR4(vp); 1082 1083 ASSERT(RP_ISSTUB(rp)); 1084 1085 /* Call the ephemeral type-specific routine */ 1086 if (RP_ISSTUB_MIRRORMOUNT(rp)) 1087 esi = nfs4_trigger_esi_create_mirrormount(vp, svp); 1088 else if (RP_ISSTUB_REFERRAL(rp)) 1089 esi = nfs4_trigger_esi_create_referral(vp, cr); 1090 else 1091 esi = NULL; 1092 return (esi); 1093 } 1094 1095 static void 1096 nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp) 1097 { 1098 rnode4_t *rp = VTOR4(vp); 1099 1100 ASSERT(RP_ISSTUB(rp)); 1101 1102 /* Currently, no need for an ephemeral type-specific routine */ 1103 1104 /* 1105 * The contents of ephemeral_servinfo_t goes into nfs_args, 1106 * and will be handled by nfs4_trigger_nargs_destroy(). 1107 * We need only free the structure itself. 1108 */ 1109 if (esi != NULL) 1110 kmem_free(esi, sizeof (ephemeral_servinfo_t)); 1111 } 1112 1113 /* 1114 * Some of this may turn out to be common with other ephemeral types, 1115 * in which case it should be moved to nfs4_trigger_esi_create(), or a 1116 * common function called. 1117 */ 1118 1119 /* 1120 * Mirror mounts case - should have all data available 1121 */ 1122 static ephemeral_servinfo_t * 1123 nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp) 1124 { 1125 char *stubpath; 1126 struct knetconfig *sikncp, *svkncp; 1127 struct netbuf *bufp; 1128 ephemeral_servinfo_t *esi; 1129 1130 esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP); 1131 1132 /* initially set to be our type of ephemeral mount; may be added to */ 1133 esi->esi_mount_flags = NFSMNT_MIRRORMOUNT; 1134 1135 /* 1136 * We're copying info from the stub rnode's servinfo4, but 1137 * we must create new copies, not pointers, since this information 1138 * is to be associated with the new mount, which will be 1139 * unmounted (and its structures freed) separately 1140 */ 1141 1142 /* 1143 * Sizes passed to kmem_[z]alloc here must match those freed 1144 * in nfs4_free_args() 1145 */ 1146 1147 /* 1148 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this 1149 * is difficult to avoid: as we need to read svp to calculate the 1150 * sizes to be allocated. 1151 */ 1152 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1153 1154 esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP); 1155 (void) strcat(esi->esi_hostname, svp->sv_hostname); 1156 1157 esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP); 1158 bufp = esi->esi_addr; 1159 bufp->len = svp->sv_addr.len; 1160 bufp->maxlen = svp->sv_addr.maxlen; 1161 bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP); 1162 bcopy(svp->sv_addr.buf, bufp->buf, bufp->len); 1163 1164 esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP); 1165 sikncp = esi->esi_knconf; 1166 svkncp = svp->sv_knconf; 1167 sikncp->knc_semantics = svkncp->knc_semantics; 1168 sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1169 (void) strcat((char *)sikncp->knc_protofmly, 1170 (char *)svkncp->knc_protofmly); 1171 sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1172 (void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto); 1173 sikncp->knc_rdev = svkncp->knc_rdev; 1174 1175 /* 1176 * Used when AUTH_DH is negotiated. 1177 * 1178 * This is ephemeral mount-type specific, since it contains the 1179 * server's time-sync syncaddr. 1180 */ 1181 if (svp->sv_dhsec) { 1182 struct netbuf *bufp; 1183 sec_data_t *sdata; 1184 dh_k4_clntdata_t *data; 1185 1186 sdata = svp->sv_dhsec; 1187 data = (dh_k4_clntdata_t *)sdata->data; 1188 ASSERT(sdata->rpcflavor == AUTH_DH); 1189 1190 bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP); 1191 bufp->len = data->syncaddr.len; 1192 bufp->maxlen = data->syncaddr.maxlen; 1193 bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP); 1194 bcopy(data->syncaddr.buf, bufp->buf, bufp->len); 1195 esi->esi_syncaddr = bufp; 1196 1197 if (data->netname != NULL) { 1198 int nmlen = data->netnamelen; 1199 1200 /* 1201 * We need to copy from a dh_k4_clntdata_t 1202 * netname/netnamelen pair to a NUL-terminated 1203 * netname string suitable for putting in nfs_args, 1204 * where the latter has no netnamelen field. 1205 */ 1206 esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP); 1207 bcopy(data->netname, esi->esi_netname, nmlen); 1208 } 1209 } else { 1210 esi->esi_syncaddr = NULL; 1211 esi->esi_netname = NULL; 1212 } 1213 1214 stubpath = fn_path(VTOSV(vp)->sv_name); 1215 /* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */ 1216 ASSERT(*stubpath == '.'); 1217 stubpath += 1; 1218 1219 /* for nfs_args->fh */ 1220 esi->esi_path_len = strlen(stubpath) + 1; 1221 if (strcmp(svp->sv_path, "/") != 0) 1222 esi->esi_path_len += strlen(svp->sv_path); 1223 esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP); 1224 if (strcmp(svp->sv_path, "/") != 0) 1225 (void) strcat(esi->esi_path, svp->sv_path); 1226 (void) strcat(esi->esi_path, stubpath); 1227 1228 stubpath -= 1; 1229 /* stubpath allocated by fn_path() */ 1230 kmem_free(stubpath, strlen(stubpath) + 1); 1231 1232 nfs_rw_exit(&svp->sv_lock); 1233 1234 return (esi); 1235 } 1236 1237 /* 1238 * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to 1239 * get network information required to do the mount call. 1240 */ 1241 int 1242 nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp) 1243 { 1244 door_arg_t door_args; 1245 door_handle_t dh; 1246 XDR xdr; 1247 refd_door_args_t *xdr_argsp; 1248 refd_door_res_t *orig_resp; 1249 k_sigset_t smask; 1250 int xdr_len = 0; 1251 int res_len = 16; /* length of an ip adress */ 1252 int orig_reslen = res_len; 1253 int error = 0; 1254 struct nfsidmap_globals *nig; 1255 1256 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) 1257 return (ECONNREFUSED); 1258 1259 nig = zone_getspecific(nfsidmap_zone_key, nfs_zone()); 1260 ASSERT(nig != NULL); 1261 1262 mutex_enter(&nig->nfsidmap_daemon_lock); 1263 dh = nig->nfsidmap_daemon_dh; 1264 if (dh == NULL) { 1265 mutex_exit(&nig->nfsidmap_daemon_lock); 1266 cmn_err(CE_NOTE, 1267 "nfs4_callmapid: nfsmapid daemon not " \ 1268 "running unable to resolve host name\n"); 1269 return (EINVAL); 1270 } 1271 door_ki_hold(dh); 1272 mutex_exit(&nig->nfsidmap_daemon_lock); 1273 1274 xdr_len = xdr_sizeof(&(xdr_utf8string), server); 1275 1276 xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP); 1277 xdr_argsp->xdr_len = xdr_len; 1278 xdr_argsp->cmd = NFSMAPID_SRV_NETINFO; 1279 1280 xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg, 1281 xdr_len, XDR_ENCODE); 1282 1283 if (!xdr_utf8string(&xdr, server)) { 1284 kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp)); 1285 door_ki_rele(dh); 1286 return (1); 1287 } 1288 1289 if (orig_reslen) 1290 orig_resp = kmem_alloc(orig_reslen, KM_SLEEP); 1291 1292 door_args.data_ptr = (char *)xdr_argsp; 1293 door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len; 1294 door_args.desc_ptr = NULL; 1295 door_args.desc_num = 0; 1296 door_args.rbuf = orig_resp ? (char *)orig_resp : NULL; 1297 door_args.rsize = res_len; 1298 1299 sigintr(&smask, 1); 1300 error = door_ki_upcall(dh, &door_args); 1301 sigunintr(&smask); 1302 1303 door_ki_rele(dh); 1304 1305 kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp)); 1306 if (error) { 1307 kmem_free(orig_resp, orig_reslen); 1308 /* 1309 * There is no door to connect to. The referral daemon 1310 * must not be running yet. 1311 */ 1312 cmn_err(CE_WARN, 1313 "nfsmapid not running cannot resolve host name"); 1314 goto out; 1315 } 1316 1317 /* 1318 * If the results buffer passed back are not the same as 1319 * what was sent free the old buffer and use the new one. 1320 */ 1321 if (orig_resp && orig_reslen) { 1322 refd_door_res_t *door_resp; 1323 1324 door_resp = (refd_door_res_t *)door_args.rbuf; 1325 if ((void *)door_args.rbuf != orig_resp) 1326 kmem_free(orig_resp, orig_reslen); 1327 if (door_resp->res_status == 0) { 1328 xdrmem_create(&xdr, (char *)&door_resp->xdr_res, 1329 door_resp->xdr_len, XDR_DECODE); 1330 bzero(resp, sizeof (struct nfs_fsl_info)); 1331 if (!xdr_nfs_fsl_info(&xdr, resp)) { 1332 DTRACE_PROBE2( 1333 nfs4clnt__debug__referral__upcall__xdrfail, 1334 struct nfs_fsl_info *, resp, 1335 char *, "nfs4_callmapid"); 1336 error = EINVAL; 1337 } 1338 } else { 1339 DTRACE_PROBE2( 1340 nfs4clnt__debug__referral__upcall__badstatus, 1341 int, door_resp->res_status, 1342 char *, "nfs4_callmapid"); 1343 error = door_resp->res_status; 1344 } 1345 kmem_free(door_args.rbuf, door_args.rsize); 1346 } 1347 out: 1348 DTRACE_PROBE2(nfs4clnt__func__referral__upcall, 1349 char *, server, int, error); 1350 return (error); 1351 } 1352 1353 /* 1354 * Fetches the fs_locations attribute. Typically called 1355 * from a Replication/Migration/Referrals/Mirror-mount context 1356 * 1357 * Fills in the attributes in garp. The caller is assumed 1358 * to have allocated memory for garp. 1359 * 1360 * lock: if set do not lock s_recovlock and mi_recovlock mutex, 1361 * it's already done by caller. Otherwise lock these mutexes 1362 * before doing the rfs4call(). 1363 * 1364 * Returns 1365 * 1 for success 1366 * 0 for failure 1367 */ 1368 int 1369 nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm, 1370 cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock) 1371 { 1372 COMPOUND4args_clnt args; 1373 COMPOUND4res_clnt res; 1374 nfs_argop4 *argop; 1375 int argoplist_size = 3 * sizeof (nfs_argop4); 1376 nfs4_server_t *sp = NULL; 1377 int doqueue = 1; 1378 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 1379 int retval = 1; 1380 struct nfs4_clnt *nfscl; 1381 1382 if (lock == TRUE) 1383 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 1384 else 1385 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 1386 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 1387 1388 sp = find_nfs4_server(mi); 1389 if (lock == TRUE) 1390 nfs_rw_exit(&mi->mi_recovlock); 1391 1392 if (sp != NULL) 1393 mutex_exit(&sp->s_lock); 1394 1395 if (lock == TRUE) { 1396 if (sp != NULL) 1397 (void) nfs_rw_enter_sig(&sp->s_recovlock, 1398 RW_WRITER, 0); 1399 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 1400 } else { 1401 if (sp != NULL) { 1402 ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) || 1403 nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER)); 1404 } 1405 } 1406 1407 /* 1408 * Do we want to do the setup for recovery here? 1409 * 1410 * We know that the server responded to a null ping a very 1411 * short time ago, and we know that we intend to do a 1412 * single stateless operation - we want to fetch attributes, 1413 * so we know we can't encounter errors about state. If 1414 * something goes wrong with the GETATTR, like not being 1415 * able to get a response from the server or getting any 1416 * kind of FH error, we should fail the mount. 1417 * 1418 * We may want to re-visited this at a later time. 1419 */ 1420 argop = kmem_alloc(argoplist_size, KM_SLEEP); 1421 1422 args.ctag = TAG_GETATTR_FSLOCATION; 1423 /* PUTFH LOOKUP GETATTR */ 1424 args.array_len = 3; 1425 args.array = argop; 1426 1427 /* 0. putfh file */ 1428 argop[0].argop = OP_CPUTFH; 1429 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 1430 1431 /* 1. lookup name, can't be dotdot */ 1432 argop[1].argop = OP_CLOOKUP; 1433 argop[1].nfs_argop4_u.opclookup.cname = nm; 1434 1435 /* 2. file attrs */ 1436 argop[2].argop = OP_GETATTR; 1437 argop[2].nfs_argop4_u.opgetattr.attr_request = 1438 FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK | 1439 FATTR4_MOUNTED_ON_FILEID_MASK; 1440 argop[2].nfs_argop4_u.opgetattr.mi = mi; 1441 1442 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 1443 1444 if (lock == TRUE) { 1445 nfs_rw_exit(&mi->mi_recovlock); 1446 if (sp != NULL) 1447 nfs_rw_exit(&sp->s_recovlock); 1448 } 1449 1450 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone()); 1451 nfscl->nfscl_stat.referrals.value.ui64++; 1452 DTRACE_PROBE3(nfs4clnt__func__referral__fsloc, 1453 nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e); 1454 1455 if (e.error != 0) { 1456 if (sp != NULL) 1457 nfs4_server_rele(sp); 1458 kmem_free(argop, argoplist_size); 1459 return (0); 1460 } 1461 1462 /* 1463 * Check for all possible error conditions. 1464 * For valid replies without an ops array or for illegal 1465 * replies, return a failure. 1466 */ 1467 if (res.status != NFS4_OK || res.array_len < 3 || 1468 res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) { 1469 retval = 0; 1470 goto exit; 1471 } 1472 1473 /* 1474 * There isn't much value in putting the attributes 1475 * in the attr cache since fs_locations4 aren't 1476 * encountered very frequently, so just make them 1477 * available to the caller. 1478 */ 1479 *garp = res.array[2].nfs_resop4_u.opgetattr.ga_res; 1480 1481 DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc, 1482 nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations"); 1483 1484 /* No fs_locations? -- return a failure */ 1485 if (garp->n4g_ext_res == NULL || 1486 garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) { 1487 retval = 0; 1488 goto exit; 1489 } 1490 1491 if (!garp->n4g_fsid_valid) 1492 retval = 0; 1493 1494 exit: 1495 if (retval == 0) { 1496 /* the call was ok but failed validating the call results */ 1497 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1498 } else { 1499 ASSERT(callres != NULL); 1500 *callres = res; 1501 } 1502 1503 if (sp != NULL) 1504 nfs4_server_rele(sp); 1505 kmem_free(argop, argoplist_size); 1506 return (retval); 1507 } 1508 1509 /* tunable to disable referral mounts */ 1510 int nfs4_no_referrals = 0; 1511 1512 /* 1513 * Returns NULL if the vnode cannot be created or found. 1514 */ 1515 vnode_t * 1516 find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr) 1517 { 1518 nfs_fh4 *stub_fh, *dfh; 1519 nfs4_sharedfh_t *sfhp; 1520 char *newfhval; 1521 vnode_t *vp = NULL; 1522 fattr4_mounted_on_fileid mnt_on_fileid; 1523 nfs4_ga_res_t garp; 1524 mntinfo4_t *mi; 1525 COMPOUND4res_clnt callres; 1526 hrtime_t t; 1527 1528 if (nfs4_no_referrals) 1529 return (NULL); 1530 1531 /* 1532 * Get the mounted_on_fileid, unique on that server::fsid 1533 */ 1534 mi = VTOMI4(dvp); 1535 if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr, 1536 &garp, &callres, FALSE) == 0) 1537 return (NULL); 1538 mnt_on_fileid = garp.n4g_mon_fid; 1539 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1540 1541 /* 1542 * Build a fake filehandle from the dir FH and the mounted_on_fileid 1543 */ 1544 dfh = &VTOR4(dvp)->r_fh->sfh_fh; 1545 stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP); 1546 stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len + 1547 sizeof (fattr4_mounted_on_fileid), KM_SLEEP); 1548 newfhval = stub_fh->nfs_fh4_val; 1549 1550 /* copy directory's file handle */ 1551 bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len); 1552 stub_fh->nfs_fh4_len = dfh->nfs_fh4_len; 1553 newfhval = newfhval + dfh->nfs_fh4_len; 1554 1555 /* Add mounted_on_fileid. Use bcopy to avoid alignment problem */ 1556 bcopy((char *)&mnt_on_fileid, newfhval, 1557 sizeof (fattr4_mounted_on_fileid)); 1558 stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid); 1559 1560 sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL); 1561 kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len + 1562 sizeof (fattr4_mounted_on_fileid)); 1563 kmem_free(stub_fh, sizeof (nfs_fh4)); 1564 if (sfhp == NULL) 1565 return (NULL); 1566 1567 t = gethrtime(); 1568 garp.n4g_va.va_type = VDIR; 1569 vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t, 1570 cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp)); 1571 1572 if (vp != NULL) 1573 vp->v_type = VDIR; 1574 1575 sfh4_rele(&sfhp); 1576 return (vp); 1577 } 1578 1579 int 1580 nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr) 1581 { 1582 vnode_t *nvp; 1583 rnode4_t *rp; 1584 1585 if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL) 1586 return (EINVAL); 1587 1588 rp = VTOR4(nvp); 1589 mutex_enter(&rp->r_statelock); 1590 r4_stub_referral(rp); 1591 mutex_exit(&rp->r_statelock); 1592 dnlc_enter(dvp, nm, nvp); 1593 1594 if (*vpp != NULL) 1595 VN_RELE(*vpp); /* no longer need this vnode */ 1596 1597 *vpp = nvp; 1598 1599 return (0); 1600 } 1601 1602 /* 1603 * Fetch the location information and resolve the new server. 1604 * Caller needs to free up the XDR data which is returned. 1605 * Input: mount info, shared filehandle, nodename 1606 * Return: Index to the result or Error(-1) 1607 * Output: FsLocations Info, Resolved Server Info. 1608 */ 1609 int 1610 nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, 1611 char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res, 1612 struct nfs_fsl_info *fsloc) 1613 { 1614 fs_location4 *fsp; 1615 struct nfs_fsl_info nfsfsloc; 1616 int ret, i, error; 1617 nfs4_ga_res_t garp; 1618 COMPOUND4res_clnt callres; 1619 struct knetconfig *knc; 1620 1621 ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE); 1622 if (ret == 0) 1623 return (-1); 1624 1625 /* 1626 * As a lame attempt to figuring out if we're 1627 * handling a migration event or a referral, 1628 * look for rnodes with this fsid in the rnode 1629 * cache. 1630 * 1631 * If we can find one or more such rnodes, it 1632 * means we're handling a migration event and 1633 * we want to bail out in that case. 1634 */ 1635 if (r4find_by_fsid(mi, &garp.n4g_fsid)) { 1636 DTRACE_PROBE3(nfs4clnt__debug__referral__migration, 1637 mntinfo4_t *, mi, nfs4_ga_res_t *, &garp, 1638 char *, "nfs4_process_referral"); 1639 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1640 return (-1); 1641 } 1642 1643 /* 1644 * Find the first responsive server to mount. When we find 1645 * one, fsp will point to it. 1646 */ 1647 for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) { 1648 1649 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i]; 1650 if (fsp->server_len == 0 || fsp->server_val == NULL) 1651 continue; 1652 1653 error = nfs4_callmapid(fsp->server_val, &nfsfsloc); 1654 if (error != 0) 1655 continue; 1656 1657 error = nfs4_ping_server_common(nfsfsloc.knconf, 1658 nfsfsloc.addr, !(mi->mi_flags & MI4_INT)); 1659 if (error == RPC_SUCCESS) 1660 break; 1661 1662 DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr, 1663 sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf, 1664 char *, "nfs4_process_referral"); 1665 1666 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1667 } 1668 knc = nfsfsloc.knconf; 1669 if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) || 1670 (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) { 1671 DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc, 1672 nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral"); 1673 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1674 return (-1); 1675 } 1676 1677 /* Send the results back */ 1678 *fsloc = nfsfsloc; 1679 *grp = garp; 1680 *res = callres; 1681 return (i); 1682 } 1683 1684 /* 1685 * Referrals case - need to fetch referral data and then upcall to 1686 * user-level to get complete mount data. 1687 */ 1688 static ephemeral_servinfo_t * 1689 nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr) 1690 { 1691 struct knetconfig *sikncp, *svkncp; 1692 struct netbuf *bufp; 1693 ephemeral_servinfo_t *esi; 1694 vnode_t *dvp; 1695 rnode4_t *drp; 1696 fs_location4 *fsp; 1697 struct nfs_fsl_info nfsfsloc; 1698 nfs4_ga_res_t garp; 1699 char *p; 1700 char fn[MAXNAMELEN]; 1701 int i, index = -1; 1702 mntinfo4_t *mi; 1703 COMPOUND4res_clnt callres; 1704 1705 /* 1706 * If we're passed in a stub vnode that 1707 * isn't a "referral" stub, bail out 1708 * and return a failure 1709 */ 1710 if (!RP_ISSTUB_REFERRAL(VTOR4(vp))) 1711 return (NULL); 1712 1713 if (vtodv(vp, &dvp, CRED(), TRUE) != 0) 1714 return (NULL); 1715 1716 drp = VTOR4(dvp); 1717 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) { 1718 VN_RELE(dvp); 1719 return (NULL); 1720 } 1721 1722 if (vtoname(vp, fn, MAXNAMELEN) != 0) { 1723 nfs_rw_exit(&drp->r_rwlock); 1724 VN_RELE(dvp); 1725 return (NULL); 1726 } 1727 1728 mi = VTOMI4(dvp); 1729 index = nfs4_process_referral(mi, drp->r_fh, fn, cr, 1730 &garp, &callres, &nfsfsloc); 1731 nfs_rw_exit(&drp->r_rwlock); 1732 VN_RELE(dvp); 1733 if (index < 0) 1734 return (NULL); 1735 1736 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index]; 1737 esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP); 1738 1739 /* initially set to be our type of ephemeral mount; may be added to */ 1740 esi->esi_mount_flags = NFSMNT_REFERRAL; 1741 1742 esi->esi_hostname = 1743 kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP); 1744 bcopy(fsp->server_val->utf8string_val, esi->esi_hostname, 1745 fsp->server_val->utf8string_len); 1746 esi->esi_hostname[fsp->server_val->utf8string_len] = '\0'; 1747 1748 bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 1749 bufp->len = nfsfsloc.addr->len; 1750 bufp->maxlen = nfsfsloc.addr->maxlen; 1751 bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP); 1752 bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len); 1753 esi->esi_addr = bufp; 1754 1755 esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP); 1756 sikncp = esi->esi_knconf; 1757 1758 DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc, 1759 struct nfs_fsl_info *, &nfsfsloc, 1760 char *, "nfs4_trigger_esi_create_referral"); 1761 1762 svkncp = nfsfsloc.knconf; 1763 sikncp->knc_semantics = svkncp->knc_semantics; 1764 sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1765 (void) strlcat((char *)sikncp->knc_protofmly, 1766 (char *)svkncp->knc_protofmly, KNC_STRSIZE); 1767 sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 1768 (void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto, 1769 KNC_STRSIZE); 1770 sikncp->knc_rdev = svkncp->knc_rdev; 1771 1772 DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf, 1773 struct knetconfig *, sikncp, 1774 char *, "nfs4_trigger_esi_create_referral"); 1775 1776 esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP); 1777 bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len); 1778 esi->esi_syncaddr = NULL; 1779 1780 esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1781 esi->esi_path_len = MAXPATHLEN; 1782 *p++ = '/'; 1783 for (i = 0; i < fsp->rootpath.pathname4_len; i++) { 1784 component4 *comp; 1785 1786 comp = &fsp->rootpath.pathname4_val[i]; 1787 /* If no space, null the string and bail */ 1788 if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN) 1789 goto err; 1790 bcopy(comp->utf8string_val, p, comp->utf8string_len); 1791 p += comp->utf8string_len; 1792 *p++ = '/'; 1793 } 1794 if (fsp->rootpath.pathname4_len != 0) 1795 *(p - 1) = '\0'; 1796 else 1797 *p = '\0'; 1798 p = esi->esi_path; 1799 esi->esi_path = strdup(p); 1800 esi->esi_path_len = strlen(p) + 1; 1801 kmem_free(p, MAXPATHLEN); 1802 1803 /* Allocated in nfs4_process_referral() */ 1804 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1805 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1806 1807 return (esi); 1808 err: 1809 kmem_free(esi->esi_path, esi->esi_path_len); 1810 kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1); 1811 kmem_free(esi->esi_addr->buf, esi->esi_addr->len); 1812 kmem_free(esi->esi_addr, sizeof (struct netbuf)); 1813 kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE); 1814 kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE); 1815 kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf)); 1816 kmem_free(esi->esi_netname, nfsfsloc.netnm_len); 1817 kmem_free(esi, sizeof (ephemeral_servinfo_t)); 1818 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); 1819 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); 1820 return (NULL); 1821 } 1822 1823 /* 1824 * Assemble the args, and call the generic VFS mount function to 1825 * finally perform the ephemeral mount. 1826 */ 1827 static int 1828 nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp, 1829 cred_t *cr, vnode_t **newvpp) 1830 { 1831 struct mounta *uap; 1832 char *mntpt, *orig_path, *path; 1833 const char *orig_mntpt; 1834 int retval; 1835 int mntpt_len; 1836 int spec_len; 1837 zone_t *zone = curproc->p_zone; 1838 bool_t has_leading_slash; 1839 int i; 1840 1841 vfs_t *stubvfsp = stubvp->v_vfsp; 1842 ephemeral_servinfo_t *esi = dma->dma_esi; 1843 struct nfs_args *nargs = dma->dma_nargs; 1844 1845 /* first, construct the mount point for the ephemeral mount */ 1846 orig_path = path = fn_path(VTOSV(stubvp)->sv_name); 1847 orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt); 1848 1849 if (*orig_path == '.') 1850 orig_path++; 1851 1852 /* 1853 * Get rid of zone's root path 1854 */ 1855 if (zone != global_zone) { 1856 /* 1857 * -1 for trailing '/' and -1 for EOS. 1858 */ 1859 if (strncmp(zone->zone_rootpath, orig_mntpt, 1860 zone->zone_rootpathlen - 1) == 0) { 1861 orig_mntpt += (zone->zone_rootpathlen - 2); 1862 } 1863 } 1864 1865 mntpt_len = strlen(orig_mntpt) + strlen(orig_path); 1866 mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP); 1867 (void) strcat(mntpt, orig_mntpt); 1868 (void) strcat(mntpt, orig_path); 1869 1870 kmem_free(path, strlen(path) + 1); 1871 path = esi->esi_path; 1872 if (*path == '.') 1873 path++; 1874 if (path[0] == '/' && path[1] == '/') 1875 path++; 1876 has_leading_slash = (*path == '/'); 1877 1878 spec_len = strlen(dma->dma_hostlist); 1879 spec_len += strlen(path); 1880 1881 /* We are going to have to add this in */ 1882 if (!has_leading_slash) 1883 spec_len++; 1884 1885 /* We need to get the ':' for dma_hostlist:esi_path */ 1886 spec_len++; 1887 1888 uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP); 1889 uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP); 1890 (void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist, 1891 has_leading_slash ? "" : "/", path); 1892 1893 uap->dir = mntpt; 1894 1895 uap->flags = MS_SYSSPACE | MS_DATA; 1896 /* fstype-independent mount options not covered elsewhere */ 1897 /* copy parent's mount(1M) "-m" flag */ 1898 if (stubvfsp->vfs_flag & VFS_NOMNTTAB) 1899 uap->flags |= MS_NOMNTTAB; 1900 1901 uap->fstype = MNTTYPE_NFS4; 1902 uap->dataptr = (char *)nargs; 1903 /* not needed for MS_SYSSPACE */ 1904 uap->datalen = 0; 1905 1906 /* use optptr to pass in extra mount options */ 1907 uap->flags |= MS_OPTIONSTR; 1908 uap->optptr = nfs4_trigger_create_mntopts(stubvfsp); 1909 if (uap->optptr == NULL) { 1910 retval = EINVAL; 1911 goto done; 1912 } 1913 1914 /* domount() expects us to count the trailing NUL */ 1915 uap->optlen = strlen(uap->optptr) + 1; 1916 1917 /* 1918 * If we get EBUSY, we try again once to see if we can perform 1919 * the mount. We do this because of a spurious race condition. 1920 */ 1921 for (i = 0; i < 2; i++) { 1922 int error; 1923 bool_t was_mounted; 1924 1925 retval = domount(NULL, uap, stubvp, cr, vfsp); 1926 if (retval == 0) { 1927 retval = VFS_ROOT(*vfsp, newvpp); 1928 VFS_RELE(*vfsp); 1929 break; 1930 } else if (retval != EBUSY) { 1931 break; 1932 } 1933 1934 /* 1935 * We might find it mounted by the other racer... 1936 */ 1937 error = nfs4_trigger_mounted_already(stubvp, 1938 newvpp, &was_mounted, vfsp); 1939 if (error) { 1940 goto done; 1941 } else if (was_mounted) { 1942 retval = 0; 1943 break; 1944 } 1945 } 1946 1947 done: 1948 if (uap->optptr) 1949 nfs4_trigger_destroy_mntopts(uap->optptr); 1950 1951 kmem_free(uap->spec, spec_len + 1); 1952 kmem_free(uap, sizeof (struct mounta)); 1953 kmem_free(mntpt, mntpt_len + 1); 1954 1955 return (retval); 1956 } 1957 1958 /* 1959 * Build an nfs_args structure for passing to domount(). 1960 * 1961 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t; 1962 * generic data - common to all ephemeral mount types - is read directly 1963 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode. 1964 */ 1965 static struct nfs_args * 1966 nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp, 1967 ephemeral_servinfo_t *esi) 1968 { 1969 sec_data_t *secdata; 1970 struct nfs_args *nargs; 1971 1972 /* setup the nfs args */ 1973 nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 1974 1975 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1976 1977 nargs->addr = esi->esi_addr; 1978 1979 /* for AUTH_DH by negotiation */ 1980 if (esi->esi_syncaddr || esi->esi_netname) { 1981 nargs->flags |= NFSMNT_SECURE; 1982 nargs->syncaddr = esi->esi_syncaddr; 1983 nargs->netname = esi->esi_netname; 1984 } 1985 1986 nargs->flags |= NFSMNT_KNCONF; 1987 nargs->knconf = esi->esi_knconf; 1988 nargs->flags |= NFSMNT_HOSTNAME; 1989 nargs->hostname = esi->esi_hostname; 1990 nargs->fh = esi->esi_path; 1991 1992 /* general mount settings, all copied from parent mount */ 1993 mutex_enter(&mi->mi_lock); 1994 1995 if (!(mi->mi_flags & MI4_HARD)) 1996 nargs->flags |= NFSMNT_SOFT; 1997 1998 nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO | 1999 NFSMNT_RETRANS; 2000 nargs->wsize = mi->mi_stsize; 2001 nargs->rsize = mi->mi_tsize; 2002 nargs->timeo = mi->mi_timeo; 2003 nargs->retrans = mi->mi_retrans; 2004 2005 if (mi->mi_flags & MI4_INT) 2006 nargs->flags |= NFSMNT_INT; 2007 if (mi->mi_flags & MI4_NOAC) 2008 nargs->flags |= NFSMNT_NOAC; 2009 2010 nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN | 2011 NFSMNT_ACDIRMAX; 2012 nargs->acregmin = HR2SEC(mi->mi_acregmin); 2013 nargs->acregmax = HR2SEC(mi->mi_acregmax); 2014 nargs->acdirmin = HR2SEC(mi->mi_acdirmin); 2015 nargs->acdirmax = HR2SEC(mi->mi_acdirmax); 2016 2017 /* add any specific flags for this type of ephemeral mount */ 2018 nargs->flags |= esi->esi_mount_flags; 2019 2020 if (mi->mi_flags & MI4_NOCTO) 2021 nargs->flags |= NFSMNT_NOCTO; 2022 if (mi->mi_flags & MI4_GRPID) 2023 nargs->flags |= NFSMNT_GRPID; 2024 if (mi->mi_flags & MI4_LLOCK) 2025 nargs->flags |= NFSMNT_LLOCK; 2026 if (mi->mi_flags & MI4_NOPRINT) 2027 nargs->flags |= NFSMNT_NOPRINT; 2028 if (mi->mi_flags & MI4_DIRECTIO) 2029 nargs->flags |= NFSMNT_DIRECTIO; 2030 if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT) 2031 nargs->flags |= NFSMNT_PUBLIC; 2032 2033 /* Do some referral-specific option tweaking */ 2034 if (nargs->flags & NFSMNT_REFERRAL) { 2035 nargs->flags &= ~NFSMNT_DORDMA; 2036 nargs->flags |= NFSMNT_TRYRDMA; 2037 } 2038 2039 mutex_exit(&mi->mi_lock); 2040 2041 /* 2042 * Security data & negotiation policy. 2043 * 2044 * For mirror mounts, we need to preserve the parent mount's 2045 * preference for security negotiation, translating SV4_TRYSECDEFAULT 2046 * to NFSMNT_SECDEFAULT if present. 2047 * 2048 * For referrals, we always want security negotiation and will 2049 * set NFSMNT_SECDEFAULT and we will not copy current secdata. 2050 * The reason is that we can't negotiate down from a parent's 2051 * Kerberos flavor to AUTH_SYS. 2052 * 2053 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific 2054 * security flavour was requested, with data in sv_secdata, and that 2055 * no negotiation should occur. If this specified flavour fails, that's 2056 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT. 2057 * 2058 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in 2059 * default flavour, in sv_secdata, but then negotiate a new flavour. 2060 * Possible flavours are recorded in an array in sv_secinfo, with 2061 * currently in-use flavour pointed to by sv_currsec. 2062 * 2063 * If sv_currsec is set, i.e. if negotiation has already occurred, 2064 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless, 2065 * we will set NFSMNT_SECDEFAULT, to enable negotiation. 2066 */ 2067 if (nargs->flags & NFSMNT_REFERRAL) { 2068 /* enable negotiation for referral mount */ 2069 nargs->flags |= NFSMNT_SECDEFAULT; 2070 secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP); 2071 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 2072 secdata->data = NULL; 2073 } else if (svp->sv_flags & SV4_TRYSECDEFAULT) { 2074 /* enable negotiation for mirror mount */ 2075 nargs->flags |= NFSMNT_SECDEFAULT; 2076 2077 /* 2078 * As a starting point for negotiation, copy parent 2079 * mount's negotiated flavour (sv_currsec) if available, 2080 * or its passed-in flavour (sv_secdata) if not. 2081 */ 2082 if (svp->sv_currsec != NULL) 2083 secdata = copy_sec_data(svp->sv_currsec); 2084 else if (svp->sv_secdata != NULL) 2085 secdata = copy_sec_data(svp->sv_secdata); 2086 else 2087 secdata = NULL; 2088 } else { 2089 /* do not enable negotiation; copy parent's passed-in flavour */ 2090 if (svp->sv_secdata != NULL) 2091 secdata = copy_sec_data(svp->sv_secdata); 2092 else 2093 secdata = NULL; 2094 } 2095 2096 nfs_rw_exit(&svp->sv_lock); 2097 2098 nargs->flags |= NFSMNT_NEWARGS; 2099 nargs->nfs_args_ext = NFS_ARGS_EXTB; 2100 nargs->nfs_ext_u.nfs_extB.secdata = secdata; 2101 2102 /* for NFS RO failover; caller will set if necessary */ 2103 nargs->nfs_ext_u.nfs_extB.next = NULL; 2104 2105 return (nargs); 2106 } 2107 2108 static void 2109 nfs4_trigger_nargs_destroy(struct nfs_args *nargs) 2110 { 2111 /* 2112 * Either the mount failed, in which case the data is not needed, or 2113 * nfs4_mount() has either taken copies of what it needs or, 2114 * where it has merely copied the ptr, it has set *our* ptr to NULL, 2115 * whereby nfs4_free_args() will ignore it. 2116 */ 2117 nfs4_free_args(nargs); 2118 kmem_free(nargs, sizeof (struct nfs_args)); 2119 } 2120 2121 /* 2122 * When we finally get into the mounting, we need to add this 2123 * node to the ephemeral tree. 2124 * 2125 * This is called from nfs4_mount(). 2126 */ 2127 int 2128 nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp) 2129 { 2130 mntinfo4_t *mi_parent; 2131 nfs4_ephemeral_t *eph; 2132 nfs4_ephemeral_tree_t *net; 2133 2134 nfs4_ephemeral_t *prior; 2135 nfs4_ephemeral_t *child; 2136 2137 nfs4_ephemeral_t *peer; 2138 2139 nfs4_trigger_globals_t *ntg; 2140 zone_t *zone = curproc->p_zone; 2141 2142 int rc = 0; 2143 2144 mi_parent = VTOMI4(mvp); 2145 2146 /* 2147 * Get this before grabbing anything else! 2148 */ 2149 ntg = zone_getspecific(nfs4_ephemeral_key, zone); 2150 if (!ntg->ntg_thread_started) { 2151 nfs4_ephemeral_start_harvester(ntg); 2152 } 2153 2154 mutex_enter(&mi_parent->mi_lock); 2155 mutex_enter(&mi->mi_lock); 2156 2157 net = mi->mi_ephemeral_tree = 2158 mi_parent->mi_ephemeral_tree; 2159 2160 /* 2161 * If the mi_ephemeral_tree is NULL, then it 2162 * means that either the harvester or a manual 2163 * umount has cleared the tree out right before 2164 * we got here. 2165 * 2166 * There is nothing we can do here, so return 2167 * to the caller and let them decide whether they 2168 * try again. 2169 */ 2170 if (net == NULL) { 2171 mutex_exit(&mi->mi_lock); 2172 mutex_exit(&mi_parent->mi_lock); 2173 2174 return (EBUSY); 2175 } 2176 2177 /* 2178 * We've just tied the mntinfo to the tree, so 2179 * now we bump the refcnt and hold it there until 2180 * this mntinfo is removed from the tree. 2181 */ 2182 nfs4_ephemeral_tree_hold(net); 2183 2184 /* 2185 * We need to tack together the ephemeral mount 2186 * with this new mntinfo. 2187 */ 2188 eph = kmem_zalloc(sizeof (*eph), KM_SLEEP); 2189 eph->ne_mount = mi; 2190 MI4_HOLD(mi); 2191 VFS_HOLD(mi->mi_vfsp); 2192 eph->ne_ref_time = gethrestime_sec(); 2193 2194 /* 2195 * We need to tell the ephemeral mount when 2196 * to time out. 2197 */ 2198 eph->ne_mount_to = ntg->ntg_mount_to; 2199 2200 mi->mi_ephemeral = eph; 2201 2202 /* 2203 * If the enclosing mntinfo4 is also ephemeral, 2204 * then we need to point to its enclosing parent. 2205 * Else the enclosing mntinfo4 is the enclosing parent. 2206 * 2207 * We also need to weave this ephemeral node 2208 * into the tree. 2209 */ 2210 if (mi_parent->mi_flags & MI4_EPHEMERAL) { 2211 /* 2212 * We need to decide if we are 2213 * the root node of this branch 2214 * or if we are a sibling of this 2215 * branch. 2216 */ 2217 prior = mi_parent->mi_ephemeral; 2218 if (prior == NULL) { 2219 /* 2220 * Race condition, clean up, and 2221 * let caller handle mntinfo. 2222 */ 2223 mi->mi_flags &= ~MI4_EPHEMERAL; 2224 mi->mi_ephemeral = NULL; 2225 kmem_free(eph, sizeof (*eph)); 2226 VFS_RELE(mi->mi_vfsp); 2227 MI4_RELE(mi); 2228 nfs4_ephemeral_tree_rele(net); 2229 rc = EBUSY; 2230 } else { 2231 if (prior->ne_child == NULL) { 2232 prior->ne_child = eph; 2233 } else { 2234 child = prior->ne_child; 2235 2236 prior->ne_child = eph; 2237 eph->ne_peer = child; 2238 2239 child->ne_prior = eph; 2240 } 2241 2242 eph->ne_prior = prior; 2243 } 2244 } else { 2245 /* 2246 * The parent mntinfo4 is the non-ephemeral 2247 * root of the ephemeral tree. We 2248 * need to decide if we are the root 2249 * node of that tree or if we are a 2250 * sibling of the root node. 2251 * 2252 * We are the root if there is no 2253 * other node. 2254 */ 2255 if (net->net_root == NULL) { 2256 net->net_root = eph; 2257 } else { 2258 eph->ne_peer = peer = net->net_root; 2259 ASSERT(peer != NULL); 2260 net->net_root = eph; 2261 2262 peer->ne_prior = eph; 2263 } 2264 2265 eph->ne_prior = NULL; 2266 } 2267 2268 mutex_exit(&mi->mi_lock); 2269 mutex_exit(&mi_parent->mi_lock); 2270 2271 return (rc); 2272 } 2273 2274 /* 2275 * Commit the changes to the ephemeral tree for removing this node. 2276 */ 2277 static void 2278 nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph) 2279 { 2280 nfs4_ephemeral_t *e = eph; 2281 nfs4_ephemeral_t *peer; 2282 nfs4_ephemeral_t *prior; 2283 2284 peer = eph->ne_peer; 2285 prior = e->ne_prior; 2286 2287 /* 2288 * If this branch root was not the 2289 * tree root, then we need to fix back pointers. 2290 */ 2291 if (prior) { 2292 if (prior->ne_child == e) { 2293 prior->ne_child = peer; 2294 } else { 2295 prior->ne_peer = peer; 2296 } 2297 2298 if (peer) 2299 peer->ne_prior = prior; 2300 } else if (peer) { 2301 peer->ne_mount->mi_ephemeral_tree->net_root = peer; 2302 peer->ne_prior = NULL; 2303 } else { 2304 e->ne_mount->mi_ephemeral_tree->net_root = NULL; 2305 } 2306 } 2307 2308 /* 2309 * We want to avoid recursion at all costs. So we need to 2310 * unroll the tree. We do this by a depth first traversal to 2311 * leaf nodes. We blast away the leaf and work our way back 2312 * up and down the tree. 2313 */ 2314 static int 2315 nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph, 2316 int isTreeRoot, int flag, cred_t *cr) 2317 { 2318 nfs4_ephemeral_t *e = eph; 2319 nfs4_ephemeral_t *prior; 2320 mntinfo4_t *mi; 2321 vfs_t *vfsp; 2322 int error; 2323 2324 /* 2325 * We use the loop while unrolling the ephemeral tree. 2326 */ 2327 for (;;) { 2328 /* 2329 * First we walk down the child. 2330 */ 2331 if (e->ne_child) { 2332 prior = e; 2333 e = e->ne_child; 2334 continue; 2335 } 2336 2337 /* 2338 * If we are the root of the branch we are removing, 2339 * we end it here. But if the branch is the root of 2340 * the tree, we have to forge on. We do not consider 2341 * the peer list for the root because while it may 2342 * be okay to remove, it is both extra work and a 2343 * potential for a false-positive error to stall the 2344 * unmount attempt. 2345 */ 2346 if (e == eph && isTreeRoot == FALSE) 2347 return (0); 2348 2349 /* 2350 * Next we walk down the peer list. 2351 */ 2352 if (e->ne_peer) { 2353 prior = e; 2354 e = e->ne_peer; 2355 continue; 2356 } 2357 2358 /* 2359 * We can only remove the node passed in by the 2360 * caller if it is the root of the ephemeral tree. 2361 * Otherwise, the caller will remove it. 2362 */ 2363 if (e == eph && isTreeRoot == FALSE) 2364 return (0); 2365 2366 /* 2367 * Okay, we have a leaf node, time 2368 * to prune it! 2369 * 2370 * Note that prior can only be NULL if 2371 * and only if it is the root of the 2372 * ephemeral tree. 2373 */ 2374 prior = e->ne_prior; 2375 2376 mi = e->ne_mount; 2377 mutex_enter(&mi->mi_lock); 2378 vfsp = mi->mi_vfsp; 2379 ASSERT(vfsp != NULL); 2380 2381 /* 2382 * Cleared by umount2_engine. 2383 */ 2384 VFS_HOLD(vfsp); 2385 2386 /* 2387 * Inform nfs4_unmount to not recursively 2388 * descend into this node's children when it 2389 * gets processed. 2390 */ 2391 mi->mi_flags |= MI4_EPHEMERAL_RECURSED; 2392 mutex_exit(&mi->mi_lock); 2393 2394 error = umount2_engine(vfsp, flag, cr, FALSE); 2395 if (error) { 2396 /* 2397 * We need to reenable nfs4_unmount's ability 2398 * to recursively descend on this node. 2399 */ 2400 mutex_enter(&mi->mi_lock); 2401 mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED; 2402 mutex_exit(&mi->mi_lock); 2403 2404 return (error); 2405 } 2406 2407 /* 2408 * If we are the current node, we do not want to 2409 * touch anything else. At this point, the only 2410 * way the current node can have survived to here 2411 * is if it is the root of the ephemeral tree and 2412 * we are unmounting the enclosing mntinfo4. 2413 */ 2414 if (e == eph) { 2415 ASSERT(prior == NULL); 2416 return (0); 2417 } 2418 2419 /* 2420 * Stitch up the prior node. Note that since 2421 * we have handled the root of the tree, prior 2422 * must be non-NULL. 2423 */ 2424 ASSERT(prior != NULL); 2425 if (prior->ne_child == e) { 2426 prior->ne_child = NULL; 2427 } else { 2428 ASSERT(prior->ne_peer == e); 2429 2430 prior->ne_peer = NULL; 2431 } 2432 2433 e = prior; 2434 } 2435 2436 /* NOTREACHED */ 2437 } 2438 2439 /* 2440 * Common code to safely release net_cnt_lock and net_tree_lock 2441 */ 2442 void 2443 nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock, 2444 nfs4_ephemeral_tree_t **pnet) 2445 { 2446 nfs4_ephemeral_tree_t *net = *pnet; 2447 2448 if (*pmust_unlock) { 2449 mutex_enter(&net->net_cnt_lock); 2450 net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING; 2451 mutex_exit(&net->net_cnt_lock); 2452 2453 mutex_exit(&net->net_tree_lock); 2454 2455 *pmust_unlock = FALSE; 2456 } 2457 } 2458 2459 /* 2460 * While we may have removed any child or sibling nodes of this 2461 * ephemeral node, we can not nuke it until we know that there 2462 * were no actived vnodes on it. This will do that final 2463 * work once we know it is not busy. 2464 */ 2465 void 2466 nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock, 2467 nfs4_ephemeral_tree_t **pnet) 2468 { 2469 /* 2470 * Now we need to get rid of the ephemeral data if it exists. 2471 */ 2472 mutex_enter(&mi->mi_lock); 2473 if (mi->mi_ephemeral) { 2474 /* 2475 * If we are the root node of an ephemeral branch 2476 * which is being removed, then we need to fixup 2477 * pointers into and out of the node. 2478 */ 2479 if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED)) 2480 nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral); 2481 2482 nfs4_ephemeral_tree_rele(*pnet); 2483 ASSERT(mi->mi_ephemeral != NULL); 2484 2485 kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral)); 2486 mi->mi_ephemeral = NULL; 2487 VFS_RELE(mi->mi_vfsp); 2488 MI4_RELE(mi); 2489 } 2490 mutex_exit(&mi->mi_lock); 2491 2492 nfs4_ephemeral_umount_unlock(pmust_unlock, pnet); 2493 } 2494 2495 /* 2496 * Unmount an ephemeral node. 2497 * 2498 * Note that if this code fails, then it must unlock. 2499 * 2500 * If it succeeds, then the caller must be prepared to do so. 2501 */ 2502 int 2503 nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, 2504 bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet) 2505 { 2506 int error = 0; 2507 nfs4_ephemeral_t *eph; 2508 nfs4_ephemeral_tree_t *net; 2509 int is_derooting = FALSE; 2510 int is_recursed = FALSE; 2511 int was_locked = FALSE; 2512 2513 /* 2514 * Make sure to set the default state for cleaning 2515 * up the tree in the caller (and on the way out). 2516 */ 2517 *pmust_unlock = FALSE; 2518 2519 /* 2520 * The active vnodes on this file system may be ephemeral 2521 * children. We need to check for and try to unmount them 2522 * here. If any can not be unmounted, we are going 2523 * to return EBUSY. 2524 */ 2525 mutex_enter(&mi->mi_lock); 2526 2527 /* 2528 * If an ephemeral tree, we need to check to see if 2529 * the lock is already held. If it is, then we need 2530 * to see if we are being called as a result of 2531 * the recursive removal of some node of the tree or 2532 * if we are another attempt to remove the tree. 2533 * 2534 * mi_flags & MI4_EPHEMERAL indicates an ephemeral 2535 * node. mi_ephemeral being non-NULL also does this. 2536 * 2537 * mi_ephemeral_tree being non-NULL is sufficient 2538 * to also indicate either it is an ephemeral node 2539 * or the enclosing mntinfo4. 2540 * 2541 * Do we need MI4_EPHEMERAL? Yes, it is useful for 2542 * when we delete the ephemeral node and need to 2543 * differentiate from an ephemeral node and the 2544 * enclosing root node. 2545 */ 2546 *pnet = net = mi->mi_ephemeral_tree; 2547 if (net == NULL) { 2548 mutex_exit(&mi->mi_lock); 2549 return (0); 2550 } 2551 2552 eph = mi->mi_ephemeral; 2553 is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED; 2554 is_derooting = (eph == NULL); 2555 2556 mutex_enter(&net->net_cnt_lock); 2557 2558 /* 2559 * If this is not recursion, then we need to 2560 * check to see if a harvester thread has 2561 * already grabbed the lock. 2562 * 2563 * After we exit this branch, we may not 2564 * blindly return, we need to jump to 2565 * is_busy! 2566 */ 2567 if (!is_recursed) { 2568 if (net->net_status & 2569 NFS4_EPHEMERAL_TREE_LOCKED) { 2570 /* 2571 * If the tree is locked, we need 2572 * to decide whether we are the 2573 * harvester or some explicit call 2574 * for a umount. The only way that 2575 * we are the harvester is if 2576 * MS_SYSSPACE is set. 2577 * 2578 * We only let the harvester through 2579 * at this point. 2580 * 2581 * We return EBUSY so that the 2582 * caller knows something is 2583 * going on. Note that by that 2584 * time, the umount in the other 2585 * thread may have already occured. 2586 */ 2587 if (!(flag & MS_SYSSPACE)) { 2588 mutex_exit(&net->net_cnt_lock); 2589 mutex_exit(&mi->mi_lock); 2590 2591 return (EBUSY); 2592 } 2593 2594 was_locked = TRUE; 2595 } 2596 } 2597 2598 mutex_exit(&net->net_cnt_lock); 2599 mutex_exit(&mi->mi_lock); 2600 2601 /* 2602 * If we are not the harvester, we need to check 2603 * to see if we need to grab the tree lock. 2604 */ 2605 if (was_locked == FALSE) { 2606 /* 2607 * If we grab the lock, it means that no other 2608 * operation is working on the tree. If we don't 2609 * grab it, we need to decide if this is because 2610 * we are a recursive call or a new operation. 2611 */ 2612 if (mutex_tryenter(&net->net_tree_lock)) { 2613 *pmust_unlock = TRUE; 2614 } else { 2615 /* 2616 * If we are a recursive call, we can 2617 * proceed without the lock. 2618 * Otherwise we have to wait until 2619 * the lock becomes free. 2620 */ 2621 if (!is_recursed) { 2622 mutex_enter(&net->net_cnt_lock); 2623 if (net->net_status & 2624 (NFS4_EPHEMERAL_TREE_DEROOTING 2625 | NFS4_EPHEMERAL_TREE_INVALID)) { 2626 mutex_exit(&net->net_cnt_lock); 2627 goto is_busy; 2628 } 2629 mutex_exit(&net->net_cnt_lock); 2630 2631 /* 2632 * We can't hold any other locks whilst 2633 * we wait on this to free up. 2634 */ 2635 mutex_enter(&net->net_tree_lock); 2636 2637 /* 2638 * Note that while mi->mi_ephemeral 2639 * may change and thus we have to 2640 * update eph, it is the case that 2641 * we have tied down net and 2642 * do not care if mi->mi_ephemeral_tree 2643 * has changed. 2644 */ 2645 mutex_enter(&mi->mi_lock); 2646 eph = mi->mi_ephemeral; 2647 mutex_exit(&mi->mi_lock); 2648 2649 /* 2650 * Okay, we need to see if either the 2651 * tree got nuked or the current node 2652 * got nuked. Both of which will cause 2653 * an error. 2654 * 2655 * Note that a subsequent retry of the 2656 * umount shall work. 2657 */ 2658 mutex_enter(&net->net_cnt_lock); 2659 if (net->net_status & 2660 NFS4_EPHEMERAL_TREE_INVALID || 2661 (!is_derooting && eph == NULL)) { 2662 mutex_exit(&net->net_cnt_lock); 2663 mutex_exit(&net->net_tree_lock); 2664 goto is_busy; 2665 } 2666 mutex_exit(&net->net_cnt_lock); 2667 *pmust_unlock = TRUE; 2668 } 2669 } 2670 } 2671 2672 /* 2673 * Only once we have grabbed the lock can we mark what we 2674 * are planning on doing to the ephemeral tree. 2675 */ 2676 if (*pmust_unlock) { 2677 mutex_enter(&net->net_cnt_lock); 2678 net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING; 2679 2680 /* 2681 * Check to see if we are nuking the root. 2682 */ 2683 if (is_derooting) 2684 net->net_status |= 2685 NFS4_EPHEMERAL_TREE_DEROOTING; 2686 mutex_exit(&net->net_cnt_lock); 2687 } 2688 2689 if (!is_derooting) { 2690 /* 2691 * Only work on children if the caller has not already 2692 * done so. 2693 */ 2694 if (!is_recursed) { 2695 ASSERT(eph != NULL); 2696 2697 error = nfs4_ephemeral_unmount_engine(eph, 2698 FALSE, flag, cr); 2699 if (error) 2700 goto is_busy; 2701 } 2702 } else { 2703 eph = net->net_root; 2704 2705 /* 2706 * Only work if there is something there. 2707 */ 2708 if (eph) { 2709 error = nfs4_ephemeral_unmount_engine(eph, TRUE, 2710 flag, cr); 2711 if (error) { 2712 mutex_enter(&net->net_cnt_lock); 2713 net->net_status &= 2714 ~NFS4_EPHEMERAL_TREE_DEROOTING; 2715 mutex_exit(&net->net_cnt_lock); 2716 goto is_busy; 2717 } 2718 2719 /* 2720 * Nothing else which goes wrong will 2721 * invalidate the blowing away of the 2722 * ephmeral tree. 2723 */ 2724 net->net_root = NULL; 2725 } 2726 2727 /* 2728 * We have derooted and we have caused the tree to be 2729 * invalidated. 2730 */ 2731 mutex_enter(&net->net_cnt_lock); 2732 net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING; 2733 net->net_status |= NFS4_EPHEMERAL_TREE_INVALID; 2734 DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting, 2735 uint_t, net->net_refcnt); 2736 2737 /* 2738 * We will not finalize this node, so safe to 2739 * release it. 2740 */ 2741 nfs4_ephemeral_tree_decr(net); 2742 mutex_exit(&net->net_cnt_lock); 2743 2744 if (was_locked == FALSE) 2745 mutex_exit(&net->net_tree_lock); 2746 2747 /* 2748 * We have just blown away any notation of this 2749 * tree being locked or having a refcnt. 2750 * We can't let the caller try to clean things up. 2751 */ 2752 *pmust_unlock = FALSE; 2753 2754 /* 2755 * At this point, the tree should no longer be 2756 * associated with the mntinfo4. We need to pull 2757 * it off there and let the harvester take 2758 * care of it once the refcnt drops. 2759 */ 2760 mutex_enter(&mi->mi_lock); 2761 mi->mi_ephemeral_tree = NULL; 2762 mutex_exit(&mi->mi_lock); 2763 } 2764 2765 return (0); 2766 2767 is_busy: 2768 2769 nfs4_ephemeral_umount_unlock(pmust_unlock, pnet); 2770 2771 return (error); 2772 } 2773 2774 /* 2775 * Do the umount and record any error in the parent. 2776 */ 2777 static void 2778 nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag, 2779 nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior) 2780 { 2781 int error; 2782 2783 /* 2784 * Only act on if the fs is still mounted. 2785 */ 2786 if (vfsp == NULL) 2787 return; 2788 2789 error = umount2_engine(vfsp, flag, kcred, FALSE); 2790 if (error) { 2791 if (prior) { 2792 if (prior->ne_child == e) 2793 prior->ne_state |= 2794 NFS4_EPHEMERAL_CHILD_ERROR; 2795 else 2796 prior->ne_state |= 2797 NFS4_EPHEMERAL_PEER_ERROR; 2798 } 2799 } 2800 } 2801 2802 /* 2803 * For each tree in the forest (where the forest is in 2804 * effect all of the ephemeral trees for this zone), 2805 * scan to see if a node can be unmounted. Note that 2806 * unlike nfs4_ephemeral_unmount_engine(), we do 2807 * not process the current node before children or 2808 * siblings. I.e., if a node can be unmounted, we 2809 * do not recursively check to see if the nodes 2810 * hanging off of it can also be unmounted. 2811 * 2812 * Instead, we delve down deep to try and remove the 2813 * children first. Then, because we share code with 2814 * nfs4_ephemeral_unmount_engine(), we will try 2815 * them again. This could be a performance issue in 2816 * the future. 2817 * 2818 * Also note that unlike nfs4_ephemeral_unmount_engine(), 2819 * we do not halt on an error. We will not remove the 2820 * current node, but we will keep on trying to remove 2821 * the others. 2822 * 2823 * force indicates that we want the unmount to occur 2824 * even if there is something blocking it. 2825 * 2826 * time_check indicates that we want to see if the 2827 * mount has expired past mount_to or not. Typically 2828 * we want to do this and only on a shutdown of the 2829 * zone would we want to ignore the check. 2830 */ 2831 static void 2832 nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg, 2833 bool_t force, bool_t time_check) 2834 { 2835 nfs4_ephemeral_tree_t *net; 2836 nfs4_ephemeral_tree_t *prev = NULL; 2837 nfs4_ephemeral_tree_t *next; 2838 nfs4_ephemeral_t *e; 2839 nfs4_ephemeral_t *prior; 2840 time_t now = gethrestime_sec(); 2841 2842 nfs4_ephemeral_tree_t *harvest = NULL; 2843 2844 int flag; 2845 2846 mntinfo4_t *mi; 2847 vfs_t *vfsp; 2848 2849 if (force) 2850 flag = MS_FORCE | MS_SYSSPACE; 2851 else 2852 flag = MS_SYSSPACE; 2853 2854 mutex_enter(&ntg->ntg_forest_lock); 2855 for (net = ntg->ntg_forest; net != NULL; net = next) { 2856 next = net->net_next; 2857 2858 nfs4_ephemeral_tree_hold(net); 2859 2860 mutex_enter(&net->net_tree_lock); 2861 2862 /* 2863 * Let the unmount code know that the 2864 * tree is already locked! 2865 */ 2866 mutex_enter(&net->net_cnt_lock); 2867 net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED; 2868 mutex_exit(&net->net_cnt_lock); 2869 2870 /* 2871 * If the intent is force all ephemeral nodes to 2872 * be unmounted in this zone, we can short circuit a 2873 * lot of tree traversal and simply zap the root node. 2874 */ 2875 if (force) { 2876 if (net->net_root) { 2877 mi = net->net_root->ne_mount; 2878 2879 vfsp = mi->mi_vfsp; 2880 ASSERT(vfsp != NULL); 2881 2882 /* 2883 * Cleared by umount2_engine. 2884 */ 2885 VFS_HOLD(vfsp); 2886 2887 (void) umount2_engine(vfsp, flag, 2888 kcred, FALSE); 2889 2890 goto check_done; 2891 } 2892 } 2893 2894 e = net->net_root; 2895 if (e) 2896 e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD; 2897 2898 while (e) { 2899 if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) { 2900 e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING; 2901 if (e->ne_child) { 2902 e = e->ne_child; 2903 e->ne_state = 2904 NFS4_EPHEMERAL_VISIT_CHILD; 2905 } 2906 2907 continue; 2908 } else if (e->ne_state == 2909 NFS4_EPHEMERAL_VISIT_SIBLING) { 2910 e->ne_state = NFS4_EPHEMERAL_PROCESS_ME; 2911 if (e->ne_peer) { 2912 e = e->ne_peer; 2913 e->ne_state = 2914 NFS4_EPHEMERAL_VISIT_CHILD; 2915 } 2916 2917 continue; 2918 } else if (e->ne_state == 2919 NFS4_EPHEMERAL_CHILD_ERROR) { 2920 prior = e->ne_prior; 2921 2922 /* 2923 * If a child reported an error, do 2924 * not bother trying to unmount. 2925 * 2926 * If your prior node is a parent, 2927 * pass the error up such that they 2928 * also do not try to unmount. 2929 * 2930 * However, if your prior is a sibling, 2931 * let them try to unmount if they can. 2932 */ 2933 if (prior) { 2934 if (prior->ne_child == e) 2935 prior->ne_state |= 2936 NFS4_EPHEMERAL_CHILD_ERROR; 2937 else 2938 prior->ne_state |= 2939 NFS4_EPHEMERAL_PEER_ERROR; 2940 } 2941 2942 /* 2943 * Clear the error and if needed, process peers. 2944 * 2945 * Once we mask out the error, we know whether 2946 * or we have to process another node. 2947 */ 2948 e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR; 2949 if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME) 2950 e = prior; 2951 2952 continue; 2953 } else if (e->ne_state == 2954 NFS4_EPHEMERAL_PEER_ERROR) { 2955 prior = e->ne_prior; 2956 2957 if (prior) { 2958 if (prior->ne_child == e) 2959 prior->ne_state = 2960 NFS4_EPHEMERAL_CHILD_ERROR; 2961 else 2962 prior->ne_state = 2963 NFS4_EPHEMERAL_PEER_ERROR; 2964 } 2965 2966 /* 2967 * Clear the error from this node and do the 2968 * correct processing. 2969 */ 2970 e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR; 2971 continue; 2972 } 2973 2974 prior = e->ne_prior; 2975 e->ne_state = NFS4_EPHEMERAL_OK; 2976 2977 /* 2978 * It must be the case that we need to process 2979 * this node. 2980 */ 2981 if (!time_check || 2982 now - e->ne_ref_time > e->ne_mount_to) { 2983 mi = e->ne_mount; 2984 vfsp = mi->mi_vfsp; 2985 2986 /* 2987 * Cleared by umount2_engine. 2988 */ 2989 if (vfsp != NULL) 2990 VFS_HOLD(vfsp); 2991 2992 /* 2993 * Note that we effectively work down to the 2994 * leaf nodes first, try to unmount them, 2995 * then work our way back up into the leaf 2996 * nodes. 2997 * 2998 * Also note that we deal with a lot of 2999 * complexity by sharing the work with 3000 * the manual unmount code. 3001 */ 3002 nfs4_ephemeral_record_umount(vfsp, flag, 3003 e, prior); 3004 } 3005 3006 e = prior; 3007 } 3008 3009 check_done: 3010 3011 /* 3012 * At this point we are done processing this tree. 3013 * 3014 * If the tree is invalid and we were the only reference 3015 * to it, then we push it on the local linked list 3016 * to remove it at the end. We avoid that action now 3017 * to keep the tree processing going along at a fair clip. 3018 * 3019 * Else, even if we were the only reference, we 3020 * allow it to be reused as needed. 3021 */ 3022 mutex_enter(&net->net_cnt_lock); 3023 nfs4_ephemeral_tree_decr(net); 3024 if (net->net_refcnt == 0 && 3025 net->net_status & NFS4_EPHEMERAL_TREE_INVALID) { 3026 net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED; 3027 mutex_exit(&net->net_cnt_lock); 3028 mutex_exit(&net->net_tree_lock); 3029 3030 if (prev) 3031 prev->net_next = net->net_next; 3032 else 3033 ntg->ntg_forest = net->net_next; 3034 3035 net->net_next = harvest; 3036 harvest = net; 3037 3038 VFS_RELE(net->net_mount->mi_vfsp); 3039 MI4_RELE(net->net_mount); 3040 3041 continue; 3042 } 3043 3044 net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED; 3045 mutex_exit(&net->net_cnt_lock); 3046 mutex_exit(&net->net_tree_lock); 3047 3048 prev = net; 3049 } 3050 mutex_exit(&ntg->ntg_forest_lock); 3051 3052 for (net = harvest; net != NULL; net = next) { 3053 next = net->net_next; 3054 3055 mutex_destroy(&net->net_tree_lock); 3056 mutex_destroy(&net->net_cnt_lock); 3057 kmem_free(net, sizeof (*net)); 3058 } 3059 } 3060 3061 /* 3062 * This is the thread which decides when the harvesting 3063 * can proceed and when to kill it off for this zone. 3064 */ 3065 static void 3066 nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg) 3067 { 3068 clock_t timeleft; 3069 zone_t *zone = curproc->p_zone; 3070 3071 for (;;) { 3072 timeleft = zone_status_timedwait(zone, ddi_get_lbolt() + 3073 nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN); 3074 3075 /* 3076 * zone is exiting... 3077 */ 3078 if (timeleft != -1) { 3079 ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN); 3080 zthread_exit(); 3081 /* NOTREACHED */ 3082 } 3083 3084 /* 3085 * Only bother scanning if there is potential 3086 * work to be done. 3087 */ 3088 if (ntg->ntg_forest == NULL) 3089 continue; 3090 3091 /* 3092 * Now scan the list and get rid of everything which 3093 * is old. 3094 */ 3095 nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE); 3096 } 3097 3098 /* NOTREACHED */ 3099 } 3100 3101 /* 3102 * The zone specific glue needed to start the unmount harvester. 3103 * 3104 * Note that we want to avoid holding the mutex as long as possible, 3105 * hence the multiple checks. 3106 * 3107 * The caller should avoid us getting down here in the first 3108 * place. 3109 */ 3110 static void 3111 nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg) 3112 { 3113 /* 3114 * It got started before we got here... 3115 */ 3116 if (ntg->ntg_thread_started) 3117 return; 3118 3119 mutex_enter(&nfs4_ephemeral_thread_lock); 3120 3121 if (ntg->ntg_thread_started) { 3122 mutex_exit(&nfs4_ephemeral_thread_lock); 3123 return; 3124 } 3125 3126 /* 3127 * Start the unmounter harvester thread for this zone. 3128 */ 3129 (void) zthread_create(NULL, 0, nfs4_ephemeral_harvester, 3130 ntg, 0, minclsyspri); 3131 3132 ntg->ntg_thread_started = TRUE; 3133 mutex_exit(&nfs4_ephemeral_thread_lock); 3134 } 3135 3136 /*ARGSUSED*/ 3137 static void * 3138 nfs4_ephemeral_zsd_create(zoneid_t zoneid) 3139 { 3140 nfs4_trigger_globals_t *ntg; 3141 3142 ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP); 3143 ntg->ntg_thread_started = FALSE; 3144 3145 /* 3146 * This is the default.... 3147 */ 3148 ntg->ntg_mount_to = nfs4_trigger_thread_timer; 3149 3150 mutex_init(&ntg->ntg_forest_lock, NULL, 3151 MUTEX_DEFAULT, NULL); 3152 3153 return (ntg); 3154 } 3155 3156 /* 3157 * Try a nice gentle walk down the forest and convince 3158 * all of the trees to gracefully give it up. 3159 */ 3160 /*ARGSUSED*/ 3161 static void 3162 nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg) 3163 { 3164 nfs4_trigger_globals_t *ntg = arg; 3165 3166 if (!ntg) 3167 return; 3168 3169 nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE); 3170 } 3171 3172 /* 3173 * Race along the forest and rip all of the trees out by 3174 * their rootballs! 3175 */ 3176 /*ARGSUSED*/ 3177 static void 3178 nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg) 3179 { 3180 nfs4_trigger_globals_t *ntg = arg; 3181 3182 if (!ntg) 3183 return; 3184 3185 nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE); 3186 3187 mutex_destroy(&ntg->ntg_forest_lock); 3188 kmem_free(ntg, sizeof (*ntg)); 3189 } 3190 3191 /* 3192 * This is the zone independent cleanup needed for 3193 * emphemeral mount processing. 3194 */ 3195 void 3196 nfs4_ephemeral_fini(void) 3197 { 3198 (void) zone_key_delete(nfs4_ephemeral_key); 3199 mutex_destroy(&nfs4_ephemeral_thread_lock); 3200 } 3201 3202 /* 3203 * This is the zone independent initialization needed for 3204 * emphemeral mount processing. 3205 */ 3206 void 3207 nfs4_ephemeral_init(void) 3208 { 3209 mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT, 3210 NULL); 3211 3212 zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create, 3213 nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy); 3214 } 3215 3216 /* 3217 * nfssys() calls this function to set the per-zone 3218 * value of mount_to to drive when an ephemeral mount is 3219 * timed out. Each mount will grab a copy of this value 3220 * when mounted. 3221 */ 3222 void 3223 nfs4_ephemeral_set_mount_to(uint_t mount_to) 3224 { 3225 nfs4_trigger_globals_t *ntg; 3226 zone_t *zone = curproc->p_zone; 3227 3228 ntg = zone_getspecific(nfs4_ephemeral_key, zone); 3229 3230 ntg->ntg_mount_to = mount_to; 3231 } 3232 3233 /* 3234 * Walk the list of v4 mount options; if they are currently set in vfsp, 3235 * append them to a new comma-separated mount option string, and return it. 3236 * 3237 * Caller should free by calling nfs4_trigger_destroy_mntopts(). 3238 */ 3239 static char * 3240 nfs4_trigger_create_mntopts(vfs_t *vfsp) 3241 { 3242 uint_t i; 3243 char *mntopts; 3244 struct vfssw *vswp; 3245 mntopts_t *optproto; 3246 3247 mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP); 3248 3249 /* get the list of applicable mount options for v4; locks *vswp */ 3250 vswp = vfs_getvfssw(MNTTYPE_NFS4); 3251 optproto = &vswp->vsw_optproto; 3252 3253 for (i = 0; i < optproto->mo_count; i++) { 3254 struct mntopt *mop = &optproto->mo_list[i]; 3255 3256 if (mop->mo_flags & MO_EMPTY) 3257 continue; 3258 3259 if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) { 3260 kmem_free(mntopts, MAX_MNTOPT_STR); 3261 vfs_unrefvfssw(vswp); 3262 return (NULL); 3263 } 3264 } 3265 3266 vfs_unrefvfssw(vswp); 3267 3268 /* 3269 * MNTOPT_XATTR is not in the v4 mount opt proto list, 3270 * and it may only be passed via MS_OPTIONSTR, so we 3271 * must handle it here. 3272 * 3273 * Ideally, it would be in the list, but NFS does not specify its 3274 * own opt proto list, it uses instead the default one. Since 3275 * not all filesystems support extended attrs, it would not be 3276 * appropriate to add it there. 3277 */ 3278 if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) || 3279 nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) { 3280 kmem_free(mntopts, MAX_MNTOPT_STR); 3281 return (NULL); 3282 } 3283 3284 return (mntopts); 3285 } 3286 3287 static void 3288 nfs4_trigger_destroy_mntopts(char *mntopts) 3289 { 3290 if (mntopts) 3291 kmem_free(mntopts, MAX_MNTOPT_STR); 3292 } 3293 3294 /* 3295 * Check a single mount option (optname). Add to mntopts if it is set in VFS. 3296 */ 3297 static int 3298 nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp) 3299 { 3300 if (mntopts == NULL || optname == NULL || vfsp == NULL) 3301 return (EINVAL); 3302 3303 if (vfs_optionisset(vfsp, optname, NULL)) { 3304 size_t mntoptslen = strlen(mntopts); 3305 size_t optnamelen = strlen(optname); 3306 3307 /* +1 for ',', +1 for NUL */ 3308 if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR) 3309 return (EOVERFLOW); 3310 3311 /* first or subsequent mount option? */ 3312 if (*mntopts != '\0') 3313 (void) strcat(mntopts, ","); 3314 3315 (void) strcat(mntopts, optname); 3316 } 3317 3318 return (0); 3319 } 3320 3321 static enum clnt_stat 3322 nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr) 3323 { 3324 int retries; 3325 uint_t max_msgsize; 3326 enum clnt_stat status; 3327 CLIENT *cl; 3328 struct timeval timeout; 3329 3330 /* as per recov_newserver() */ 3331 max_msgsize = 0; 3332 retries = 1; 3333 timeout.tv_sec = 2; 3334 timeout.tv_usec = 0; 3335 3336 if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4, 3337 max_msgsize, retries, CRED(), &cl) != 0) 3338 return (RPC_FAILED); 3339 3340 if (nointr) 3341 cl->cl_nosignal = TRUE; 3342 status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL, 3343 timeout); 3344 if (nointr) 3345 cl->cl_nosignal = FALSE; 3346 3347 AUTH_DESTROY(cl->cl_auth); 3348 CLNT_DESTROY(cl); 3349 3350 return (status); 3351 } 3352 3353 static enum clnt_stat 3354 nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) 3355 { 3356 return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr)); 3357 } 3358