1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2015 Joyent, Inc. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/errno.h> 30 #include <sys/vnode.h> 31 #include <sys/vfs.h> 32 #include <sys/vfs_opreg.h> 33 #include <sys/uio.h> 34 #include <sys/cred.h> 35 #include <sys/pathname.h> 36 #include <sys/debug.h> 37 #include <sys/fs/lofs_node.h> 38 #include <sys/fs/lofs_info.h> 39 #include <fs/fs_subr.h> 40 #include <vm/as.h> 41 #include <vm/seg.h> 42 43 /* 44 * These are the vnode ops routines which implement the vnode interface to 45 * the looped-back file system. These routines just take their parameters, 46 * and then calling the appropriate real vnode routine(s) to do the work. 47 */ 48 49 static int 50 lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct) 51 { 52 vnode_t *vp = *vpp; 53 vnode_t *rvp; 54 vnode_t *oldvp; 55 int error; 56 57 #ifdef LODEBUG 58 lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n", 59 vp, vp->v_count, realvp(vp), realvp(vp)->v_count); 60 #endif 61 62 oldvp = vp; 63 vp = rvp = realvp(vp); 64 /* 65 * Need to hold new reference to vp since VOP_OPEN() may 66 * decide to release it. 67 */ 68 VN_HOLD(vp); 69 error = VOP_OPEN(&rvp, flag, cr, ct); 70 71 if (!error && rvp != vp) { 72 /* 73 * the FS which we called should have released the 74 * new reference on vp 75 */ 76 *vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0); 77 if ((*vpp)->v_type == VDIR) { 78 /* 79 * Copy over any looping flags to the new lnode. 80 */ 81 (vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping; 82 } 83 if (IS_DEVVP(*vpp)) { 84 vnode_t *svp; 85 86 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 87 VN_RELE(*vpp); 88 if (svp == NULL) 89 error = ENOSYS; 90 else 91 *vpp = svp; 92 } 93 VN_RELE(oldvp); 94 } else { 95 ASSERT(rvp->v_count > 1); 96 VN_RELE(rvp); 97 } 98 99 return (error); 100 } 101 102 static int 103 lo_close( 104 vnode_t *vp, 105 int flag, 106 int count, 107 offset_t offset, 108 struct cred *cr, 109 caller_context_t *ct) 110 { 111 #ifdef LODEBUG 112 lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp)); 113 #endif 114 vp = realvp(vp); 115 return (VOP_CLOSE(vp, flag, count, offset, cr, ct)); 116 } 117 118 static int 119 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr, 120 caller_context_t *ct) 121 { 122 #ifdef LODEBUG 123 lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp)); 124 #endif 125 vp = realvp(vp); 126 return (VOP_READ(vp, uiop, ioflag, cr, ct)); 127 } 128 129 static int 130 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr, 131 caller_context_t *ct) 132 { 133 #ifdef LODEBUG 134 lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp)); 135 #endif 136 vp = realvp(vp); 137 return (VOP_WRITE(vp, uiop, ioflag, cr, ct)); 138 } 139 140 static int 141 lo_ioctl( 142 vnode_t *vp, 143 int cmd, 144 intptr_t arg, 145 int flag, 146 struct cred *cr, 147 int *rvalp, 148 caller_context_t *ct) 149 { 150 #ifdef LODEBUG 151 lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp)); 152 #endif 153 vp = realvp(vp); 154 return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct)); 155 } 156 157 static int 158 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct) 159 { 160 vp = realvp(vp); 161 return (VOP_SETFL(vp, oflags, nflags, cr, ct)); 162 } 163 164 static int 165 lo_getattr( 166 vnode_t *vp, 167 struct vattr *vap, 168 int flags, 169 struct cred *cr, 170 caller_context_t *ct) 171 { 172 int error; 173 174 #ifdef LODEBUG 175 lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp)); 176 #endif 177 if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct)) 178 return (error); 179 180 return (0); 181 } 182 183 static int 184 lo_setattr( 185 vnode_t *vp, 186 struct vattr *vap, 187 int flags, 188 struct cred *cr, 189 caller_context_t *ct) 190 { 191 #ifdef LODEBUG 192 lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp)); 193 #endif 194 vp = realvp(vp); 195 return (VOP_SETATTR(vp, vap, flags, cr, ct)); 196 } 197 198 static int 199 lo_access( 200 vnode_t *vp, 201 int mode, 202 int flags, 203 struct cred *cr, 204 caller_context_t *ct) 205 { 206 #ifdef LODEBUG 207 lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp)); 208 #endif 209 if (mode & VWRITE) { 210 if (vp->v_type == VREG && vn_is_readonly(vp)) 211 return (EROFS); 212 } 213 vp = realvp(vp); 214 return (VOP_ACCESS(vp, mode, flags, cr, ct)); 215 } 216 217 static int 218 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct) 219 { 220 #ifdef LODEBUG 221 lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp)); 222 #endif 223 vp = realvp(vp); 224 return (VOP_FSYNC(vp, syncflag, cr, ct)); 225 } 226 227 /*ARGSUSED*/ 228 static void 229 lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct) 230 { 231 #ifdef LODEBUG 232 lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp)); 233 #endif 234 freelonode(vtol(vp)); 235 } 236 237 /* ARGSUSED */ 238 static int 239 lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct) 240 { 241 #ifdef LODEBUG 242 lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp)); 243 #endif 244 vp = realvp(vp); 245 return (VOP_FID(vp, fidp, ct)); 246 } 247 248 /* 249 * Given a vnode of lofs type, lookup nm name and 250 * return a shadow vnode (of lofs type) of the 251 * real vnode found. 252 * 253 * Due to the nature of lofs, there is a potential 254 * looping in path traversal. 255 * 256 * starting from the mount point of an lofs; 257 * a loop is defined to be a traversal path 258 * where the mount point or the real vnode of 259 * the root of this lofs is encountered twice. 260 * Once at the start of traversal and second 261 * when the looping is found. 262 * 263 * When a loop is encountered, a shadow of the 264 * covered vnode is returned to stop the looping. 265 * 266 * This normally works, but with the advent of 267 * the new automounter, returning the shadow of the 268 * covered vnode (autonode, in this case) does not 269 * stop the loop. Because further lookup on this 270 * lonode will cause the autonode to call lo_lookup() 271 * on the lonode covering it. 272 * 273 * example "/net/jurassic/net/jurassic" is a loop. 274 * returning the shadow of the autonode corresponding to 275 * "/net/jurassic/net/jurassic" will not terminate the 276 * loop. To solve this problem we allow the loop to go 277 * through one more level component lookup. Whichever 278 * directory is then looked up in "/net/jurassic/net/jurassic" 279 * the vnode returned is the vnode covered by the autonode 280 * "net" and this will terminate the loop. 281 * 282 * Lookup for dot dot has to be dealt with separately. 283 * It will be nice to have a "one size fits all" kind 284 * of solution, so that we don't have so many ifs statement 285 * in the lo_lookup() to handle dotdot. But, since 286 * there are so many special cases to handle different 287 * kinds looping above, we need special codes to handle 288 * dotdot lookup as well. 289 */ 290 static int 291 lo_lookup( 292 vnode_t *dvp, 293 char *nm, 294 vnode_t **vpp, 295 struct pathname *pnp, 296 int flags, 297 vnode_t *rdir, 298 struct cred *cr, 299 caller_context_t *ct, 300 int *direntflags, 301 pathname_t *realpnp) 302 { 303 vnode_t *vp = NULL, *tvp = NULL, *nonlovp; 304 int error, is_indirectloop; 305 vnode_t *realdvp = realvp(dvp); 306 struct loinfo *li = vtoli(dvp->v_vfsp); 307 int looping = 0; 308 int autoloop = 0; 309 int doingdotdot = 0; 310 int nosub = 0; 311 int mkflag = 0; 312 313 /* 314 * If name is empty and no XATTR flags are set, then return 315 * dvp (empty name == lookup "."). If an XATTR flag is set 316 * then we need to call VOP_LOOKUP to get the xattr dir. 317 */ 318 if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) { 319 VN_HOLD(dvp); 320 *vpp = dvp; 321 return (0); 322 } 323 324 if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') { 325 doingdotdot++; 326 /* 327 * Handle ".." out of mounted filesystem 328 */ 329 while ((realdvp->v_flag & VROOT) && realdvp != rootdir) { 330 realdvp = realdvp->v_vfsp->vfs_vnodecovered; 331 ASSERT(realdvp != NULL); 332 } 333 } 334 335 *vpp = NULL; /* default(error) case */ 336 337 /* 338 * Do the normal lookup 339 */ 340 if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr, 341 ct, direntflags, realpnp)) { 342 vp = NULL; 343 goto out; 344 } 345 346 /* 347 * We do this check here to avoid returning a stale file handle to the 348 * caller. 349 */ 350 if (nm[0] == '.' && nm[1] == '\0') { 351 ASSERT(vp == realdvp); 352 VN_HOLD(dvp); 353 VN_RELE(vp); 354 *vpp = dvp; 355 return (0); 356 } 357 358 if (doingdotdot) { 359 if ((vtol(dvp))->lo_looping & LO_LOOPING) { 360 vfs_t *vfsp; 361 362 error = vn_vfsrlock_wait(realdvp); 363 if (error) 364 goto out; 365 vfsp = vn_mountedvfs(realdvp); 366 /* 367 * In the standard case if the looping flag is set and 368 * performing dotdot we would be returning from a 369 * covered vnode, implying vfsp could not be null. The 370 * exceptions being if we have looping and overlay 371 * mounts or looping and covered file systems. 372 */ 373 if (vfsp == NULL) { 374 /* 375 * Overlay mount or covered file system, 376 * so just make the shadow node. 377 */ 378 vn_vfsunlock(realdvp); 379 *vpp = makelonode(vp, li, 0); 380 (vtol(*vpp))->lo_looping |= LO_LOOPING; 381 return (0); 382 } 383 /* 384 * When looping get the actual found vnode 385 * instead of the vnode covered. 386 * Here we have to hold the lock for realdvp 387 * since an unmount during the traversal to the 388 * root vnode would turn *vfsp into garbage 389 * which would be fatal. 390 */ 391 error = VFS_ROOT(vfsp, &tvp); 392 vn_vfsunlock(realdvp); 393 394 if (error) 395 goto out; 396 397 if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) { 398 /* 399 * we're back at the real vnode 400 * of the rootvp 401 * 402 * return the rootvp 403 * Ex: /mnt/mnt/.. 404 * where / has been lofs-mounted 405 * onto /mnt. Return the lofs 406 * node mounted at /mnt. 407 */ 408 *vpp = tvp; 409 VN_RELE(vp); 410 return (0); 411 } else { 412 /* 413 * We are returning from a covered 414 * node whose vfs_mountedhere is 415 * not pointing to vfs of the current 416 * root vnode. 417 * This is a condn where in we 418 * returned a covered node say Zc 419 * but Zc is not the cover of current 420 * root. 421 * i.e.., if X is the root vnode 422 * lookup(Zc,"..") is taking us to 423 * X. 424 * Ex: /net/X/net/X/Y 425 * 426 * If LO_AUTOLOOP (autofs/lofs looping detected) 427 * has been set then we are encountering the 428 * cover of Y (Y being any directory vnode 429 * under /net/X/net/X/). 430 * When performing a dotdot set the 431 * returned vp to the vnode covered 432 * by the mounted lofs, ie /net/X/net/X 433 */ 434 VN_RELE(tvp); 435 if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) { 436 VN_RELE(vp); 437 vp = li->li_rootvp; 438 vp = vp->v_vfsp->vfs_vnodecovered; 439 VN_HOLD(vp); 440 *vpp = makelonode(vp, li, 0); 441 (vtol(*vpp))->lo_looping |= LO_LOOPING; 442 return (0); 443 } 444 } 445 } else { 446 /* 447 * No frills just make the shadow node. 448 */ 449 *vpp = makelonode(vp, li, 0); 450 return (0); 451 } 452 } 453 454 nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB); 455 456 /* 457 * If this vnode is mounted on, then we 458 * traverse to the vnode which is the root of 459 * the mounted file system. 460 */ 461 if (!nosub && (error = traverse(&vp))) 462 goto out; 463 464 /* 465 * Make a lnode for the real vnode. 466 */ 467 if (vp->v_type != VDIR || nosub) { 468 *vpp = makelonode(vp, li, 0); 469 if (IS_DEVVP(*vpp)) { 470 vnode_t *svp; 471 472 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 473 VN_RELE(*vpp); 474 if (svp == NULL) 475 error = ENOSYS; 476 else 477 *vpp = svp; 478 } 479 return (error); 480 } 481 482 /* 483 * if the found vnode (vp) is not of type lofs 484 * then we're just going to make a shadow of that 485 * vp and get out. 486 * 487 * If the found vnode (vp) is of lofs type, and 488 * we're not doing dotdot, check if we are 489 * looping. 490 */ 491 if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) { 492 /* 493 * Check if we're looping, i.e. 494 * vp equals the root vp of the lofs, directly 495 * or indirectly, return the covered node. 496 */ 497 498 if (!((vtol(dvp))->lo_looping & LO_LOOPING)) { 499 if (vp == li->li_rootvp) { 500 /* 501 * Direct looping condn. 502 * Ex:- X is / mounted directory so lookup of 503 * /X/X is a direct looping condn. 504 */ 505 tvp = vp; 506 vp = vp->v_vfsp->vfs_vnodecovered; 507 VN_HOLD(vp); 508 VN_RELE(tvp); 509 looping++; 510 } else { 511 /* 512 * Indirect looping can be defined as 513 * real lookup returning rootvp of the current 514 * tree in any level of recursion. 515 * 516 * This check is useful if there are multiple 517 * levels of lofs indirections. Suppose vnode X 518 * in the current lookup has as its real vnode 519 * another lofs node. Y = realvp(X) Y should be 520 * a lofs node for the check to continue or Y 521 * is not the rootvp of X. 522 * Ex:- say X and Y are two vnodes 523 * say real(Y) is X and real(X) is Z 524 * parent vnode for X and Y is Z 525 * lookup(Y,"path") say we are looking for Y 526 * again under Y and we have to return Yc. 527 * but the lookup of Y under Y doesnot return 528 * Y the root vnode again here is why. 529 * 1. lookup(Y,"path of Y") will go to 530 * 2. lookup(real(Y),"path of Y") and then to 531 * 3. lookup(real(X),"path of Y"). 532 * and now what lookup level 1 sees is the 533 * outcome of 2 but the vnode Y is due to 534 * lookup(Z,"path of Y") so we have to skip 535 * intermediate levels to find if in any level 536 * there is a looping. 537 */ 538 is_indirectloop = 0; 539 nonlovp = vp; 540 while ( 541 vfs_matchops(nonlovp->v_vfsp, lo_vfsops) && 542 !(is_indirectloop)) { 543 if (li->li_rootvp == nonlovp) { 544 is_indirectloop++; 545 break; 546 } 547 nonlovp = realvp(nonlovp); 548 } 549 550 if (is_indirectloop) { 551 VN_RELE(vp); 552 vp = nonlovp; 553 vp = vp->v_vfsp->vfs_vnodecovered; 554 VN_HOLD(vp); 555 looping++; 556 } 557 } 558 } else { 559 /* 560 * come here only because of the interaction between 561 * the autofs and lofs. 562 * 563 * Lookup of "/net/X/net/X" will return a shadow of 564 * an autonode X_a which we call X_l. 565 * 566 * Lookup of anything under X_l, will trigger a call to 567 * auto_lookup(X_a,nm) which will eventually call 568 * lo_lookup(X_lr,nm) where X_lr is the root vnode of 569 * the current lofs. 570 * 571 * We come here only when we are called with X_l as dvp 572 * and look for something underneath. 573 * 574 * Now that an autofs/lofs looping condition has been 575 * identified any directory vnode contained within 576 * dvp will be set to the vnode covered by the 577 * mounted autofs. Thus all directories within dvp 578 * will appear empty hence teminating the looping. 579 * The LO_AUTOLOOP flag is set on the returned lonode 580 * to indicate the termination of the autofs/lofs 581 * looping. This is required for the correct behaviour 582 * when performing a dotdot. 583 */ 584 realdvp = realvp(dvp); 585 while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) { 586 realdvp = realvp(realdvp); 587 } 588 589 error = VFS_ROOT(realdvp->v_vfsp, &tvp); 590 if (error) 591 goto out; 592 /* 593 * tvp now contains the rootvp of the vfs of the 594 * real vnode of dvp. The directory vnode vp is set 595 * to the covered vnode to terminate looping. No 596 * distinction is made between any vp as all directory 597 * vnodes contained in dvp are returned as the covered 598 * vnode. 599 */ 600 VN_RELE(vp); 601 vp = tvp; /* possibly is an autonode */ 602 603 /* 604 * Need to find the covered vnode 605 */ 606 if (vp->v_vfsp->vfs_vnodecovered == NULL) { 607 /* 608 * We don't have a covered vnode so this isn't 609 * an autonode. To find the autonode simply 610 * find the vnode covered by the lofs rootvp. 611 */ 612 vp = li->li_rootvp; 613 vp = vp->v_vfsp->vfs_vnodecovered; 614 VN_RELE(tvp); 615 error = VFS_ROOT(vp->v_vfsp, &tvp); 616 if (error) 617 goto out; 618 vp = tvp; /* now this is an autonode */ 619 if (vp->v_vfsp->vfs_vnodecovered == NULL) { 620 /* 621 * Still can't find a covered vnode. 622 * Fail the lookup, or we'd loop. 623 */ 624 error = ENOENT; 625 goto out; 626 } 627 } 628 vp = vp->v_vfsp->vfs_vnodecovered; 629 VN_HOLD(vp); 630 VN_RELE(tvp); 631 /* 632 * Force the creation of a new lnode even if the hash 633 * table contains a lnode that references this vnode. 634 */ 635 mkflag = LOF_FORCE; 636 autoloop++; 637 } 638 } 639 *vpp = makelonode(vp, li, mkflag); 640 641 if ((looping) || 642 (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) { 643 (vtol(*vpp))->lo_looping |= LO_LOOPING; 644 } 645 646 if (autoloop) { 647 (vtol(*vpp))->lo_looping |= LO_AUTOLOOP; 648 } 649 650 out: 651 if (error != 0 && vp != NULL) 652 VN_RELE(vp); 653 #ifdef LODEBUG 654 lo_dprint(4, 655 "lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n", 656 dvp, realvp(dvp), nm, *vpp, vp, error); 657 #endif 658 return (error); 659 } 660 661 /*ARGSUSED*/ 662 static int 663 lo_create( 664 vnode_t *dvp, 665 char *nm, 666 struct vattr *va, 667 enum vcexcl exclusive, 668 int mode, 669 vnode_t **vpp, 670 struct cred *cr, 671 int flag, 672 caller_context_t *ct, 673 vsecattr_t *vsecp) 674 { 675 int error; 676 vnode_t *vp = NULL; 677 678 #ifdef LODEBUG 679 lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp)); 680 #endif 681 if (*nm == '\0') { 682 ASSERT(vpp && dvp == *vpp); 683 vp = realvp(*vpp); 684 } 685 686 error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag, 687 ct, vsecp); 688 if (!error) { 689 *vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0); 690 if (IS_DEVVP(*vpp)) { 691 vnode_t *svp; 692 693 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 694 VN_RELE(*vpp); 695 if (svp == NULL) 696 error = ENOSYS; 697 else 698 *vpp = svp; 699 } 700 } else if (error == ENOSYS && exclusive == NONEXCL && 701 dvp == vtoli(dvp->v_vfsp)->li_rootvp && 702 realvp(dvp)->v_type == VREG) { 703 /* 704 * We have a single regular file lofs mounted, thus the file is 705 * the root vnode (the directory vp is the file vp). Some 706 * underlying file systems (e.g. tmpfs or ufs) properly handle 707 * this style of create but at least zfs won't support create 708 * this way (see zfs_fvnodeops_template which has fs_nosys for 709 * the vop_create entry because zfs_create doesn't work 710 * properly for this case). 711 */ 712 if ((error = VOP_ACCESS(dvp, mode, 0, cr, NULL)) == 0) { 713 /* 714 * Since we already know the vnode for the existing 715 * file we can handle create as a no-op, as expected, 716 * truncating the file if necessary. 717 */ 718 struct vattr vattr; 719 720 vattr.va_size = 0; 721 vattr.va_mask = AT_SIZE; 722 723 if ((va->va_mask & AT_SIZE) != 0 && va->va_size == 0 && 724 VOP_SETATTR(dvp, &vattr, 0, CRED(), NULL) != 0) 725 return (error); 726 727 /* 728 * vn_createat will do a vn_rele on the file if it is 729 * pre-existing, which it is in the case of a single 730 * file mounted as the root. Thus, when we eventually 731 * close the file the count will already be 1 so the 732 * vnode would be freed. To prevent that, we add an 733 * extra hold here. 734 */ 735 VN_HOLD(dvp); 736 *vpp = dvp; 737 error = 0; 738 } 739 } 740 741 return (error); 742 } 743 744 static int 745 lo_remove( 746 vnode_t *dvp, 747 char *nm, 748 struct cred *cr, 749 caller_context_t *ct, 750 int flags) 751 { 752 #ifdef LODEBUG 753 lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp)); 754 #endif 755 dvp = realvp(dvp); 756 return (VOP_REMOVE(dvp, nm, cr, ct, flags)); 757 } 758 759 static int 760 lo_link( 761 vnode_t *tdvp, 762 vnode_t *vp, 763 char *tnm, 764 struct cred *cr, 765 caller_context_t *ct, 766 int flags) 767 { 768 vnode_t *realvp; 769 770 #ifdef LODEBUG 771 lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp)); 772 #endif 773 774 /* 775 * The source and destination vnodes may be in different lofs 776 * filesystems sharing the same underlying filesystem, so we need to 777 * make sure that the filesystem containing the source vnode is not 778 * mounted read-only (vn_link() has already checked the target vnode). 779 * 780 * In a situation such as: 781 * 782 * /data - regular filesystem 783 * /foo - lofs mount of /data/foo 784 * /bar - read-only lofs mount of /data/bar 785 * 786 * This disallows a link from /bar/somefile to /foo/somefile, 787 * which would otherwise allow changes to somefile on the read-only 788 * mounted /bar. 789 */ 790 791 if (vn_is_readonly(vp)) { 792 return (EROFS); 793 } 794 while (vn_matchops(vp, lo_vnodeops)) { 795 vp = realvp(vp); 796 } 797 798 /* 799 * In the case where the source vnode is on another stacking 800 * filesystem (such as specfs), the loop above will 801 * terminate before finding the true underlying vnode. 802 * 803 * We use VOP_REALVP here to continue the search. 804 */ 805 if (VOP_REALVP(vp, &realvp, ct) == 0) 806 vp = realvp; 807 808 while (vn_matchops(tdvp, lo_vnodeops)) { 809 tdvp = realvp(tdvp); 810 } 811 if (vp->v_vfsp != tdvp->v_vfsp) 812 return (EXDEV); 813 return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags)); 814 } 815 816 static int 817 lo_rename( 818 vnode_t *odvp, 819 char *onm, 820 vnode_t *ndvp, 821 char *nnm, 822 struct cred *cr, 823 caller_context_t *ct, 824 int flags) 825 { 826 vnode_t *tnvp; 827 828 #ifdef LODEBUG 829 lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp)); 830 #endif 831 /* 832 * If we are coming from a loop back mounted fs, that has been 833 * mounted in the same filesystem as where we want to move to, 834 * and that filesystem is read/write, but the lofs filesystem is 835 * read only, we don't want to allow a rename of the file. The 836 * vn_rename code checks to be sure the target is read/write already 837 * so that is not necessary here. However, consider the following 838 * example: 839 * / - regular root fs 840 * /foo - directory in root 841 * /foo/bar - file in foo directory(in root fs) 842 * /baz - directory in root 843 * mount -F lofs -o ro /foo /baz - all still in root 844 * directory 845 * The fact that we mounted /foo on /baz read only should stop us 846 * from renaming the file /foo/bar /bar, but it doesn't since 847 * / is read/write. We are still renaming here since we are still 848 * in the same filesystem, it is just that we do not check to see 849 * if the filesystem we are coming from in this case is read only. 850 */ 851 if (odvp->v_vfsp->vfs_flag & VFS_RDONLY) 852 return (EROFS); 853 /* 854 * We need to make sure we're not trying to remove a mount point for a 855 * filesystem mounted on top of lofs, which only we know about. 856 */ 857 if (vn_matchops(ndvp, lo_vnodeops)) /* Not our problem. */ 858 goto rename; 859 860 /* 861 * XXXci - Once case-insensitive behavior is implemented, it should 862 * be added here. 863 */ 864 if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr, 865 ct, NULL, NULL) != 0) 866 goto rename; 867 if (tnvp->v_type != VDIR) { 868 VN_RELE(tnvp); 869 goto rename; 870 } 871 if (vn_mountedvfs(tnvp)) { 872 VN_RELE(tnvp); 873 return (EBUSY); 874 } 875 VN_RELE(tnvp); 876 rename: 877 /* 878 * Since the case we're dealing with above can happen at any layer in 879 * the stack of lofs filesystems, we need to recurse down the stack, 880 * checking to see if there are any instances of a filesystem mounted on 881 * top of lofs. In order to keep on using the lofs version of 882 * VOP_RENAME(), we make sure that while the target directory is of type 883 * lofs, the source directory (the one used for getting the fs-specific 884 * version of VOP_RENAME()) is also of type lofs. 885 */ 886 if (vn_matchops(ndvp, lo_vnodeops)) { 887 ndvp = realvp(ndvp); /* Check the next layer */ 888 } else { 889 /* 890 * We can go fast here 891 */ 892 while (vn_matchops(odvp, lo_vnodeops)) { 893 odvp = realvp(odvp); 894 } 895 if (odvp->v_vfsp != ndvp->v_vfsp) 896 return (EXDEV); 897 } 898 return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags)); 899 } 900 901 static int 902 lo_mkdir( 903 vnode_t *dvp, 904 char *nm, 905 struct vattr *va, 906 vnode_t **vpp, 907 struct cred *cr, 908 caller_context_t *ct, 909 int flags, 910 vsecattr_t *vsecp) 911 { 912 int error; 913 914 #ifdef LODEBUG 915 lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp)); 916 #endif 917 error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp); 918 if (!error) 919 *vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0); 920 return (error); 921 } 922 923 static int 924 lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 925 { 926 #ifdef LODEBUG 927 lo_dprint(4, "lo_realvp %p\n", vp); 928 #endif 929 while (vn_matchops(vp, lo_vnodeops)) 930 vp = realvp(vp); 931 932 if (VOP_REALVP(vp, vpp, ct) != 0) 933 *vpp = vp; 934 return (0); 935 } 936 937 static int 938 lo_rmdir( 939 vnode_t *dvp, 940 char *nm, 941 vnode_t *cdir, 942 struct cred *cr, 943 caller_context_t *ct, 944 int flags) 945 { 946 vnode_t *rvp = cdir; 947 948 #ifdef LODEBUG 949 lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp)); 950 #endif 951 /* if cdir is lofs vnode ptr get its real vnode ptr */ 952 if (vn_matchops(dvp, vn_getops(rvp))) 953 (void) lo_realvp(cdir, &rvp, ct); 954 dvp = realvp(dvp); 955 return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags)); 956 } 957 958 static int 959 lo_symlink( 960 vnode_t *dvp, 961 char *lnm, 962 struct vattr *tva, 963 char *tnm, 964 struct cred *cr, 965 caller_context_t *ct, 966 int flags) 967 { 968 #ifdef LODEBUG 969 lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp)); 970 #endif 971 dvp = realvp(dvp); 972 return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags)); 973 } 974 975 static int 976 lo_readlink( 977 vnode_t *vp, 978 struct uio *uiop, 979 struct cred *cr, 980 caller_context_t *ct) 981 { 982 vp = realvp(vp); 983 return (VOP_READLINK(vp, uiop, cr, ct)); 984 } 985 986 static int 987 lo_readdir( 988 vnode_t *vp, 989 struct uio *uiop, 990 struct cred *cr, 991 int *eofp, 992 caller_context_t *ct, 993 int flags) 994 { 995 #ifdef LODEBUG 996 lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp)); 997 #endif 998 vp = realvp(vp); 999 return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags)); 1000 } 1001 1002 static int 1003 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct) 1004 { 1005 vp = realvp(vp); 1006 return (VOP_RWLOCK(vp, write_lock, ct)); 1007 } 1008 1009 static void 1010 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct) 1011 { 1012 vp = realvp(vp); 1013 VOP_RWUNLOCK(vp, write_lock, ct); 1014 } 1015 1016 static int 1017 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1018 { 1019 vp = realvp(vp); 1020 return (VOP_SEEK(vp, ooff, noffp, ct)); 1021 } 1022 1023 static int 1024 lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 1025 { 1026 while (vn_matchops(vp1, lo_vnodeops)) 1027 vp1 = realvp(vp1); 1028 while (vn_matchops(vp2, lo_vnodeops)) 1029 vp2 = realvp(vp2); 1030 return (VOP_CMP(vp1, vp2, ct)); 1031 } 1032 1033 static int 1034 lo_frlock( 1035 vnode_t *vp, 1036 int cmd, 1037 struct flock64 *bfp, 1038 int flag, 1039 offset_t offset, 1040 struct flk_callback *flk_cbp, 1041 cred_t *cr, 1042 caller_context_t *ct) 1043 { 1044 vp = realvp(vp); 1045 return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1046 } 1047 1048 static int 1049 lo_space( 1050 vnode_t *vp, 1051 int cmd, 1052 struct flock64 *bfp, 1053 int flag, 1054 offset_t offset, 1055 struct cred *cr, 1056 caller_context_t *ct) 1057 { 1058 vp = realvp(vp); 1059 return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct)); 1060 } 1061 1062 static int 1063 lo_getpage( 1064 vnode_t *vp, 1065 offset_t off, 1066 size_t len, 1067 uint_t *prot, 1068 struct page *parr[], 1069 size_t psz, 1070 struct seg *seg, 1071 caddr_t addr, 1072 enum seg_rw rw, 1073 struct cred *cr, 1074 caller_context_t *ct) 1075 { 1076 vp = realvp(vp); 1077 return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr, 1078 ct)); 1079 } 1080 1081 static int 1082 lo_putpage( 1083 vnode_t *vp, 1084 offset_t off, 1085 size_t len, 1086 int flags, 1087 struct cred *cr, 1088 caller_context_t *ct) 1089 { 1090 vp = realvp(vp); 1091 return (VOP_PUTPAGE(vp, off, len, flags, cr, ct)); 1092 } 1093 1094 static int 1095 lo_map( 1096 vnode_t *vp, 1097 offset_t off, 1098 struct as *as, 1099 caddr_t *addrp, 1100 size_t len, 1101 uchar_t prot, 1102 uchar_t maxprot, 1103 uint_t flags, 1104 struct cred *cr, 1105 caller_context_t *ct) 1106 { 1107 vp = realvp(vp); 1108 return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct)); 1109 } 1110 1111 static int 1112 lo_addmap( 1113 vnode_t *vp, 1114 offset_t off, 1115 struct as *as, 1116 caddr_t addr, 1117 size_t len, 1118 uchar_t prot, 1119 uchar_t maxprot, 1120 uint_t flags, 1121 struct cred *cr, 1122 caller_context_t *ct) 1123 { 1124 vp = realvp(vp); 1125 return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr, 1126 ct)); 1127 } 1128 1129 static int 1130 lo_delmap( 1131 vnode_t *vp, 1132 offset_t off, 1133 struct as *as, 1134 caddr_t addr, 1135 size_t len, 1136 uint_t prot, 1137 uint_t maxprot, 1138 uint_t flags, 1139 struct cred *cr, 1140 caller_context_t *ct) 1141 { 1142 vp = realvp(vp); 1143 return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr, 1144 ct)); 1145 } 1146 1147 static int 1148 lo_poll( 1149 vnode_t *vp, 1150 short events, 1151 int anyyet, 1152 short *reventsp, 1153 struct pollhead **phpp, 1154 caller_context_t *ct) 1155 { 1156 vp = realvp(vp); 1157 return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct)); 1158 } 1159 1160 static int 1161 lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count, 1162 caller_context_t *ct) 1163 { 1164 vp = realvp(vp); 1165 return (VOP_DUMP(vp, addr, bn, count, ct)); 1166 } 1167 1168 static int 1169 lo_pathconf( 1170 vnode_t *vp, 1171 int cmd, 1172 ulong_t *valp, 1173 struct cred *cr, 1174 caller_context_t *ct) 1175 { 1176 vp = realvp(vp); 1177 return (VOP_PATHCONF(vp, cmd, valp, cr, ct)); 1178 } 1179 1180 static int 1181 lo_pageio( 1182 vnode_t *vp, 1183 struct page *pp, 1184 u_offset_t io_off, 1185 size_t io_len, 1186 int flags, 1187 cred_t *cr, 1188 caller_context_t *ct) 1189 { 1190 vp = realvp(vp); 1191 return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct)); 1192 } 1193 1194 static void 1195 lo_dispose( 1196 vnode_t *vp, 1197 page_t *pp, 1198 int fl, 1199 int dn, 1200 cred_t *cr, 1201 caller_context_t *ct) 1202 { 1203 vp = realvp(vp); 1204 if (vp != NULL && !VN_ISKAS(vp)) 1205 VOP_DISPOSE(vp, pp, fl, dn, cr, ct); 1206 } 1207 1208 static int 1209 lo_setsecattr( 1210 vnode_t *vp, 1211 vsecattr_t *secattr, 1212 int flags, 1213 struct cred *cr, 1214 caller_context_t *ct) 1215 { 1216 if (vn_is_readonly(vp)) 1217 return (EROFS); 1218 vp = realvp(vp); 1219 return (VOP_SETSECATTR(vp, secattr, flags, cr, ct)); 1220 } 1221 1222 static int 1223 lo_getsecattr( 1224 vnode_t *vp, 1225 vsecattr_t *secattr, 1226 int flags, 1227 struct cred *cr, 1228 caller_context_t *ct) 1229 { 1230 vp = realvp(vp); 1231 return (VOP_GETSECATTR(vp, secattr, flags, cr, ct)); 1232 } 1233 1234 static int 1235 lo_shrlock( 1236 vnode_t *vp, 1237 int cmd, 1238 struct shrlock *shr, 1239 int flag, 1240 cred_t *cr, 1241 caller_context_t *ct) 1242 { 1243 vp = realvp(vp); 1244 return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct)); 1245 } 1246 1247 /* 1248 * Loopback vnode operations vector. 1249 */ 1250 1251 struct vnodeops *lo_vnodeops; 1252 1253 const fs_operation_def_t lo_vnodeops_template[] = { 1254 VOPNAME_OPEN, { .vop_open = lo_open }, 1255 VOPNAME_CLOSE, { .vop_close = lo_close }, 1256 VOPNAME_READ, { .vop_read = lo_read }, 1257 VOPNAME_WRITE, { .vop_write = lo_write }, 1258 VOPNAME_IOCTL, { .vop_ioctl = lo_ioctl }, 1259 VOPNAME_SETFL, { .vop_setfl = lo_setfl }, 1260 VOPNAME_GETATTR, { .vop_getattr = lo_getattr }, 1261 VOPNAME_SETATTR, { .vop_setattr = lo_setattr }, 1262 VOPNAME_ACCESS, { .vop_access = lo_access }, 1263 VOPNAME_LOOKUP, { .vop_lookup = lo_lookup }, 1264 VOPNAME_CREATE, { .vop_create = lo_create }, 1265 VOPNAME_REMOVE, { .vop_remove = lo_remove }, 1266 VOPNAME_LINK, { .vop_link = lo_link }, 1267 VOPNAME_RENAME, { .vop_rename = lo_rename }, 1268 VOPNAME_MKDIR, { .vop_mkdir = lo_mkdir }, 1269 VOPNAME_RMDIR, { .vop_rmdir = lo_rmdir }, 1270 VOPNAME_READDIR, { .vop_readdir = lo_readdir }, 1271 VOPNAME_SYMLINK, { .vop_symlink = lo_symlink }, 1272 VOPNAME_READLINK, { .vop_readlink = lo_readlink }, 1273 VOPNAME_FSYNC, { .vop_fsync = lo_fsync }, 1274 VOPNAME_INACTIVE, { .vop_inactive = lo_inactive }, 1275 VOPNAME_FID, { .vop_fid = lo_fid }, 1276 VOPNAME_RWLOCK, { .vop_rwlock = lo_rwlock }, 1277 VOPNAME_RWUNLOCK, { .vop_rwunlock = lo_rwunlock }, 1278 VOPNAME_SEEK, { .vop_seek = lo_seek }, 1279 VOPNAME_CMP, { .vop_cmp = lo_cmp }, 1280 VOPNAME_FRLOCK, { .vop_frlock = lo_frlock }, 1281 VOPNAME_SPACE, { .vop_space = lo_space }, 1282 VOPNAME_REALVP, { .vop_realvp = lo_realvp }, 1283 VOPNAME_GETPAGE, { .vop_getpage = lo_getpage }, 1284 VOPNAME_PUTPAGE, { .vop_putpage = lo_putpage }, 1285 VOPNAME_MAP, { .vop_map = lo_map }, 1286 VOPNAME_ADDMAP, { .vop_addmap = lo_addmap }, 1287 VOPNAME_DELMAP, { .vop_delmap = lo_delmap }, 1288 VOPNAME_POLL, { .vop_poll = lo_poll }, 1289 VOPNAME_DUMP, { .vop_dump = lo_dump }, 1290 VOPNAME_DUMPCTL, { .error = fs_error }, /* XXX - why? */ 1291 VOPNAME_PATHCONF, { .vop_pathconf = lo_pathconf }, 1292 VOPNAME_PAGEIO, { .vop_pageio = lo_pageio }, 1293 VOPNAME_DISPOSE, { .vop_dispose = lo_dispose }, 1294 VOPNAME_SETSECATTR, { .vop_setsecattr = lo_setsecattr }, 1295 VOPNAME_GETSECATTR, { .vop_getsecattr = lo_getsecattr }, 1296 VOPNAME_SHRLOCK, { .vop_shrlock = lo_shrlock }, 1297 NULL, NULL 1298 }; 1299