/*
 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
 * Copyright (c) 1992, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <miscfs/union/union.h>

#include <vm/vm.h>
#include <vm/vnode_pager.h>

#include <vm/vm_prot.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

int uniondebug = 0;

#if UDEBUG_ENABLED
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
#else
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
#endif

static int	union_abortop __P((struct vop_abortop_args *ap));
static int	union_access __P((struct vop_access_args *ap));
static int	union_advlock __P((struct vop_advlock_args *ap));
static int	union_bmap __P((struct vop_bmap_args *ap));
static int	union_close __P((struct vop_close_args *ap));
static int	union_create __P((struct vop_create_args *ap));
static int	union_fsync __P((struct vop_fsync_args *ap));
static int	union_getattr __P((struct vop_getattr_args *ap));
static int	union_inactive __P((struct vop_inactive_args *ap));
static int	union_ioctl __P((struct vop_ioctl_args *ap));
static int	union_lease __P((struct vop_lease_args *ap));
static int	union_link __P((struct vop_link_args *ap));
static int	union_lock __P((struct vop_lock_args *ap));
static int	union_lookup __P((struct vop_lookup_args *ap));
static int	union_lookup1 __P((struct vnode *udvp, struct vnode **dvp,
				   struct vnode **vpp,
				   struct componentname *cnp));
static int	union_mkdir __P((struct vop_mkdir_args *ap));
static int	union_mknod __P((struct vop_mknod_args *ap));
static int	union_mmap __P((struct vop_mmap_args *ap));
static int	union_open __P((struct vop_open_args *ap));
static int	union_pathconf __P((struct vop_pathconf_args *ap));
static int	union_print __P((struct vop_print_args *ap));
static int	union_read __P((struct vop_read_args *ap));
static int	union_readdir __P((struct vop_readdir_args *ap));
static int	union_readlink __P((struct vop_readlink_args *ap));
static int	union_reclaim __P((struct vop_reclaim_args *ap));
static int	union_remove __P((struct vop_remove_args *ap));
static int	union_rename __P((struct vop_rename_args *ap));
static int	union_revoke __P((struct vop_revoke_args *ap));
static int	union_rmdir __P((struct vop_rmdir_args *ap));
static int	union_poll __P((struct vop_poll_args *ap));
static int	union_setattr __P((struct vop_setattr_args *ap));
static int	union_strategy __P((struct vop_strategy_args *ap));
static int	union_getpages __P((struct vop_getpages_args *ap));
static int	union_putpages __P((struct vop_putpages_args *ap));
static int	union_symlink __P((struct vop_symlink_args *ap));
static int	union_unlock __P((struct vop_unlock_args *ap));
static int	union_whiteout __P((struct vop_whiteout_args *ap));
static int	union_write __P((struct vop_write_args *ap));

static __inline
struct vnode *
union_lock_upper(struct union_node *un, struct proc *p)
{
	struct vnode *uppervp;

	if ((uppervp = un->un_uppervp) != NULL) {
		VREF(uppervp);
		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p);
	}
	KASSERT((uppervp == NULL || uppervp->v_usecount > 0),
	    ("uppervp usecount is 0"));
	return(uppervp);
}
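
/*
 * The helpers in this block pair up as follows (illustrative sketch only;
 * VOP_GETATTR merely stands in for whatever operation the caller needs):
 *
 *	if ((uvp = union_lock_upper(un, p)) != NULLVP) {
 *		error = VOP_GETATTR(uvp, &va, cred, p);
 *		union_unlock_upper(uvp, p);	(a vput: unlock + release)
 *	}
 *
 * union_lock_upper() returns the upper vnode referenced and exclusively
 * locked, or NULLVP if there is no upper layer; union_unlock_upper()
 * undoes both in a single vput().
 */
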
static __inline
void
union_unlock_upper(struct vnode *uppervp, struct proc *p)
{
	vput(uppervp);
}

static __inline
struct vnode *
union_lock_other(struct union_node *un, struct proc *p)
{
	struct vnode *vp;

	if (un->un_uppervp != NULL) {
		vp = union_lock_upper(un, p);
	} else if ((vp = un->un_lowervp) != NULL) {
		VREF(vp);
		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p);
	}
	return(vp);
}

static __inline
void
union_unlock_other(struct vnode *vp, struct proc *p)
{
	vput(vp);
}

/*
 * union_lookup1:
 *
 *	udvp must be exclusively locked on call and will remain
 *	exclusively locked on return.  This is the mount point
 *	for our filesystem.
 *
 *	dvp	Our base directory, locked and referenced.
 *		The passed dvp will be dereferenced and unlocked on return
 *		and a new dvp will be returned which is locked and
 *		referenced in the same variable.
 *
 *	vpp	is filled in with the result if no error occurred,
 *		locked and ref'd.
 *
 *		If an error is returned, *vpp is set to NULLVP.  If no
 *		error occurs, *vpp is returned with a reference and an
 *		exclusive lock.
 */
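
/*
 * Call sketch (illustrative only; 'startdir' is a placeholder name and
 * error handling is elided):
 *
 *	struct vnode *dvp = startdir;		startdir: locked + referenced
 *	struct vnode *vp;
 *
 *	error = union_lookup1(um->um_uppervp, &dvp, &vp, cnp);
 *
 * On success both dvp and vp come back locked and referenced, and dvp may
 * differ from startdir if mount points were crossed along the way.
 */
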
static int
union_lookup1(udvp, pdvp, vpp, cnp)
	struct vnode *udvp;
	struct vnode **pdvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *dvp = *pdvp;
	struct vnode *tdvp;
	struct mount *mp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			dvp = dvp->v_mount->mnt_vnodecovered;
			VREF(dvp);
			vput(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
		}
	}

	/*
	 * Set the return dvp to be the upperdvp (the 'parent directory').
	 */
	*pdvp = dvp;

	/*
	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
	 * no changes will have been made to dvp, so we are set to return.
	 */

	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error) {
		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
		*vpp = NULL;
		return (error);
	}

	/*
	 * The parent directory will have been unlocked, unless lookup
	 * found the last component or if dvp == tdvp (tdvp must be locked).
	 *
	 * We want our dvp to remain locked and ref'd.  We also want tdvp
	 * to remain locked and ref'd.
	 */
	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));

	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);

	/*
	 * Lastly check if the current node is a mount point, in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 *
	 * We use dvp as a temporary variable here, it is no longer related
	 * to the dvp above.  However, we have to ensure that both *pdvp and
	 * tdvp are locked on return.
	 */

	dvp = tdvp;
	while (
	    dvp != udvp &&
	    (dvp->v_type == VDIR) &&
	    (mp = dvp->v_mountedhere)
	) {
		int relock_pdvp = 0;

		if (vfs_busy(mp, 0, 0, p))
			continue;

		if (dvp == *pdvp)
			relock_pdvp = 1;
		vput(dvp);
		dvp = NULL;
		error = VFS_ROOT(mp, &dvp);

		vfs_unbusy(mp, p);

		if (relock_pdvp)
			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p);

		if (error) {
			*vpp = NULL;
			return (error);
		}
	}
	*vpp = dvp;
	return (0);
}
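
/*
 * union_lookup:
 *
 *	ap->a_dvp is locked on entry.  On success *ap->a_vpp is returned
 *	referenced and exclusively locked, otherwise it is set to NULLVP.
 *	LOCKPARENT is forced on for the internal layer lookups and the
 *	caller's original setting is restored before returning.
 */
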
static int
union_lookup(ap)
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;		/* starting dir */
	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	int lockparent = cnp->cn_flags & LOCKPARENT;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	struct ucred *saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

	*ap->a_vpp = NULLVP;

	/*
	 * Disallow write attempts to the filesystem mounted read-only.
	 */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		return (EROFS);
	}

	/*
	 * For any lookups we do, always return with the parent locked.
	 */
	cnp->cn_flags |= LOCKPARENT;

	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	uerror = ENOENT;
	lerror = ENOENT;

	/*
	 * Get a private lock on uppervp and a reference, effectively
	 * taking it out of the union_node's control.
	 *
	 * We must lock upperdvp while holding our lock on dvp
	 * to avoid a deadlock.
	 */
	upperdvp = union_lock_upper(dun, p);

	/*
	 * Do the lookup in the upper level.  If that level
	 * consumes additional pathnames, then assume that something
	 * special is going on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		/*
		 * We do not have to worry about the DOTDOT case, we've
		 * already unlocked dvp.
		 */
		UDEBUG(("A %p\n", upperdvp));

		/*
		 * Do the lookup.  We must supply a locked and referenced
		 * upperdvp to the function and will get a new locked and
		 * referenced upperdvp back, with the old one having been
		 * dereferenced.
		 *
		 * If an error is returned, uppervp will be NULLVP.  If no
		 * error occurs, uppervp will be the locked and referenced
		 * return vnode, or possibly NULL, depending on what is being
		 * requested.  It is possible that the returned uppervp
		 * will be the same as upperdvp.
		 */
		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
		UDEBUG((
		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
		    uerror,
		    upperdvp,
		    upperdvp->v_usecount,
		    VOP_ISLOCKED(upperdvp),
		    uppervp,
		    (uppervp ? uppervp->v_usecount : -99),
		    (uppervp ? VOP_ISLOCKED(uppervp) : -99)
		));

		/*
		 * Disallow write attempts to the filesystem mounted
		 * read-only.
		 */
		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
			error = EROFS;
			goto out;
		}

		/*
		 * Special case.  If cn_consume != 0 skip out.  The result
		 * of the lookup is transferred to our return variable.  If
		 * an error occurred we have to throw away the results.
		 */

		if (cnp->cn_consume != 0) {
			if ((error = uerror) == 0) {
				*ap->a_vpp = uppervp;
				uppervp = NULL;
			}
			goto out;
		}

		/*
		 * Calculate whiteout, fall through.
		 */

		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				int terror;

				terror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred, cnp->cn_proc);
				if (terror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	}

	/*
	 * In a similar way to the upper layer, do the lookup
	 * in the lower layer.  This time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */

	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		UDEBUG(("B %p\n", lowerdvp));

		/*
		 * Force only LOOKUPs on the lower node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * We shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 *
		 * union_lookup1() requires lowervp to be locked on entry,
		 * and it will be unlocked on return.  The ref count will
		 * not change.  On return lowervp doesn't represent anything
		 * to us so we NULL it out.
		 */
		VREF(lowerdvp);
		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p);
		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
		if (lowerdvp == lowervp)
			vrele(lowerdvp);
		else
			vput(lowerdvp);
		lowerdvp = NULL;	/* lowerdvp invalid after vput */

		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (cnp->cn_consume != 0 || lerror == EACCES) {
			if ((error = lerror) == 0) {
				*ap->a_vpp = lowervp;
				lowervp = NULL;
			}
			goto out;
		}
	} else {
		UDEBUG(("C %p\n", lowerdvp));
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
				VREF(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p);
				lerror = 0;
			}
		}
	}

	/*
	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
	 *
	 *	1. If both layers returned an error, select the upper
	 *	   layer.
	 *
	 *	2. If the upper layer failed and the bottom layer succeeded,
	 *	   two subcases occur:
	 *
	 *	   a.	The bottom vnode is not a directory, in which case
	 *		just return a new union vnode referencing an
	 *		empty top layer and the existing bottom layer.
	 *
	 *	   b.	The bottom vnode is a directory, in which case
	 *		create a new directory in the top layer and
	 *		fall through to case 3.
	 *
	 *	3. If the top layer succeeded, then return a new union
	 *	   vnode referencing whatever the new top layer and
	 *	   the bottom layer returned.
	 */
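
	/*
	 * Example (illustrative): looking up "doc" when only the lower
	 * layer contains a directory by that name follows case 2b below --
	 * union_mkshadow() creates the matching top-layer directory first,
	 * so later creates inside "doc" have an upper directory to land in.
	 */
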
	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		error = uerror;
		goto out;
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
			/*
			 * Oops, uppervp has a problem, we may have to shadow.
			 */
			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
			if (uerror) {
				error = uerror;
				goto out;
			}
		}
	}

	/*
	 * Must call union_allocvp() with both the upper and lower vnodes
	 * referenced and the upper vnode locked.  ap->a_vpp is returned
	 * referenced and locked.  lowervp, uppervp, and upperdvp are
	 * absorbed by union_allocvp() whether it succeeds or fails.
	 *
	 * upperdvp is the parent directory of uppervp, which may be
	 * different, depending on the path, from dun->un_uppervp.  That's
	 * why it is a separate argument.  Note that it must be unlocked.
	 *
	 * dvp must be locked on entry to the call and will be locked on
	 * return.
	 */

	if (uppervp && uppervp != upperdvp)
		VOP_UNLOCK(uppervp, 0, p);
	if (lowervp)
		VOP_UNLOCK(lowervp, 0, p);
	if (upperdvp)
		VOP_UNLOCK(upperdvp, 0, p);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
	    uppervp, lowervp, 1);

	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp,
	    (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99));

	uppervp = NULL;
	upperdvp = NULL;
	lowervp = NULL;

	/*
	 * Termination Code
	 *
	 *	- put away any extra junk laying around.  Note that lowervp
	 *	  (if not NULL) will never be the same as *ap->a_vpp and
	 *	  neither will uppervp, because when we set that state we
	 *	  NULL-out lowervp or uppervp.  On the other hand, upperdvp
	 *	  may match uppervp or *ap->a_vpp.
	 *
	 *	- relock/unlock dvp if appropriate.
	 */

out:
	if (upperdvp) {
		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
			vrele(upperdvp);
		else
			vput(upperdvp);
	}

	if (uppervp)
		vput(uppervp);

	if (lowervp)
		vput(lowervp);

	/*
	 * Restore LOCKPARENT state.
	 */

	if (!lockparent)
		cnp->cn_flags &= ~LOCKPARENT;

	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
	    ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99),
	    lowervp, uppervp));

	/*
	 * dvp lock state, determine whether to relock dvp.  dvp is expected
	 * to be locked on return if:
	 *
	 *	- there was an error (other than EJUSTRETURN), or
	 *	- we hit the last component and lockparent is true
	 *
	 * Note that *ap->a_vpp == dvp only if no error occurred, in which
	 * case dvp is locked anyway because it is the returned vnode.
	 */

	if (*ap->a_vpp != dvp) {
		if ((error == 0 || error == EJUSTRETURN) &&
		    (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) {
			VOP_UNLOCK(dvp, 0, p);
		}
	}

	/*
	 * Diagnostics.
	 */

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen == 1 &&
	    cnp->cn_nameptr[0] == '.' &&
	    *ap->a_vpp != dvp) {
		panic("union_lookup returning . (%p) not same as startdir (%p)",
		    *ap->a_vpp, dvp);
	}
#endif

	return (error);
}
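
/*
 * The object-creating operations below (create, mknod, mkdir, symlink)
 * all follow one pattern: lock the upper directory via union_lock_upper(),
 * run the corresponding VOP on the upper layer and, where a usable vnode
 * comes back, wrap it in a union node with union_allocvp().  With no upper
 * layer present they simply fail with EROFS.
 */
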
/*
 * union_create:
 *
 * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
 * locked if no error occurs, otherwise it is garbage.
 */

static int
union_create(ap)
	struct vop_create_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct vnode *dvp;
	int error = EROFS;

	if ((dvp = union_lock_upper(dun, p)) != NULL) {
		struct vnode *vp;
		struct mount *mp;

		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
		if (error == 0) {
			mp = ap->a_dvp->v_mount;
			VOP_UNLOCK(vp, 0, p);
			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount));
			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
				cnp, vp, NULLVP, 1);
			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount));
		}
		union_unlock_upper(dvp, p);
	}
	return (error);
}

static int
union_whiteout(ap)
	struct vop_whiteout_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
		int a_flags;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct vnode *uppervp;
	int error = EOPNOTSUPP;

	if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) {
		error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
		union_unlock_upper(uppervp, cnp->cn_proc);
	}
	return(error);
}

/*
 * union_mknod:
 *
 * a_dvp is locked on entry and should remain locked on return.
 * a_vpp is garbage whether an error occurs or not.
 */

static int
union_mknod(ap)
	struct vop_mknod_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp;
	int error = EROFS;

	if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) {
		struct vnode *vp;

		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
		/* vp is garbage whether an error occurs or not */
		union_unlock_upper(dvp, cnp->cn_proc);
	}
	return (error);
}
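
/*
 * Copy-up example (illustrative): an open(2) for writing on a regular file
 * that exists only in the lower layer causes union_copyup() to replicate
 * the file into the upper layer first; the open then proceeds on the new
 * upper vnode.  A read-only open uses the lower vnode directly and bumps
 * un_openl so that union_close() can balance the count later.
 */
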
/*
 * union_open:
 *
 *	Run the open VOP.  When opening the underlying vnode we have to
 *	mimic vn_open().  What we *really* need to do to avoid screwups if
 *	the open semantics change is to call vn_open().  For example, ufs
 *	blows up if you open a file but do not vmio it prior to writing.
 */

static int
union_open(ap)
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	struct ucred *cred = ap->a_cred;
	struct proc *p = ap->a_p;
	int error = 0;
	int tvpisupper = 1;

	/*
	 * If there is an existing upper vp then simply open that.
	 * The upper vp takes precedence over the lower vp.  When opening
	 * a lower vp for writing, copy it to the uppervp and then open the
	 * uppervp.
	 *
	 * At the end of this section tvp will be left locked.
	 */
	if ((tvp = union_lock_upper(un, p)) == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise we can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			int docopy = !(mode & O_TRUNC);
			error = union_copyup(un, docopy, cred, p);
			tvp = union_lock_upper(un, p);
		} else {
			un->un_openl++;
			VREF(tvp);
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
			tvpisupper = 0;
		}
	}

	/*
	 * We are holding the correct vnode, open it.
	 */

	if (error == 0)
		error = VOP_OPEN(tvp, mode, cred, p);

	/*
	 * Absolutely necessary or UFS will blow up.
	 */
	if (error == 0 && vn_canvmio(tvp) == TRUE) {
		error = vfs_object_create(tvp, p, cred);
	}

	/*
	 * Release any locks held.
	 */
	if (tvpisupper) {
		if (tvp)
			union_unlock_upper(tvp, p);
	} else {
		vput(tvp);
	}
	return (error);
}

/*
 * union_close:
 *
 *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
 *	the case, we do not change it.
 */

static int
union_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	if ((vp = un->un_uppervp) == NULLVP) {
#ifdef UNION_DIAGNOSTIC
		if (un->un_openl <= 0)
			panic("union: un_openl cnt");
#endif
		--un->un_openl;
		vp = un->un_lowervp;
	}
	ap->a_vp = vp;
	return (VCALL(vp, VOFFSET(vop_close), ap));
}

/*
 * Check access permission on the union vnode.
 * The access check being enforced is to check
 * against both the underlying vnode, and any
 * copied vnode.  This ensures that no additional
 * file permissions are given away simply because
 * the user caused an implicit file copy.
 */
static int
union_access(ap)
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct proc *p = ap->a_p;
	int error = EACCES;
	struct vnode *vp;

	/*
	 * Disallow write attempts on filesystems mounted read-only.
	 */
	if ((ap->a_mode & VWRITE) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (ap->a_vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}

	if ((vp = union_lock_upper(un, p)) != NULLVP) {
		ap->a_vp = vp;
		error = VCALL(vp, VOFFSET(vop_access), ap);
		union_unlock_upper(vp, p);
		return(error);
	}

	if ((vp = un->un_lowervp) != NULLVP) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		ap->a_vp = vp;

		/*
		 * Remove VWRITE from a_mode if our mount point is RW, because
		 * we want to allow writes and lowervp may be read-only.
		 */
		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ap->a_mode &= ~VWRITE;

		error = VCALL(vp, VOFFSET(vop_access), ap);
		if (error == 0) {
			struct union_mount *um;

			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);

			if (um->um_op == UNMNT_BELOW) {
				ap->a_cred = um->um_cred;
				error = VCALL(vp, VOFFSET(vop_access), ap);
			}
		}
		VOP_UNLOCK(vp, 0, p);
	}
	return(error);
}
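
/*
 * Credential example (illustrative): for a union mounted UNMNT_BELOW the
 * lower layer is checked twice above -- once with the caller's credentials
 * and once with those of the user who mounted the union -- so access is
 * granted only when both checks pass.
 */
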
/*
 * We handle getattr only to change the fsid and
 * track object sizes.
 *
 * It's not clear whether VOP_GETATTR is to be
 * called with the vnode locked or not.  stat() calls
 * it with (vp) locked, and fstat() calls it with
 * (vp) unlocked.
 *
 * Because of this we cannot use our normal locking functions
 * if we do not intend to lock the main a_vp node.  At the moment
 * we are running without any specific locking at all, but any
 * programmer should beware that care must be taken if locking
 * is ever added to this function.
 */

static int
union_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	struct vattr *vap;
	struct vattr va;

	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 */

	vap = ap->a_vap;

	if ((vp = un->un_uppervp) != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
		vp = un->un_lowervp;
		vap = &va;
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
		ap->a_vap->va_nlink += vap->va_nlink;
	return (0);
}
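
/*
 * Link-count example (illustrative): a directory present in both layers
 * reports the sum of the two layers' va_nlink values.  The total may
 * overcount, but it is never too small, which is what keeps link-count
 * based tree walkers from pruning the tree too early.
 */
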
static int
union_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct proc *p = ap->a_p;
	struct vattr *vap = ap->a_vap;
	struct vnode *uppervp;
	int error;

	/*
	 * Disallow write attempts on filesystems mounted read-only.
	 */
	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	     vap->va_mtime.tv_sec != VNOVAL ||
	     vap->va_mode != (mode_t)VNOVAL)) {
		return (EROFS);
	}

	/*
	 * Handle the case of truncating a lower object to zero size
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (ap->a_vap->va_size != 0),
			    ap->a_cred, ap->a_p);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in the upper layer,
	 * otherwise return a read-only filesystem error.
	 */
	error = EROFS;
	if ((uppervp = union_lock_upper(un, p)) != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
					ap->a_cred, ap->a_p);
		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
		union_unlock_upper(uppervp, p);
	}
	return (error);
}

/*
 * union_getpages:
 */

static int
union_getpages(struct vop_getpages_args *ap)
{
	int r;

	r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
		ap->a_count, ap->a_reqpage);
	return(r);
}

/*
 * union_putpages:
 */

static int
union_putpages(struct vop_putpages_args *ap)
{
	int r;

	r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
	return(r);
}

static int
union_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct proc *p = ap->a_uio->uio_procp;
	struct vnode *uvp;
	int error;

	uvp = union_lock_other(un, p);
	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));

	if (ap->a_vp->v_flag & VOBJBUF)
		union_vm_coherency(ap->a_vp, ap->a_uio, 0);

	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	union_unlock_other(uvp, p);

	/*
	 * XXX
	 * Perhaps the size of the underlying object has changed under
	 * our feet.  Take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		off_t cur = ap->a_uio->uio_offset;

		if (uvp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				union_newsize(ap->a_vp, cur, VNOVAL);
		} else {
			if (cur > un->un_lowersz)
				union_newsize(ap->a_vp, VNOVAL, cur);
		}
	}
	return (error);
}
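
/*
 * Note on the size check above: VOP_READ() advances uio_offset past the
 * data just transferred, so an offset beyond the size we have cached for
 * that layer (un_uppersz / un_lowersz) means the underlying file grew and
 * union_newsize() must bring our cached size up to date.
 */
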
static int
union_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct proc *p = ap->a_uio->uio_procp;
	struct vnode *uppervp;
	int error;

	if ((uppervp = union_lock_upper(un, p)) == NULLVP)
		panic("union: missing upper layer in write");

	/*
	 * Since our VM pages are associated with our vnode rather than
	 * the real vnode, and since we do not run our reads and writes
	 * through our own VM cache, we have a VM/VFS coherency problem.
	 * We solve it by invalidating or flushing the associated VM
	 * pages prior to allowing a normal read or write to occur.
	 *
	 * VM-backed writes (UIO_NOCOPY) have to be converted to normal
	 * writes because we are not cache-coherent.  Normal writes need
	 * to be made coherent with our VM-backing store, which we do by
	 * first flushing any dirty VM pages associated with the write
	 * range, and then destroying any clean VM pages associated with
	 * the write range.
	 */

	if (ap->a_uio->uio_segflg == UIO_NOCOPY) {
		ap->a_uio->uio_segflg = UIO_SYSSPACE;
	} else if (ap->a_vp->v_flag & VOBJBUF) {
		union_vm_coherency(ap->a_vp, ap->a_uio, 1);
	}

	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);

	/*
	 * The size of the underlying object may be changed by the
	 * write.
	 */
	if (error == 0) {
		off_t cur = ap->a_uio->uio_offset;

		if (cur > un->un_uppersz)
			union_newsize(ap->a_vp, cur, VNOVAL);
	}
	union_unlock_upper(uppervp, p);
	return (error);
}

static int
union_lease(ap)
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap;
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_lease), ap));
}

static int
union_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
}

static int
union_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_poll), ap));
}

static int
union_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (UPPERVP(vp))
		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
	if (LOWERVP(vp))
		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
	vgone(vp);
	return (0);
}

static int
union_mmap(ap)
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		int a_fflags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
}

static int
union_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	int error = 0;
	struct proc *p = ap->a_p;
	struct vnode *targetvp;
	struct union_node *un = VTOUNION(ap->a_vp);

	if ((targetvp = union_lock_other(un, p)) != NULLVP) {
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p);
		union_unlock_other(targetvp, p);
	}

	return (error);
}
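
/*
 * union_remove() and union_rmdir() below share one pattern: if an upper
 * object exists it is removed through the upper layer, with DOWHITEOUT set
 * when union_dowhiteout() reports that a lower object would otherwise show
 * through; if only a lower object exists, union_mkwhiteout() just plants a
 * whiteout entry in the upper directory instead.
 */
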
/*
 * union_remove:
 *
 *	Remove the specified cnp.  The dvp and vp are passed to us locked
 *	and must remain locked on return.
 */

static int
union_remove(ap)
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct vnode *uppervp;
	struct vnode *upperdvp;
	int error;

	if ((upperdvp = union_lock_upper(dun, p)) == NULLVP)
		panic("union remove: null upper vnode");

	if ((uppervp = union_lock_upper(un, p)) != NULLVP) {
		if (union_dowhiteout(un, cnp->cn_cred, p))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(upperdvp, uppervp, cnp);
#if 0
		/* XXX */
		if (!error)
			union_removed_upper(un);
#endif
		union_unlock_upper(uppervp, p);
	} else {
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    upperdvp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, p);
	return (error);
}

/*
 * union_link:
 *
 *	tdvp will be locked on entry, vp will not be locked on entry.
 *	tdvp should remain locked on return and vp should remain unlocked
 *	on return.
 */

static int
union_link(ap)
	struct vop_link_args /* {
		struct vnode *a_tdvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct union_node *dun = VTOUNION(ap->a_tdvp);
	struct vnode *vp;
	struct vnode *tdvp;
	int error = 0;

	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
		vp = ap->a_vp;
	} else {
		struct union_node *tun = VTOUNION(ap->a_vp);

		if (tun->un_uppervp == NULLVP) {
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
#if 0
			if (dun->un_uppervp == tun->un_dirvp) {
				if (dun->un_flags & UN_ULOCK) {
					dun->un_flags &= ~UN_ULOCK;
					VOP_UNLOCK(dun->un_uppervp, 0, p);
				}
			}
#endif
			error = union_copyup(tun, 1, cnp->cn_cred, p);
#if 0
			if (dun->un_uppervp == tun->un_dirvp) {
				vn_lock(dun->un_uppervp,
					LK_EXCLUSIVE | LK_RETRY, p);
				dun->un_flags |= UN_ULOCK;
			}
#endif
			VOP_UNLOCK(ap->a_vp, 0, p);
		}
		vp = tun->un_uppervp;
	}

	if (error)
		return (error);

	/*
	 * Make sure the upper is locked, then unlock the union directory
	 * we were called with to avoid a deadlock while we are calling
	 * VOP_LINK() on the upper (with tdvp locked and vp not locked).
	 * Our ap->a_tdvp is expected to be locked on return.
	 */

	if ((tdvp = union_lock_upper(dun, p)) == NULLVP)
		return (EROFS);

	VOP_UNLOCK(ap->a_tdvp, 0, p);		/* unlock calling node */
	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */

	/*
	 * We have to unlock tdvp prior to relocking our calling node in
	 * order to avoid a deadlock.
	 */
	union_unlock_upper(tdvp, p);
	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p);
	return (error);
}
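
/*
 * union_rename:
 *
 *	Each of the four vnodes passed in is translated to its upper-layer
 *	equivalent before the rename is handed to the upper filesystem.  A
 *	source that exists only in the lower layer is copied up first (for
 *	regular files) or rejected with EXDEV (for directories), and
 *	DOWHITEOUT is set when a lower object would otherwise reappear
 *	under the old name.
 */
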
static int
union_rename(ap)
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	int error;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
	 * replace the fdvp, release the original one and ref the new one.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);

		if (un->un_uppervp == NULLVP) {
			/*
			 * This should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}
		fdvp = un->un_uppervp;
		VREF(fdvp);
		vrele(ap->a_fdvp);
	}

	/*
	 * Figure out what fvp to pass to our upper or lower vnode.  If we
	 * replace the fvp, release the original one and ref the new one.
	 */

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
#if 0
		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
#endif

		if (un->un_uppervp == NULLVP) {
			switch(fvp->v_type) {
			case VREG:
				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc);
				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc);
				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc);
				if (error)
					goto bad;
				break;
			case VDIR:
				/*
				 * XXX not yet.
				 *
				 * There is only one way to rename a directory
				 * based in the lowervp, and that is to copy
				 * the entire directory hierarchy.  Otherwise
				 * it would not last across a reboot.
				 */
#if 0
				vrele(fvp);
				fvp = NULL;
				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc);
				error = union_mkshadow(um, fdvp,
					    ap->a_fcnp, &un->un_uppervp);
				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc);
				if (un->un_uppervp)
					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc);
				if (error)
					goto bad;
				break;
#endif
			default:
				error = EXDEV;
				goto bad;
			}
		}

		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;
		fvp = un->un_uppervp;
		VREF(fvp);
		vrele(ap->a_fvp);
	}

	/*
	 * Figure out what tdvp (destination directory) to pass to the
	 * lower level.  If we replace it with uppervp, we need to vput the
	 * old one.  The exclusive lock is transferred to what we will pass
	 * down in the VOP_RENAME() and we replace uppervp with a simple
	 * reference.
	 */

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);

		if (un->un_uppervp == NULLVP) {
			/*
			 * This should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/*
		 * The new tdvp is a lock and reference on uppervp.
		 * Put away the old tdvp.
		 */
		tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc);
		vput(ap->a_tdvp);
	}

	/*
	 * Figure out what tvp (destination file) to pass to the
	 * lower level.
	 *
	 * If the uppervp file does not exist, put away the (wrong)
	 * file and change tvp to NULL.
	 */

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		tvp = union_lock_upper(un, ap->a_tcnp->cn_proc);
		vput(ap->a_tvp);
		/* note: tvp may be NULL */
	}

	/*
	 * VOP_RENAME() releases/vputs prior to returning, so we have no
	 * cleanup to do.
	 */

	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));

	/*
	 * Error.  We still have to release / vput the various elements.
	 */

bad:
	vrele(fdvp);
	if (fvp)
		vrele(fvp);
	vput(tdvp);
	if (tvp != NULLVP) {
		if (tvp != tdvp)
			vput(tvp);
		else
			vrele(tvp);
	}
	return (error);
}

static int
union_mkdir(ap)
	struct vop_mkdir_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct vnode *upperdvp;
	int error = EROFS;

	if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) {
		struct vnode *vp;

		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
		union_unlock_upper(upperdvp, p);

		if (error == 0) {
			VOP_UNLOCK(vp, 0, p);
			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount));
			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount));
		}
	}
	return (error);
}

static int
union_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct vnode *upperdvp;
	struct vnode *uppervp;
	int error;

	if ((upperdvp = union_lock_upper(dun, p)) == NULLVP)
		panic("union rmdir: null upper vnode");

	if ((uppervp = union_lock_upper(un, p)) != NULLVP) {
		if (union_dowhiteout(un, cnp->cn_cred, p))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
		union_unlock_upper(uppervp, p);
	} else {
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    dun->un_uppervp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, p);
	return (error);
}

/*
 * union_symlink:
 *
 *	dvp is locked on entry and remains locked on return.  a_vpp is
 *	garbage (unused).
 */

static int
union_symlink(ap)
	struct vop_symlink_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct vnode *dvp;
	int error = EROFS;

	if ((dvp = union_lock_upper(dun, p)) != NULLVP) {
		struct vnode *vp;

		error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target);
		/* vp is garbage whether an error occurs or not */
		*ap->a_vpp = NULLVP;
		union_unlock_upper(dvp, p);
	}
	return (error);
}

/*
 * union_readdir works in concert with getdirentries() and
 * readdir(3) to provide a list of entries in the unioned
 * directories.  getdirentries() is responsible for walking
 * down the union stack.  readdir(3) is responsible for
 * eliminating duplicate names from the returned data stream.
 */
static int
union_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		u_long *a_cookies;
		int a_ncookies;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct proc *p = ap->a_uio->uio_procp;
	struct vnode *uvp;
	int error = 0;

	if ((uvp = union_lock_upper(un, p)) != NULLVP) {
		ap->a_vp = uvp;
		error = VCALL(uvp, VOFFSET(vop_readdir), ap);
		union_unlock_upper(uvp, p);
	}
	return(error);
}

static int
union_readlink(ap)
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
	} */ *ap;
{
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp;

	vp = union_lock_other(un, p);
	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));

	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_readlink), ap);
	union_unlock_other(vp, p);

	return (error);
}

/*
 * union_abortop:
 *
 *	dvp is locked on entry and left locked on return.
 */

static int
union_abortop(ap)
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct union_node *un = VTOUNION(ap->a_dvp);
	int islocked = VOP_ISLOCKED(ap->a_dvp);
	struct vnode *vp;
	int error;

	if (islocked) {
		vp = union_lock_other(un, p);
	} else {
		vp = OTHERVP(ap->a_dvp);
	}
	KASSERT(vp != NULL, ("union_abortop: backing vnode missing!"));

	ap->a_dvp = vp;
	error = VCALL(vp, VOFFSET(vop_abortop), ap);

	if (islocked)
		union_unlock_other(vp, p);

	return (error);
}

/*
 * union_inactive:
 *
 *	Called with the vnode locked.  We are expected to unlock the vnode.
 */

static int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

#if 0
	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, p);
	}
#endif

	VOP_UNLOCK(vp, 0, p);

	if ((un->un_flags & UN_CACHED) == 0)
		vgone(vp);

	return (0);
}

static int
union_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	union_freevp(ap->a_vp);

	return (0);
}

static int
union_lock(ap)
	struct vop_lock_args *ap;
{
#if 0
	struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;
	int flags = ap->a_flags;
	struct union_node *un;
#endif
	int error;

	error = vop_stdlock(ap);
#if 0
	un = VTOUNION(vp);

	if (error == 0) {
		/*
		 * Lock the upper if it exists and this is an exclusive lock
		 * request.
		 */
		if (un->un_uppervp != NULLVP &&
		    (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
			if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) {
				error = vn_lock(un->un_uppervp, flags, p);
				if (error) {
					struct vop_unlock_args uap = { 0 };
					uap.a_vp = ap->a_vp;
					uap.a_flags = ap->a_flags;
					uap.a_p = ap->a_p;
					vop_stdunlock(&uap);
					return (error);
				}
				un->un_flags |= UN_ULOCK;
			}
		}
	}
#endif
	return (error);
}

/*
 * union_unlock:
 *
 *	Unlock our union node.  (The code that used to also unlock uppervp
 *	is disabled below.)
 */
static int
union_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	int error;

	KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0),
	    ("uppervp usecount is 0"));

	error = vop_stdunlock(ap);
#if 0

	/*
	 * If no exclusive locks remain and we are holding an uppervp lock,
	 * remove the uppervp lock.
	 */

	if ((un->un_flags & UN_ULOCK) &&
	    lockstatus(&un->un_lock) != LK_EXCLUSIVE) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p);
	}
#endif
	return(error);
}
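
/*
 * The #if 0 sections in union_lock() and union_unlock() above are remnants
 * of the old UN_ULOCK scheme, in which the union node's lock state was tied
 * to uppervp's lock.  The current code instead takes uppervp locks
 * transiently via union_lock_upper()/union_unlock_upper().
 */
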
/*
 * union_bmap:
 *
 *	There isn't much we can do.  We cannot push through to the real
 *	vnode to get to the underlying device because this will bypass
 *	data cached by the real vnode.
 *
 *	For some reason we cannot return the 'real' vnode either, it seems
 *	to blow up memory maps.
 */

static int
union_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{
	return(EOPNOTSUPP);
}

static int
union_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
	       vp, UPPERVP(vp), LOWERVP(vp));
	if (UPPERVP(vp) != NULLVP)
		vprint("union: upper", UPPERVP(vp));
	if (LOWERVP(vp) != NULLVP)
		vprint("union: lower", LOWERVP(vp));

	return (0);
}

static int
union_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{
	int error;
	struct proc *p = curproc;		/* XXX */
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	vp = union_lock_other(un, p);
	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));

	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
	union_unlock_other(vp, p);

	return (error);
}

static int
union_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap;
{
	register struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
}


/*
 * XXX - vop_strategy must be hand coded because it has no
 * vnode in its arguments.
 * This goes away with a merged VM/buffer cache.
 */
static int
union_strategy(ap)
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	struct vnode *othervp = OTHERVP(bp->b_vp);

#ifdef DIAGNOSTIC
	if (othervp == NULLVP)
		panic("union_strategy: nil vp");
	if (((bp->b_flags & B_READ) == 0) &&
	    (othervp == LOWERVP(bp->b_vp)))
		panic("union_strategy: writing to lowervp");
#endif
	return (VOP_STRATEGY(othervp, bp));
}

/*
 * Global vfs data structures
 */
vop_t **union_vnodeop_p;
static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_abortop_desc,		(vop_t *) union_abortop },
	{ &vop_access_desc,		(vop_t *) union_access },
	{ &vop_advlock_desc,		(vop_t *) union_advlock },
	{ &vop_bmap_desc,		(vop_t *) union_bmap },
	{ &vop_close_desc,		(vop_t *) union_close },
	{ &vop_create_desc,		(vop_t *) union_create },
	{ &vop_fsync_desc,		(vop_t *) union_fsync },
	{ &vop_getpages_desc,		(vop_t *) union_getpages },
	{ &vop_putpages_desc,		(vop_t *) union_putpages },
	{ &vop_getattr_desc,		(vop_t *) union_getattr },
	{ &vop_inactive_desc,		(vop_t *) union_inactive },
	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lease_desc,		(vop_t *) union_lease },
	{ &vop_link_desc,		(vop_t *) union_link },
	{ &vop_lock_desc,		(vop_t *) union_lock },
	{ &vop_lookup_desc,		(vop_t *) union_lookup },
	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
	{ &vop_mknod_desc,		(vop_t *) union_mknod },
	{ &vop_mmap_desc,		(vop_t *) union_mmap },
	{ &vop_open_desc,		(vop_t *) union_open },
	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
	{ &vop_poll_desc,		(vop_t *) union_poll },
	{ &vop_print_desc,		(vop_t *) union_print },
	{ &vop_read_desc,		(vop_t *) union_read },
	{ &vop_readdir_desc,		(vop_t *) union_readdir },
	{ &vop_readlink_desc,		(vop_t *) union_readlink },
	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
	{ &vop_remove_desc,		(vop_t *) union_remove },
	{ &vop_rename_desc,		(vop_t *) union_rename },
	{ &vop_revoke_desc,		(vop_t *) union_revoke },
	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
	{ &vop_setattr_desc,		(vop_t *) union_setattr },
	{ &vop_strategy_desc,		(vop_t *) union_strategy },
	{ &vop_symlink_desc,		(vop_t *) union_symlink },
	{ &vop_unlock_desc,		(vop_t *) union_unlock },
	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
	{ &vop_write_desc,		(vop_t *) union_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

VNODEOP_SET(union_vnodeop_opv_desc);