/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
#include <vm/vm_object.h>	/* for vm cache coherency */
#include <vm/uma.h>

#include <fs/unionfs/union.h>

#include <sys/proc.h>

extern int	union_init(void);

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32
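
/*
 * Hash the (uppervp, lowervp) pair by summing the two pointer values.
 * Vnodes come from aligned kernel allocations, so the low-order bits
 * of the sum carry little information; it is shifted right by 8 before
 * being masked down to a bucket.  NHASH must stay a power of two for
 * the NHASH-1 mask below to select a valid bucket.
 */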

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))

static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component");
static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache");

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static void	union_dircache_r(struct vnode *vp, struct vnode ***vppp,
		    int *cntp);
static int	union_list_lock(int ix);
static void	union_list_unlock(int ix);
static int	union_relookup(struct union_mount *um, struct vnode *dvp,
		    struct vnode **vpp,
		    struct componentname *cnp,
		    struct componentname *cn, char *path,
		    int pathlen);
static void	union_updatevp(struct union_node *un,
		    struct vnode *uppervp,
		    struct vnode *lowervp);
static void	union_newlower(struct union_node *, struct vnode *);
static void	union_newupper(struct union_node *, struct vnode *);
static int	union_copyfile(struct vnode *, struct vnode *,
		    struct ucred *, struct thread *);
static int	union_vn_create(struct vnode **, struct union_node *,
		    struct thread *);
static int	union_vn_close(struct vnode *, int, struct ucred *,
		    struct thread *);

int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t)unvplock, sizeof(unvplock));
	return (0);
}

static int
union_list_lock(ix)
	int ix;
{
	if (unvplock[ix] & UNVP_LOCKED) {
		unvplock[ix] |= UNVP_WANT;
		(void) tsleep( &unvplock[ix], PINOD, "unllck", 0);
		return (1);
	}
	unvplock[ix] |= UNVP_LOCKED;
	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{
	unvplock[ix] &= ~UNVP_LOCKED;

	if (unvplock[ix] & UNVP_WANT) {
		unvplock[ix] &= ~UNVP_WANT;
		wakeup( &unvplock[ix]);
	}
}

/*
 * union_updatevp:
 *
 *	The uppervp, if not NULL, must be referenced and not locked by us
 *	The lowervp, if not NULL, must be referenced.
 *
 *	If uppervp and lowervp match pointers already installed, then
 *	nothing happens. The passed vp's (when matching) are not adjusted.
 *
 *	This routine may only be called by union_newupper() and
 *	union_newlower().
 */

static void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
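	/*
	 * For example, a node moving from bucket 7 to bucket 3 locks
	 * bucket 3 first and then bucket 7; a node moving the other
	 * way takes the same two locks in the same order, so the two
	 * movers cannot deadlock against each other.
	 */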
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash) {
		while (union_list_lock(lhash))
			continue;
	}

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_UNPATH);
				un->un_path = 0;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);
		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

/*
 * Set a new lowervp.  The passed lowervp must be referenced and will be
 * stored in the vp in a referenced state.
 */

static void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{
	union_updatevp(un, un->un_uppervp, lowervp);
}

/*
 * Set a new uppervp.  The passed uppervp must be locked and will be
 * stored in the vp in a locked state.  The caller should not unlock
 * uppervp.
 */

static void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{
	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
		UDEBUG(("union: %s size now %ld\n",
			(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
		/*
		 * There is no need to change size of non-existent object.
		 */
		/* vnode_pager_setsize(vp, sz); */
	}
}

/*
 * union_allocvp: allocate a union_node and associate it with a
 * parent union_node and one or two vnodes.
 *
 *	vpp	Holds the returned vnode locked and referenced if no
 *		error occurs.
 *
 *	mp	Holds the mount point.  mp may or may not be busied.
 *		allocvp() makes no changes to mp.
 *
 *	dvp	Holds the parent union_node to the one we wish to create.
 *		XXX may only be used to traverse an uncopied lowervp-based
 *		tree? XXX
 *
 *		dvp may or may not be locked.  allocvp() makes no changes
 *		to dvp.
 *
 *	upperdvp Holds the parent vnode to uppervp, generally used along
 *		with path component information to create a shadow of
 *		lowervp when uppervp does not exist.
 *
 *		upperdvp is referenced but unlocked on entry, and will be
 *		dereferenced on return.
 *
 *	uppervp	Holds the new uppervp vnode to be stored in the
 *		union_node we are allocating.  uppervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	lowervp	Holds the new lowervp vnode to be stored in the
 *		union_node we are allocating.  lowervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	cnp	Holds path component information to be coupled with
 *		lowervp and upperdvp to allow unionfs to create an uppervp
 *		later on.  Only used if lowervp is valid.  The contents
 *		of cnp is only valid for the duration of the call.
 *
 *	docache	Determines whether this node should be entered in the
 *		cache or whether it should be destroyed as soon as possible.
 *
 * All union_nodes are maintained on a singly-linked
 * list.  New nodes are only allocated when they cannot
 * be found on this list.  Entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * A single lock is kept for the entire list.  This is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  This lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * If an entry is found on the list, then call vget() to
 * take a reference.  This is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
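
/*
 * Sketch (illustrative, not a caller in this file): a lookup that found
 * a name in both layers might hand over its referenced layer vnodes
 * like so; ownership of the uvp and lvp references passes to
 * union_allocvp():
 *
 *	error = union_allocvp(&vp, mp, dvp, upperdvp, cnp, uvp, lvp, 1);
 *	if (error == 0)
 *		... vp is returned locked and referenced ...
 */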

int
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *dvp;		/* parent union vnode */
	struct vnode *upperdvp;		/* parent vnode of uppervp */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VV_ROOT;
	}
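
	/*
	 * Look for an existing union_node before allocating a new one.
	 * Up to three hash chains may hold a match: the (upper,lower)
	 * pair, an upper-only node, and a lower-only node, hence the
	 * three probes below.
	 */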

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		LIST_FOREACH(un, &unhead[hash], un_cache) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_thread : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */

		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_op == union_vnodeop_p &&
				 scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				VREF(dvp);
				VOP_UNLOCK(dvp, 0, td);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
			un, un->un_vnode, un->un_uppervp,
			(un->un_uppervp ? vrefcnt(un->un_uppervp) : -99),
			uppervp,
			(uppervp ? vrefcnt(uppervp) : -99)
		));

		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0,
			    ("union_allocvp: too few refs %d (at least 1 "
			    "required) on uppervp", vrefcnt(uppervp)));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(vrefcnt(uppervp) > 1,
			    ("union_allocvp: too few refs %d (at least 2 "
			    "required) on uppervp", vrefcnt(uppervp)));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = malloc(cnp->cn_namelen+1,
						M_UNPATH, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * Otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */

	error = getnewvnode("union", mp, union_vnodeop_p, vpp);
	if (error) {
		/*
		 * If an error occurs, clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	ASSERT_VOP_LOCKED(*vpp, "union_allocvp");
	(*vpp)->v_vflag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		VREF(dvp);
	un->un_dircache = NULL;
	un->un_openl = 0;

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		un->un_path = NULL;
		un->un_dirvp = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
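
/*
 * union_freevp: disassociate and release the layer vnodes attached to a
 * union_node and free its pathname storage.  Invoked when the union
 * vnode is being reclaimed, after the node has left the cache.
 */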
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP) {
		vrele(un->un_pvp);
		un->un_pvp = NULL;
	}
	if (un->un_uppervp != NULLVP) {
		vrele(un->un_uppervp);
		un->un_uppervp = NULL;
	}
	if (un->un_lowervp != NULLVP) {
		vrele(un->un_lowervp);
		un->un_lowervp = NULL;
	}
	if (un->un_dirvp != NULLVP) {
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		free(un->un_path, M_UNPATH);
		un->un_path = NULL;
	}

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  Both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * fvp and tvp are both exclusive locked on call, but their refcounts
 * haven't been bumped at all.
 */
static int
union_copyfile(fvp, tvp, cred, td)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct thread *td;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * Allocate a buffer of size MAXBSIZE.
	 * Loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * Give up at the first sign of trouble.
	 */

	bzero(&uio, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_LEASE(fvp, td, cred, LEASE_READ);
	VOP_LEASE(tvp, td, cred, LEASE_WRITE);

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;
		int count;
		int bufoffset;

		/*
		 * Setup for big read.
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
			break;

		/*
		 * Get bytes read, handle read eof case and setup for
		 * write loop.
		 */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;
		bufoffset = 0;

		/*
		 * Write until an error occurs or our buffer has been
		 * exhausted, then update the offset for the next read.
		 */
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
				break;
			bufoffset += (count - bufoffset) - uio.uio_resid;
		}
		uio.uio_offset = offset + bufoffset;
	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * union_copyup:
 *
 * Create the file on the upper layer and, if docopy is non-zero, copy
 * the lower layer's contents into it.
 *
 * un's vnode is assumed to be locked on entry and remains locked on exit.
 */

int
union_copyup(un, docopy, cred, td)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct thread *td;
{
	int error;
	struct mount *mp;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
	VOP_UNLOCK(un->un_lowervp, 0, td);
	if (error)
		return (error);

	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_vn_create(&uvp, un, td)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	lvp = un->un_lowervp;

	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE()
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
		if (error == 0 && vn_canvmio(lvp) == TRUE)
			error = vfs_object_create(lvp, td, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, td);
			VOP_UNLOCK(lvp, 0, td);
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
		}
		if (error == 0)
			UDEBUG(("union: copied up %s\n", un->un_path));

	}
	VOP_UNLOCK(uvp, 0, td);
	vn_finished_write(mp);
	union_newupper(un, uvp);
	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	union_vn_close(uvp, FWRITE, cred, td);
	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
			(void) VOP_OPEN(uvp, FREAD, cred, td, -1);
		}
		if (un->un_openl) {
			if (vn_canvmio(uvp) == TRUE)
				error = vfs_object_create(uvp, td, cred);
		}
		un->un_openl = 0;
	}

	return (error);
}

/*
 * union_relookup:
 *
 *	dvp should be locked on entry and will be locked on return.  No
 *	net change in the ref count will occur.
 *
 *	If an error is returned, *vpp will be invalid, otherwise it
 *	will hold a locked, referenced vnode.  If *vpp == dvp then
 *	remember that only one exclusive lock is held.
 */

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT(), some of the work done
	 * by namei(), some of the work done by lookup() and some of
	 * the work done by VOP_LOOKUP() when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn->cn_thread = cnp->cn_thread;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	VOP_UNLOCK(dvp, 0, cnp->cn_thread);

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */

	if ((error = relookup(dvp, vpp, cn)) != 0) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
		return (error);
	}

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 *
	 * We want to return with dvp as it was passed to us, so we get
	 * rid of our reference.
	 */
	vrele(dvp);
	return (0);
}
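
/*
 * Note on buffer ownership: on success the faked-up componentname holds
 * a namei_zone buffer (HASBUF is set).  The callers below are expected
 * to release it with uma_zfree(namei_zone, cn.cn_pnbuf) once the create
 * or whiteout operation is done.
 */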

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * It is locked (but not ref'd) on entry and return.
 * (cnp) is the component name to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked and ref'd.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct thread *td = cnp->cn_thread;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (*vpp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == *vpp)
			vrele(*vpp);
		else
			vput(*vpp);
		vn_finished_write(mp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * Policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	/*vput(dvp);*/
	vn_finished_write(mp);
	return (error);
}
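
/*
 * Whiteout background: a whiteout is an upper-layer directory marker
 * that masks the identically named entry in the lower layer, which
 * unionfs cannot modify.  It is what makes remove/rmdir of a
 * lower-layer object appear to succeed.
 */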

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * It is locked on entry and return.
 * (cnp) is the component name to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *wvp;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_finished_write(mp);
		return (error);
	}

	if (wvp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (wvp == dvp)
			vrele(wvp);
		else
			vput(wvp);
		vn_finished_write(mp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vn_finished_write(mp);
	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  This function is similar
 * in spirit to calling vn_open() but it avoids calling namei().
 * The problem with calling namei() is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup() is told where to start.
 *
 * On entry, the vnode associated with un is locked.  It remains locked
 * on return.
 *
 * If no error occurs, *vpp contains a locked referenced vnode for your
 * use.  If an error occurs, *vpp is undefined.
 */
static int
union_vn_create(vpp, un, td)
	struct vnode **vpp;
	struct union_node *un;
	struct thread *td;
{
	struct vnode *vp;
	struct ucred *cred = td->td_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode;
	struct componentname cn;

	*vpp = NULLVP;
	FILEDESC_LOCK(td->td_proc->p_fd);
	cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK(td->td_proc->p_fd);

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow()).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
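	/*
	 * un->un_path is expected to be valid here; it was saved by
	 * union_allocvp() when the node was created with a lower
	 * vnode and a component name.
	 */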
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn.cn_thread = td;
	cn.cn_cred = td->td_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */
	VREF(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	if (error)
		return (error);

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 */
	if (vp) {
		vput(un->un_dirvp);
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (vp == un->un_dirvp)
			vrele(vp);
		else
			vput(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE);
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vput(un->un_dirvp);
	if (error)
		return (error);

	error = VOP_OPEN(vp, fmode, cred, td, -1);
	if (error == 0 && vn_canvmio(vp) == TRUE)
		error = vfs_object_create(vp, td, cred);
	if (error) {
		vput(vp);
		return (error);
	}
	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

static int
union_vn_close(vp, fmode, cred, td)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct thread *td;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, td));
}

/*
 * union_removed_upper:
 *
 *	An upper-only file/directory has been removed; un-cache it so
 *	that the unionfs vnode gets reclaimed and the last uppervp
 *	reference disappears.
 *
 *	Called with union_node unlocked.
 */

void
union_removed_upper(un)
	struct union_node *un;
{
	if (un->un_flags & UN_CACHED) {
		int hash = UNION_HASH(un->un_uppervp, un->un_lowervp);

		while (union_list_lock(hash))
			continue;
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
		union_list_unlock(hash);
	}
}

/*
 * Determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, td)
	struct union_node *un;
	struct ucred *cred;
	struct thread *td;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}
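
/*
 * union_dircache_r: walk down through a stacked union directory,
 * collecting the non-union directory vnodes from the upper and lower
 * layers.  With vppp == NULL the routine just counts vnodes into
 * *cntp; with vppp set it stores referenced vnodes into the caller's
 * array, panicking if the precomputed count is exceeded.
 */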
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}
	} else {
		un = VTOUNION(vp);
		if (un->un_uppervp != NULLVP)
			union_dircache_r(un->un_uppervp, vppp, cntp);
		if (un->un_lowervp != NULLVP)
			union_dircache_r(un->un_lowervp, vppp, cntp);
	}
}

struct vnode *
union_dircache_get(vp, td)
	struct vnode *vp;
	struct thread *td;
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache, **newdircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	un = VTOUNION(vp);
	dircache = un->un_dircache;
	newdircache = NULL;

	nvp = NULLVP;

	if (dircache == NULL) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		newdircache = dircache = malloc(cnt * sizeof(struct vnode *),
						M_UNDCACHE, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == un->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/
	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99)));
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL,
	    *vpp, NULLVP, 0);
	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? vrefcnt(*vpp) : -99)));
	if (error)
		goto out;

	un->un_dircache = NULL;
	VTOUNION(nvp)->un_dircache = dircache;
	newdircache = NULL;

out:
	/*
	 * If we allocated a new dircache and couldn't attach
	 * it to a new vp, free the resources we allocated.
	 */
	if (newdircache) {
		for (vpp = newdircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(newdircache, M_UNDCACHE);
	}

	VOP_UNLOCK(vp, 0, td);
	return (nvp);
}
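
/*
 * Release every vnode held in a union_node's dircache array and free
 * the array itself.
 */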
void
union_dircache_free(struct union_node *un)
{
	struct vnode **vpp;

	for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
		vrele(*vpp);
	free(un->un_dircache, M_UNDCACHE);
	un->un_dircache = NULL;
}

/*
 * Module glue to remove #ifdef UNION from vfs_syscalls.c
 */
static int
union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
{
	int error = 0;

	if ((*vp)->v_op == union_vnodeop_p) {
		struct vnode *lvp;

		lvp = union_dircache_get(*vp, td);
		if (lvp != NULLVP) {
			struct vattr va;

			/*
			 * If the directory is opaque,
			 * then don't show lower entries.
			 */
			error = VOP_GETATTR(*vp, &va, fp->f_cred, td);
			if (va.va_flags & OPAQUE) {
				vput(lvp);
				lvp = NULLVP;
			}
		}

		if (lvp != NULLVP) {
			error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1);
			if (error == 0 && vn_canvmio(lvp) == TRUE)
				error = vfs_object_create(lvp, td, fp->f_cred);
			if (error) {
				vput(lvp);
				return (error);
			}
			VOP_UNLOCK(lvp, 0, td);
			FILE_LOCK(fp);
			fp->f_vnode = lvp;
			fp->f_data = lvp;
			fp->f_offset = 0;
			FILE_UNLOCK(fp);
			error = vn_close(*vp, FREAD, fp->f_cred, td);
			if (error)
				return (error);
			*vp = lvp;
			return (-1);	/* goto unionread */
		}
	}
	return (error);
}

static int
union_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		union_dircheckp = union_dircheck;
		break;
	case MOD_UNLOAD:
		union_dircheckp = NULL;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);