1 /* 2 * Copyright (c) 1994 Jan-Simon Pendry 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/fcntl.h> 44 #include <sys/file.h> 45 #include <sys/filedesc.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/module.h> 50 #include <sys/mount.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/stat.h> 54 #include <sys/vnode.h> 55 56 #include <vm/vm.h> 57 #include <vm/vm_extern.h> /* for vnode_pager_setsize */ 58 #include <vm/vm_object.h> /* for vm cache coherency */ 59 #include <vm/uma.h> 60 61 #include <fs/unionfs/union.h> 62 63 #include <sys/proc.h> 64 65 extern int union_init(void); 66 67 /* must be power of two, otherwise change UNION_HASH() */ 68 #define NHASH 32 69 70 /* unsigned int ... */ 71 #define UNION_HASH(u, l) \ 72 (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) 73 74 static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component"); 75 static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache"); 76 77 static LIST_HEAD(unhead, union_node) unhead[NHASH]; 78 static int unvplock[NHASH]; 79 80 static void union_dircache_r(struct vnode *vp, struct vnode ***vppp, 81 int *cntp); 82 static int union_list_lock(int ix); 83 static void union_list_unlock(int ix); 84 static int union_relookup(struct union_mount *um, struct vnode *dvp, 85 struct vnode **vpp, 86 struct componentname *cnp, 87 struct componentname *cn, char *path, 88 int pathlen); 89 static void union_updatevp(struct union_node *un, 90 struct vnode *uppervp, 91 struct vnode *lowervp); 92 static void union_newlower(struct union_node *, struct vnode *); 93 static void union_newupper(struct union_node *, struct vnode *); 94 static int union_copyfile(struct vnode *, struct vnode *, 95 struct ucred *, struct thread *); 96 static int union_vn_create(struct vnode **, struct union_node *, 97 struct thread *); 98 static int union_vn_close(struct vnode *, int, struct ucred *, 99 struct thread *); 100 101 int 102 union_init() 103 { 104 int i; 105 106 for (i = 0; i < NHASH; i++) 107 LIST_INIT(&unhead[i]); 108 bzero((caddr_t)unvplock, sizeof(unvplock)); 109 return (0); 110 } 111 112 static int 113 union_list_lock(ix) 114 int ix; 115 { 116 if (unvplock[ix] & UNVP_LOCKED) { 117 unvplock[ix] |= UNVP_WANT; 118 (void) tsleep( &unvplock[ix], PINOD, "unllck", 0); 119 return (1); 120 } 121 unvplock[ix] |= UNVP_LOCKED; 122 return (0); 123 } 124 125 static void 126 union_list_unlock(ix) 127 int ix; 128 { 129 unvplock[ix] &= ~UNVP_LOCKED; 130 131 if (unvplock[ix] & UNVP_WANT) { 132 unvplock[ix] &= ~UNVP_WANT; 133 wakeup( &unvplock[ix]); 134 } 135 } 136 137 /* 138 * union_updatevp: 139 * 140 * The uppervp, if not NULL, must be referenced and not locked by us 141 * The lowervp, if not NULL, must be referenced. 142 * 143 * If uppervp and lowervp match pointers already installed, then 144 * nothing happens. The passed vp's (when matching) are not adjusted. 145 * 146 * This routine may only be called by union_newupper() and 147 * union_newlower(). 148 */ 149 150 static void 151 union_updatevp(un, uppervp, lowervp) 152 struct union_node *un; 153 struct vnode *uppervp; 154 struct vnode *lowervp; 155 { 156 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); 157 int nhash = UNION_HASH(uppervp, lowervp); 158 int docache = (lowervp != NULLVP || uppervp != NULLVP); 159 int lhash, uhash; 160 161 /* 162 * Ensure locking is ordered from lower to higher 163 * to avoid deadlocks. 164 */ 165 if (nhash < ohash) { 166 lhash = nhash; 167 uhash = ohash; 168 } else { 169 lhash = ohash; 170 uhash = nhash; 171 } 172 173 if (lhash != uhash) { 174 while (union_list_lock(lhash)) 175 continue; 176 } 177 178 while (union_list_lock(uhash)) 179 continue; 180 181 if (ohash != nhash || !docache) { 182 if (un->un_flags & UN_CACHED) { 183 un->un_flags &= ~UN_CACHED; 184 LIST_REMOVE(un, un_cache); 185 } 186 } 187 188 if (ohash != nhash) 189 union_list_unlock(ohash); 190 191 if (un->un_lowervp != lowervp) { 192 if (un->un_lowervp) { 193 vrele(un->un_lowervp); 194 if (un->un_path) { 195 free(un->un_path, M_UNPATH); 196 un->un_path = 0; 197 } 198 } 199 un->un_lowervp = lowervp; 200 un->un_lowersz = VNOVAL; 201 } 202 203 if (un->un_uppervp != uppervp) { 204 if (un->un_uppervp) 205 vrele(un->un_uppervp); 206 un->un_uppervp = uppervp; 207 un->un_uppersz = VNOVAL; 208 } 209 210 if (docache && (ohash != nhash)) { 211 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); 212 un->un_flags |= UN_CACHED; 213 } 214 215 union_list_unlock(nhash); 216 } 217 218 /* 219 * Set a new lowervp. The passed lowervp must be referenced and will be 220 * stored in the vp in a referenced state. 221 */ 222 223 static void 224 union_newlower(un, lowervp) 225 struct union_node *un; 226 struct vnode *lowervp; 227 { 228 union_updatevp(un, un->un_uppervp, lowervp); 229 } 230 231 /* 232 * Set a new uppervp. The passed uppervp must be locked and will be 233 * stored in the vp in a locked state. The caller should not unlock 234 * uppervp. 235 */ 236 237 static void 238 union_newupper(un, uppervp) 239 struct union_node *un; 240 struct vnode *uppervp; 241 { 242 union_updatevp(un, uppervp, un->un_lowervp); 243 } 244 245 /* 246 * Keep track of size changes in the underlying vnodes. 247 * If the size changes, then callback to the vm layer 248 * giving priority to the upper layer size. 249 */ 250 void 251 union_newsize(vp, uppersz, lowersz) 252 struct vnode *vp; 253 off_t uppersz, lowersz; 254 { 255 struct union_node *un; 256 off_t sz; 257 258 /* only interested in regular files */ 259 if (vp->v_type != VREG) 260 return; 261 262 un = VTOUNION(vp); 263 sz = VNOVAL; 264 265 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { 266 un->un_uppersz = uppersz; 267 if (sz == VNOVAL) 268 sz = un->un_uppersz; 269 } 270 271 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { 272 un->un_lowersz = lowersz; 273 if (sz == VNOVAL) 274 sz = un->un_lowersz; 275 } 276 277 if (sz != VNOVAL) { 278 UDEBUG(("union: %s size now %ld\n", 279 (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); 280 /* 281 * There is no need to change size of non-existent object. 282 */ 283 /* vnode_pager_setsize(vp, sz); */ 284 } 285 } 286 287 /* 288 * union_allocvp: allocate a union_node and associate it with a 289 * parent union_node and one or two vnodes. 290 * 291 * vpp Holds the returned vnode locked and referenced if no 292 * error occurs. 293 * 294 * mp Holds the mount point. mp may or may not be busied. 295 * allocvp() makes no changes to mp. 296 * 297 * dvp Holds the parent union_node to the one we wish to create. 298 * XXX may only be used to traverse an uncopied lowervp-based 299 * tree? XXX 300 * 301 * dvp may or may not be locked. allocvp() makes no changes 302 * to dvp. 303 * 304 * upperdvp Holds the parent vnode to uppervp, generally used along 305 * with path component information to create a shadow of 306 * lowervp when uppervp does not exist. 307 * 308 * upperdvp is referenced but unlocked on entry, and will be 309 * dereferenced on return. 310 * 311 * uppervp Holds the new uppervp vnode to be stored in the 312 * union_node we are allocating. uppervp is referenced but 313 * not locked, and will be dereferenced on return. 314 * 315 * lowervp Holds the new lowervp vnode to be stored in the 316 * union_node we are allocating. lowervp is referenced but 317 * not locked, and will be dereferenced on return. 318 * 319 * cnp Holds path component information to be coupled with 320 * lowervp and upperdvp to allow unionfs to create an uppervp 321 * later on. Only used if lowervp is valid. The contents 322 * of cnp is only valid for the duration of the call. 323 * 324 * docache Determine whether this node should be entered in the 325 * cache or whether it should be destroyed as soon as possible. 326 * 327 * All union_nodes are maintained on a singly-linked 328 * list. New nodes are only allocated when they cannot 329 * be found on this list. Entries on the list are 330 * removed when the vfs reclaim entry is called. 331 * 332 * A single lock is kept for the entire list. This is 333 * needed because the getnewvnode() function can block 334 * waiting for a vnode to become free, in which case there 335 * may be more than one process trying to get the same 336 * vnode. This lock is only taken if we are going to 337 * call getnewvnode(), since the kernel itself is single-threaded. 338 * 339 * If an entry is found on the list, then call vget() to 340 * take a reference. This is done because there may be 341 * zero references to it and so it needs to removed from 342 * the vnode free list. 343 */ 344 345 int 346 union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) 347 struct vnode **vpp; 348 struct mount *mp; 349 struct vnode *dvp; /* parent union vnode */ 350 struct vnode *upperdvp; /* parent vnode of uppervp */ 351 struct componentname *cnp; /* may be null */ 352 struct vnode *uppervp; /* may be null */ 353 struct vnode *lowervp; /* may be null */ 354 int docache; 355 { 356 int error; 357 struct union_node *un = 0; 358 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 359 struct thread *td = (cnp) ? cnp->cn_thread : curthread; 360 int hash = 0; 361 int vflag; 362 int try; 363 364 if (uppervp == NULLVP && lowervp == NULLVP) 365 panic("union: unidentifiable allocation"); 366 367 if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { 368 vrele(lowervp); 369 lowervp = NULLVP; 370 } 371 372 /* detect the root vnode (and aliases) */ 373 vflag = 0; 374 if ((uppervp == um->um_uppervp) && 375 ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { 376 if (lowervp == NULLVP) { 377 lowervp = um->um_lowervp; 378 if (lowervp != NULLVP) 379 VREF(lowervp); 380 } 381 vflag = VV_ROOT; 382 } 383 384 loop: 385 if (!docache) { 386 un = 0; 387 } else for (try = 0; try < 3; try++) { 388 switch (try) { 389 case 0: 390 if (lowervp == NULLVP) 391 continue; 392 hash = UNION_HASH(uppervp, lowervp); 393 break; 394 395 case 1: 396 if (uppervp == NULLVP) 397 continue; 398 hash = UNION_HASH(uppervp, NULLVP); 399 break; 400 401 case 2: 402 if (lowervp == NULLVP) 403 continue; 404 hash = UNION_HASH(NULLVP, lowervp); 405 break; 406 } 407 408 while (union_list_lock(hash)) 409 continue; 410 411 LIST_FOREACH(un, &unhead[hash], un_cache) { 412 if ((un->un_lowervp == lowervp || 413 un->un_lowervp == NULLVP) && 414 (un->un_uppervp == uppervp || 415 un->un_uppervp == NULLVP) && 416 (UNIONTOV(un)->v_mount == mp)) { 417 if (vget(UNIONTOV(un), 0, 418 cnp ? cnp->cn_thread : NULL)) { 419 union_list_unlock(hash); 420 goto loop; 421 } 422 break; 423 } 424 } 425 426 union_list_unlock(hash); 427 428 if (un) 429 break; 430 } 431 432 if (un) { 433 /* 434 * Obtain a lock on the union_node. Everything is unlocked 435 * except for dvp, so check that case. If they match, our 436 * new un is already locked. Otherwise we have to lock our 437 * new un. 438 * 439 * A potential deadlock situation occurs when we are holding 440 * one lock while trying to get another. We must follow 441 * strict ordering rules to avoid it. We try to locate dvp 442 * by scanning up from un_vnode, since the most likely 443 * scenario is un being under dvp. 444 */ 445 446 if (dvp && un->un_vnode != dvp) { 447 struct vnode *scan = un->un_vnode; 448 449 do { 450 scan = VTOUNION(scan)->un_pvp; 451 } while (scan && scan->v_op == union_vnodeop_p && 452 scan != dvp); 453 if (scan != dvp) { 454 /* 455 * our new un is above dvp (we never saw dvp 456 * while moving up the tree). 457 */ 458 VREF(dvp); 459 VOP_UNLOCK(dvp, 0, td); 460 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); 461 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); 462 vrele(dvp); 463 } else { 464 /* 465 * our new un is under dvp 466 */ 467 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); 468 } 469 } else if (dvp == NULLVP) { 470 /* 471 * dvp is NULL, we need to lock un. 472 */ 473 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); 474 } else { 475 /* 476 * dvp == un->un_vnode, we are already locked. 477 */ 478 error = 0; 479 } 480 481 if (error) 482 goto loop; 483 484 /* 485 * At this point, the union_node is locked and referenced. 486 * 487 * uppervp is locked and referenced or NULL, lowervp is 488 * referenced or NULL. 489 */ 490 UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", 491 un, un->un_vnode, un->un_uppervp, 492 (un->un_uppervp ? vrefcnt(un->un_uppervp) : -99), 493 uppervp, 494 (uppervp ? vrefcnt(uppervp) : -99) 495 )); 496 497 if (uppervp != un->un_uppervp) { 498 KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", vrefcnt(uppervp))); 499 union_newupper(un, uppervp); 500 } else if (uppervp) { 501 KASSERT(vrefcnt(uppervp) > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", vrefcnt(uppervp))); 502 vrele(uppervp); 503 } 504 505 /* 506 * Save information about the lower layer. 507 * This needs to keep track of pathname 508 * and directory information which union_vn_create() 509 * might need. 510 */ 511 if (lowervp != un->un_lowervp) { 512 union_newlower(un, lowervp); 513 if (cnp && (lowervp != NULLVP)) { 514 un->un_path = malloc(cnp->cn_namelen+1, 515 M_UNPATH, M_WAITOK); 516 bcopy(cnp->cn_nameptr, un->un_path, 517 cnp->cn_namelen); 518 un->un_path[cnp->cn_namelen] = '\0'; 519 } 520 } else if (lowervp) { 521 vrele(lowervp); 522 } 523 524 /* 525 * and upperdvp 526 */ 527 if (upperdvp != un->un_dirvp) { 528 if (un->un_dirvp) 529 vrele(un->un_dirvp); 530 un->un_dirvp = upperdvp; 531 } else if (upperdvp) { 532 vrele(upperdvp); 533 } 534 535 *vpp = UNIONTOV(un); 536 return (0); 537 } 538 539 if (docache) { 540 /* 541 * Otherwise lock the vp list while we call getnewvnode() 542 * since that can block. 543 */ 544 hash = UNION_HASH(uppervp, lowervp); 545 546 if (union_list_lock(hash)) 547 goto loop; 548 } 549 550 /* 551 * Create new node rather than replace old node. 552 */ 553 554 error = getnewvnode("union", mp, union_vnodeop_p, vpp); 555 if (error) { 556 /* 557 * If an error occurs, clear out vnodes. 558 */ 559 if (lowervp) 560 vrele(lowervp); 561 if (uppervp) 562 vrele(uppervp); 563 if (upperdvp) 564 vrele(upperdvp); 565 *vpp = NULL; 566 goto out; 567 } 568 569 MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), 570 M_TEMP, M_WAITOK); 571 572 ASSERT_VOP_LOCKED(*vpp, "union_allocvp"); 573 (*vpp)->v_vflag |= vflag; 574 if (uppervp) 575 (*vpp)->v_type = uppervp->v_type; 576 else 577 (*vpp)->v_type = lowervp->v_type; 578 579 un = VTOUNION(*vpp); 580 bzero(un, sizeof(*un)); 581 582 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 583 584 un->un_vnode = *vpp; 585 un->un_uppervp = uppervp; 586 un->un_uppersz = VNOVAL; 587 un->un_lowervp = lowervp; 588 un->un_lowersz = VNOVAL; 589 un->un_dirvp = upperdvp; 590 un->un_pvp = dvp; /* only parent dir in new allocation */ 591 if (dvp != NULLVP) 592 VREF(dvp); 593 un->un_dircache = NULL; 594 un->un_openl = 0; 595 596 if (cnp && (lowervp != NULLVP)) { 597 un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK); 598 bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); 599 un->un_path[cnp->cn_namelen] = '\0'; 600 } else { 601 un->un_path = NULL; 602 un->un_dirvp = NULL; 603 } 604 605 if (docache) { 606 LIST_INSERT_HEAD(&unhead[hash], un, un_cache); 607 un->un_flags |= UN_CACHED; 608 } 609 610 out: 611 if (docache) 612 union_list_unlock(hash); 613 614 return (error); 615 } 616 617 int 618 union_freevp(vp) 619 struct vnode *vp; 620 { 621 struct union_node *un = VTOUNION(vp); 622 623 if (un->un_flags & UN_CACHED) { 624 un->un_flags &= ~UN_CACHED; 625 LIST_REMOVE(un, un_cache); 626 } 627 628 if (un->un_pvp != NULLVP) { 629 vrele(un->un_pvp); 630 un->un_pvp = NULL; 631 } 632 if (un->un_uppervp != NULLVP) { 633 vrele(un->un_uppervp); 634 un->un_uppervp = NULL; 635 } 636 if (un->un_lowervp != NULLVP) { 637 vrele(un->un_lowervp); 638 un->un_lowervp = NULL; 639 } 640 if (un->un_dirvp != NULLVP) { 641 vrele(un->un_dirvp); 642 un->un_dirvp = NULL; 643 } 644 if (un->un_path) { 645 free(un->un_path, M_UNPATH); 646 un->un_path = NULL; 647 } 648 649 FREE(vp->v_data, M_TEMP); 650 vp->v_data = 0; 651 652 return (0); 653 } 654 655 /* 656 * copyfile. Copy the vnode (fvp) to the vnode (tvp) 657 * using a sequence of reads and writes. Both (fvp) 658 * and (tvp) are locked on entry and exit. 659 * 660 * fvp and tvp are both exclusive locked on call, but their refcount's 661 * haven't been bumped at all. 662 */ 663 static int 664 union_copyfile(fvp, tvp, cred, td) 665 struct vnode *fvp; 666 struct vnode *tvp; 667 struct ucred *cred; 668 struct thread *td; 669 { 670 char *buf; 671 struct uio uio; 672 struct iovec iov; 673 int error = 0; 674 675 /* 676 * strategy: 677 * Allocate a buffer of size MAXBSIZE. 678 * Loop doing reads and writes, keeping track 679 * of the current uio offset. 680 * Give up at the first sign of trouble. 681 */ 682 683 bzero(&uio, sizeof(uio)); 684 685 uio.uio_td = td; 686 uio.uio_segflg = UIO_SYSSPACE; 687 uio.uio_offset = 0; 688 689 VOP_LEASE(fvp, td, cred, LEASE_READ); 690 VOP_LEASE(tvp, td, cred, LEASE_WRITE); 691 692 buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); 693 694 /* ugly loop follows... */ 695 do { 696 off_t offset = uio.uio_offset; 697 int count; 698 int bufoffset; 699 700 /* 701 * Setup for big read. 702 */ 703 uio.uio_iov = &iov; 704 uio.uio_iovcnt = 1; 705 iov.iov_base = buf; 706 iov.iov_len = MAXBSIZE; 707 uio.uio_resid = iov.iov_len; 708 uio.uio_rw = UIO_READ; 709 710 if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) 711 break; 712 713 /* 714 * Get bytes read, handle read eof case and setup for 715 * write loop. 716 */ 717 if ((count = MAXBSIZE - uio.uio_resid) == 0) 718 break; 719 bufoffset = 0; 720 721 /* 722 * Write until an error occurs or our buffer has been 723 * exhausted, then update the offset for the next read. 724 */ 725 while (bufoffset < count) { 726 uio.uio_iov = &iov; 727 uio.uio_iovcnt = 1; 728 iov.iov_base = buf + bufoffset; 729 iov.iov_len = count - bufoffset; 730 uio.uio_offset = offset + bufoffset; 731 uio.uio_rw = UIO_WRITE; 732 uio.uio_resid = iov.iov_len; 733 734 if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) 735 break; 736 bufoffset += (count - bufoffset) - uio.uio_resid; 737 } 738 uio.uio_offset = offset + bufoffset; 739 } while (error == 0); 740 741 free(buf, M_TEMP); 742 return (error); 743 } 744 745 /* 746 * 747 * un's vnode is assumed to be locked on entry and remains locked on exit. 748 */ 749 750 int 751 union_copyup(un, docopy, cred, td) 752 struct union_node *un; 753 int docopy; 754 struct ucred *cred; 755 struct thread *td; 756 { 757 int error; 758 struct mount *mp; 759 struct vnode *lvp, *uvp; 760 761 /* 762 * If the user does not have read permission, the vnode should not 763 * be copied to upper layer. 764 */ 765 vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td); 766 error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td); 767 VOP_UNLOCK(un->un_lowervp, 0, td); 768 if (error) 769 return (error); 770 771 if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0) 772 return (error); 773 if ((error = union_vn_create(&uvp, un, td)) != 0) { 774 vn_finished_write(mp); 775 return (error); 776 } 777 778 lvp = un->un_lowervp; 779 780 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); 781 if (docopy) { 782 /* 783 * XX - should not ignore errors 784 * from VOP_CLOSE() 785 */ 786 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); 787 error = VOP_OPEN(lvp, FREAD, cred, td, -1); 788 if (error == 0 && vn_canvmio(lvp) == TRUE) 789 error = vfs_object_create(lvp, td, cred); 790 if (error == 0) { 791 error = union_copyfile(lvp, uvp, cred, td); 792 VOP_UNLOCK(lvp, 0, td); 793 (void) VOP_CLOSE(lvp, FREAD, cred, td); 794 } 795 if (error == 0) 796 UDEBUG(("union: copied up %s\n", un->un_path)); 797 798 } 799 VOP_UNLOCK(uvp, 0, td); 800 vn_finished_write(mp); 801 union_newupper(un, uvp); 802 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); 803 union_vn_close(uvp, FWRITE, cred, td); 804 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); 805 /* 806 * Subsequent IOs will go to the top layer, so 807 * call close on the lower vnode and open on the 808 * upper vnode to ensure that the filesystem keeps 809 * its references counts right. This doesn't do 810 * the right thing with (cred) and (FREAD) though. 811 * Ignoring error returns is not right, either. 812 */ 813 if (error == 0) { 814 int i; 815 816 for (i = 0; i < un->un_openl; i++) { 817 (void) VOP_CLOSE(lvp, FREAD, cred, td); 818 (void) VOP_OPEN(uvp, FREAD, cred, td, -1); 819 } 820 if (un->un_openl) { 821 if (vn_canvmio(uvp) == TRUE) 822 error = vfs_object_create(uvp, td, cred); 823 } 824 un->un_openl = 0; 825 } 826 827 return (error); 828 829 } 830 831 /* 832 * union_relookup: 833 * 834 * dvp should be locked on entry and will be locked on return. No 835 * net change in the ref count will occur. 836 * 837 * If an error is returned, *vpp will be invalid, otherwise it 838 * will hold a locked, referenced vnode. If *vpp == dvp then 839 * remember that only one exclusive lock is held. 840 */ 841 842 static int 843 union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) 844 struct union_mount *um; 845 struct vnode *dvp; 846 struct vnode **vpp; 847 struct componentname *cnp; 848 struct componentname *cn; 849 char *path; 850 int pathlen; 851 { 852 int error; 853 854 /* 855 * A new componentname structure must be faked up because 856 * there is no way to know where the upper level cnp came 857 * from or what it is being used for. This must duplicate 858 * some of the work done by NDINIT(), some of the work done 859 * by namei(), some of the work done by lookup() and some of 860 * the work done by VOP_LOOKUP() when given a CREATE flag. 861 * Conclusion: Horrible. 862 */ 863 cn->cn_namelen = pathlen; 864 cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 865 bcopy(path, cn->cn_pnbuf, cn->cn_namelen); 866 cn->cn_pnbuf[cn->cn_namelen] = '\0'; 867 868 cn->cn_nameiop = CREATE; 869 cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); 870 cn->cn_thread = cnp->cn_thread; 871 if (um->um_op == UNMNT_ABOVE) 872 cn->cn_cred = cnp->cn_cred; 873 else 874 cn->cn_cred = um->um_cred; 875 cn->cn_nameptr = cn->cn_pnbuf; 876 cn->cn_consume = cnp->cn_consume; 877 878 VREF(dvp); 879 VOP_UNLOCK(dvp, 0, cnp->cn_thread); 880 881 /* 882 * Pass dvp unlocked and referenced on call to relookup(). 883 * 884 * If an error occurs, dvp will be returned unlocked and dereferenced. 885 */ 886 887 if ((error = relookup(dvp, vpp, cn)) != 0) { 888 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread); 889 return(error); 890 } 891 892 /* 893 * If no error occurs, dvp will be returned locked with the reference 894 * left as before, and vpp will be returned referenced and locked. 895 * 896 * We want to return with dvp as it was passed to us, so we get 897 * rid of our reference. 898 */ 899 vrele(dvp); 900 return (0); 901 } 902 903 /* 904 * Create a shadow directory in the upper layer. 905 * The new vnode is returned locked. 906 * 907 * (um) points to the union mount structure for access to the 908 * the mounting process's credentials. 909 * (dvp) is the directory in which to create the shadow directory, 910 * It is locked (but not ref'd) on entry and return. 911 * (cnp) is the component name to be created. 912 * (vpp) is the returned newly created shadow directory, which 913 * is returned locked and ref'd 914 */ 915 int 916 union_mkshadow(um, dvp, cnp, vpp) 917 struct union_mount *um; 918 struct vnode *dvp; 919 struct componentname *cnp; 920 struct vnode **vpp; 921 { 922 int error; 923 struct vattr va; 924 struct thread *td = cnp->cn_thread; 925 struct componentname cn; 926 struct mount *mp; 927 928 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) 929 return (error); 930 if ((error = union_relookup(um, dvp, vpp, cnp, &cn, 931 cnp->cn_nameptr, cnp->cn_namelen)) != 0) { 932 vn_finished_write(mp); 933 return (error); 934 } 935 936 if (*vpp) { 937 if (cn.cn_flags & HASBUF) { 938 uma_zfree(namei_zone, cn.cn_pnbuf); 939 cn.cn_flags &= ~HASBUF; 940 } 941 if (dvp == *vpp) 942 vrele(*vpp); 943 else 944 vput(*vpp); 945 vn_finished_write(mp); 946 *vpp = NULLVP; 947 return (EEXIST); 948 } 949 950 /* 951 * Policy: when creating the shadow directory in the 952 * upper layer, create it owned by the user who did 953 * the mount, group from parent directory, and mode 954 * 777 modified by umask (ie mostly identical to the 955 * mkdir syscall). (jsp, kb) 956 */ 957 958 VATTR_NULL(&va); 959 va.va_type = VDIR; 960 va.va_mode = um->um_cmode; 961 962 /* VOP_LEASE: dvp is locked */ 963 VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE); 964 965 error = VOP_MKDIR(dvp, vpp, &cn, &va); 966 if (cn.cn_flags & HASBUF) { 967 uma_zfree(namei_zone, cn.cn_pnbuf); 968 cn.cn_flags &= ~HASBUF; 969 } 970 /*vput(dvp);*/ 971 vn_finished_write(mp); 972 return (error); 973 } 974 975 /* 976 * Create a whiteout entry in the upper layer. 977 * 978 * (um) points to the union mount structure for access to the 979 * the mounting process's credentials. 980 * (dvp) is the directory in which to create the whiteout. 981 * It is locked on entry and return. 982 * (cnp) is the component name to be created. 983 */ 984 int 985 union_mkwhiteout(um, dvp, cnp, path) 986 struct union_mount *um; 987 struct vnode *dvp; 988 struct componentname *cnp; 989 char *path; 990 { 991 int error; 992 struct thread *td = cnp->cn_thread; 993 struct vnode *wvp; 994 struct componentname cn; 995 struct mount *mp; 996 997 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) 998 return (error); 999 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); 1000 if (error) { 1001 vn_finished_write(mp); 1002 return (error); 1003 } 1004 1005 if (wvp) { 1006 if (cn.cn_flags & HASBUF) { 1007 uma_zfree(namei_zone, cn.cn_pnbuf); 1008 cn.cn_flags &= ~HASBUF; 1009 } 1010 if (wvp == dvp) 1011 vrele(wvp); 1012 else 1013 vput(wvp); 1014 vn_finished_write(mp); 1015 return (EEXIST); 1016 } 1017 1018 /* VOP_LEASE: dvp is locked */ 1019 VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE); 1020 1021 error = VOP_WHITEOUT(dvp, &cn, CREATE); 1022 if (cn.cn_flags & HASBUF) { 1023 uma_zfree(namei_zone, cn.cn_pnbuf); 1024 cn.cn_flags &= ~HASBUF; 1025 } 1026 vn_finished_write(mp); 1027 return (error); 1028 } 1029 1030 /* 1031 * union_vn_create: creates and opens a new shadow file 1032 * on the upper union layer. This function is similar 1033 * in spirit to calling vn_open() but it avoids calling namei(). 1034 * The problem with calling namei() is that a) it locks too many 1035 * things, and b) it doesn't start at the "right" directory, 1036 * whereas relookup() is told where to start. 1037 * 1038 * On entry, the vnode associated with un is locked. It remains locked 1039 * on return. 1040 * 1041 * If no error occurs, *vpp contains a locked referenced vnode for your 1042 * use. If an error occurs *vpp iis undefined. 1043 */ 1044 static int 1045 union_vn_create(vpp, un, td) 1046 struct vnode **vpp; 1047 struct union_node *un; 1048 struct thread *td; 1049 { 1050 struct vnode *vp; 1051 struct ucred *cred = td->td_ucred; 1052 struct vattr vat; 1053 struct vattr *vap = &vat; 1054 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); 1055 int error; 1056 int cmode; 1057 struct componentname cn; 1058 1059 *vpp = NULLVP; 1060 FILEDESC_LOCK(td->td_proc->p_fd); 1061 cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; 1062 FILEDESC_UNLOCK(td->td_proc->p_fd); 1063 1064 /* 1065 * Build a new componentname structure (for the same 1066 * reasons outlines in union_mkshadow()). 1067 * The difference here is that the file is owned by 1068 * the current user, rather than by the person who 1069 * did the mount, since the current user needs to be 1070 * able to write the file (that's why it is being 1071 * copied in the first place). 1072 */ 1073 cn.cn_namelen = strlen(un->un_path); 1074 cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 1075 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); 1076 cn.cn_nameiop = CREATE; 1077 cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); 1078 cn.cn_thread = td; 1079 cn.cn_cred = td->td_ucred; 1080 cn.cn_nameptr = cn.cn_pnbuf; 1081 cn.cn_consume = 0; 1082 1083 /* 1084 * Pass dvp unlocked and referenced on call to relookup(). 1085 * 1086 * If an error occurs, dvp will be returned unlocked and dereferenced. 1087 */ 1088 VREF(un->un_dirvp); 1089 error = relookup(un->un_dirvp, &vp, &cn); 1090 if (error) 1091 return (error); 1092 1093 /* 1094 * If no error occurs, dvp will be returned locked with the reference 1095 * left as before, and vpp will be returned referenced and locked. 1096 */ 1097 if (vp) { 1098 vput(un->un_dirvp); 1099 if (cn.cn_flags & HASBUF) { 1100 uma_zfree(namei_zone, cn.cn_pnbuf); 1101 cn.cn_flags &= ~HASBUF; 1102 } 1103 if (vp == un->un_dirvp) 1104 vrele(vp); 1105 else 1106 vput(vp); 1107 return (EEXIST); 1108 } 1109 1110 /* 1111 * Good - there was no race to create the file 1112 * so go ahead and create it. The permissions 1113 * on the file will be 0666 modified by the 1114 * current user's umask. Access to the file, while 1115 * it is unioned, will require access to the top *and* 1116 * bottom files. Access when not unioned will simply 1117 * require access to the top-level file. 1118 * TODO: confirm choice of access permissions. 1119 */ 1120 VATTR_NULL(vap); 1121 vap->va_type = VREG; 1122 vap->va_mode = cmode; 1123 VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE); 1124 error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); 1125 if (cn.cn_flags & HASBUF) { 1126 uma_zfree(namei_zone, cn.cn_pnbuf); 1127 cn.cn_flags &= ~HASBUF; 1128 } 1129 vput(un->un_dirvp); 1130 if (error) 1131 return (error); 1132 1133 error = VOP_OPEN(vp, fmode, cred, td, -1); 1134 if (error == 0 && vn_canvmio(vp) == TRUE) 1135 error = vfs_object_create(vp, td, cred); 1136 if (error) { 1137 vput(vp); 1138 return (error); 1139 } 1140 vp->v_writecount++; 1141 *vpp = vp; 1142 return (0); 1143 } 1144 1145 static int 1146 union_vn_close(vp, fmode, cred, td) 1147 struct vnode *vp; 1148 int fmode; 1149 struct ucred *cred; 1150 struct thread *td; 1151 { 1152 1153 if (fmode & FWRITE) 1154 --vp->v_writecount; 1155 return (VOP_CLOSE(vp, fmode, cred, td)); 1156 } 1157 1158 #if 0 1159 1160 /* 1161 * union_removed_upper: 1162 * 1163 * called with union_node unlocked. XXX 1164 */ 1165 1166 void 1167 union_removed_upper(un) 1168 struct union_node *un; 1169 { 1170 struct thread *td = curthread; /* XXX */ 1171 struct vnode **vpp; 1172 1173 /* 1174 * Do not set the uppervp to NULLVP. If lowervp is NULLVP, 1175 * union node will have neither uppervp nor lowervp. We remove 1176 * the union node from cache, so that it will not be referrenced. 1177 */ 1178 union_newupper(un, NULLVP); 1179 if (un->un_dircache != NULL) 1180 union_dircache_free(un); 1181 1182 if (un->un_flags & UN_CACHED) { 1183 un->un_flags &= ~UN_CACHED; 1184 LIST_REMOVE(un, un_cache); 1185 } 1186 } 1187 1188 #endif 1189 1190 /* 1191 * Determine whether a whiteout is needed 1192 * during a remove/rmdir operation. 1193 */ 1194 int 1195 union_dowhiteout(un, cred, td) 1196 struct union_node *un; 1197 struct ucred *cred; 1198 struct thread *td; 1199 { 1200 struct vattr va; 1201 1202 if (un->un_lowervp != NULLVP) 1203 return (1); 1204 1205 if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 && 1206 (va.va_flags & OPAQUE)) 1207 return (1); 1208 1209 return (0); 1210 } 1211 1212 static void 1213 union_dircache_r(vp, vppp, cntp) 1214 struct vnode *vp; 1215 struct vnode ***vppp; 1216 int *cntp; 1217 { 1218 struct union_node *un; 1219 1220 if (vp->v_op != union_vnodeop_p) { 1221 if (vppp) { 1222 VREF(vp); 1223 *(*vppp)++ = vp; 1224 if (--(*cntp) == 0) 1225 panic("union: dircache table too small"); 1226 } else { 1227 (*cntp)++; 1228 } 1229 } else { 1230 un = VTOUNION(vp); 1231 if (un->un_uppervp != NULLVP) 1232 union_dircache_r(un->un_uppervp, vppp, cntp); 1233 if (un->un_lowervp != NULLVP) 1234 union_dircache_r(un->un_lowervp, vppp, cntp); 1235 } 1236 } 1237 1238 struct vnode * 1239 union_dircache_get(vp, td) 1240 struct vnode *vp; 1241 struct thread *td; 1242 { 1243 int cnt; 1244 struct vnode *nvp; 1245 struct vnode **vpp; 1246 struct vnode **dircache, **newdircache; 1247 struct union_node *un; 1248 int error; 1249 1250 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1251 un = VTOUNION(vp); 1252 dircache = un->un_dircache; 1253 newdircache = NULL; 1254 1255 nvp = NULLVP; 1256 1257 if (dircache == NULL) { 1258 cnt = 0; 1259 union_dircache_r(vp, 0, &cnt); 1260 cnt++; 1261 newdircache = dircache = malloc(cnt * sizeof(struct vnode *), 1262 M_UNDCACHE, M_WAITOK); 1263 vpp = dircache; 1264 union_dircache_r(vp, &vpp, &cnt); 1265 *vpp = NULLVP; 1266 vpp = dircache + 1; 1267 } else { 1268 vpp = dircache; 1269 do { 1270 if (*vpp++ == un->un_uppervp) 1271 break; 1272 } while (*vpp != NULLVP); 1273 } 1274 1275 if (*vpp == NULLVP) 1276 goto out; 1277 1278 /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/ 1279 UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99))); 1280 VREF(*vpp); 1281 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); 1282 UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? vrefcnt(*vpp) : -99))); 1283 if (error) 1284 goto out; 1285 1286 un->un_dircache = NULL; 1287 VTOUNION(nvp)->un_dircache = dircache; 1288 newdircache = NULL; 1289 1290 out: 1291 /* 1292 * If we allocated a new dircache and couldn't attach 1293 * it to a new vp, free the resources we allocated. 1294 */ 1295 if (newdircache) { 1296 for (vpp = newdircache; *vpp != NULLVP; vpp++) 1297 vrele(*vpp); 1298 free(newdircache, M_UNDCACHE); 1299 } 1300 1301 VOP_UNLOCK(vp, 0, td); 1302 return (nvp); 1303 } 1304 1305 void 1306 union_dircache_free(struct union_node *un) 1307 { 1308 struct vnode **vpp; 1309 1310 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) 1311 vrele(*vpp); 1312 free(un->un_dircache, M_UNDCACHE); 1313 un->un_dircache = NULL; 1314 } 1315 1316 /* 1317 * Module glue to remove #ifdef UNION from vfs_syscalls.c 1318 */ 1319 static int 1320 union_dircheck(struct thread *td, struct vnode **vp, struct file *fp) 1321 { 1322 int error = 0; 1323 1324 if ((*vp)->v_op == union_vnodeop_p) { 1325 struct vnode *lvp; 1326 1327 lvp = union_dircache_get(*vp, td); 1328 if (lvp != NULLVP) { 1329 struct vattr va; 1330 1331 /* 1332 * If the directory is opaque, 1333 * then don't show lower entries 1334 */ 1335 error = VOP_GETATTR(*vp, &va, fp->f_cred, td); 1336 if (va.va_flags & OPAQUE) { 1337 vput(lvp); 1338 lvp = NULLVP; 1339 } 1340 } 1341 1342 if (lvp != NULLVP) { 1343 error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1); 1344 if (error == 0 && vn_canvmio(lvp) == TRUE) 1345 error = vfs_object_create(lvp, td, fp->f_cred); 1346 if (error) { 1347 vput(lvp); 1348 return (error); 1349 } 1350 VOP_UNLOCK(lvp, 0, td); 1351 FILE_LOCK(fp); 1352 fp->f_vnode = lvp; 1353 fp->f_data = lvp; 1354 fp->f_offset = 0; 1355 FILE_UNLOCK(fp); 1356 error = vn_close(*vp, FREAD, fp->f_cred, td); 1357 if (error) 1358 return (error); 1359 *vp = lvp; 1360 return -1; /* goto unionread */ 1361 } 1362 } 1363 return error; 1364 } 1365 1366 static int 1367 union_modevent(module_t mod, int type, void *data) 1368 { 1369 switch (type) { 1370 case MOD_LOAD: 1371 union_dircheckp = union_dircheck; 1372 break; 1373 case MOD_UNLOAD: 1374 union_dircheckp = NULL; 1375 break; 1376 default: 1377 break; 1378 } 1379 return 0; 1380 } 1381 1382 static moduledata_t union_mod = { 1383 "union_dircheck", 1384 union_modevent, 1385 NULL 1386 }; 1387 1388 DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); 1389