/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
#include <vm/vm_object.h>	/* for vm cache coherency */
#include <vm/uma.h>

#include <fs/unionfs/union.h>

#include <sys/proc.h>

extern int	union_init(void);

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static void	union_dircache_r(struct vnode *vp, struct vnode ***vppp,
				 int *cntp);
static int	union_list_lock(int ix);
static void	union_list_unlock(int ix);
static int	union_relookup(struct union_mount *um, struct vnode *dvp,
			       struct vnode **vpp,
			       struct componentname *cnp,
			       struct componentname *cn, char *path,
			       int pathlen);
static void	union_updatevp(struct union_node *un,
			       struct vnode *uppervp,
			       struct vnode *lowervp);
static void	union_newlower(struct union_node *, struct vnode *);
static void	union_newupper(struct union_node *, struct vnode *);
static int	union_copyfile(struct vnode *, struct vnode *,
			       struct ucred *, struct thread *);
static int	union_vn_create(struct vnode **, struct union_node *,
				struct thread *);
static int	union_vn_close(struct vnode *, int, struct ucred *,
			       struct thread *);

int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t)unvplock, sizeof(unvplock));
	return (0);
}

static int
union_list_lock(ix)
	int ix;
{
	if (unvplock[ix] & UNVP_LOCKED) {
		unvplock[ix] |= UNVP_WANT;
		(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
		return (1);
	}
	unvplock[ix] |= UNVP_LOCKED;
	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{
	unvplock[ix] &= ~UNVP_LOCKED;

	if (unvplock[ix] & UNVP_WANT) {
		unvplock[ix] &= ~UNVP_WANT;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

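/*
 * Usage note for the bucket locks above: union_list_lock() returns
 * non-zero when it had to sleep, in which case the bucket may have
 * changed while we slept, so callers retry until it returns zero
 * (bucket now held).  The idiom used throughout this file is:
 *
 *	while (union_list_lock(hash))
 *		continue;
 *	... examine or modify unhead[hash] ...
 *	union_list_unlock(hash);
 */
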
/*
 * union_updatevp:
 *
 *	The uppervp, if not NULL, must be referenced and not locked by us
 *	The lowervp, if not NULL, must be referenced.
 *
 *	if uppervp and lowervp match pointers already installed, nothing
 *	happens. The passed vp's (when matching) are not adjusted.  This
 *	routine may only be called by union_newupper() and union_newlower().
 */

static void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash) {
		while (union_list_lock(lhash))
			continue;
	}

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);
		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

/*
 * Set a new lowervp.  The passed lowervp must be referenced and will be
 * stored in the vp in a referenced state.
 */

static void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{
	union_updatevp(un, un->un_uppervp, lowervp);
}

/*
 * Set a new uppervp.  The passed uppervp must be locked and will be
 * stored in the vp in a locked state.  The caller should not unlock
 * uppervp.
 */

static void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{
	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
		UDEBUG(("union: %s size now %ld\n",
			(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
		/*
		 * There is no need to change size of non-existent object.
		 */
		/* vnode_pager_setsize(vp, sz); */
	}
}

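/*
 * Size bookkeeping note: un_uppersz and un_lowersz hold the last size
 * seen for each layer, with VNOVAL meaning "unknown"; union_updatevp()
 * resets them to VNOVAL whenever a layer vnode is replaced.  A caller
 * that has fetched fresh attributes for one layer reports them by
 * passing VNOVAL for the other layer, e.g. (illustrative only):
 *
 *	union_newsize(vp, va.va_size, VNOVAL);
 */
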
/*
 * union_allocvp:	allocate a union_node and associate it with a
 *			parent union_node and one or two vnodes.
 *
 *	vpp	Holds the returned vnode locked and referenced if no
 *		error occurs.
 *
 *	mp	Holds the mount point.  mp may or may not be busied.
 *		allocvp makes no changes to mp.
 *
 *	dvp	Holds the parent union_node to the one we wish to create.
 *		XXX may only be used to traverse an uncopied lowervp-based
 *		tree? XXX
 *
 *		dvp may or may not be locked.  allocvp makes no changes
 *		to dvp.
 *
 *	upperdvp Holds the parent vnode to uppervp, generally used along
 *		with path component information to create a shadow of
 *		lowervp when uppervp does not exist.
 *
 *		upperdvp is referenced but unlocked on entry, and will be
 *		dereferenced on return.
 *
 *	uppervp	Holds the new uppervp vnode to be stored in the
 *		union_node we are allocating.  uppervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	lowervp	Holds the new lowervp vnode to be stored in the
 *		union_node we are allocating.  lowervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	cnp	Holds path component information to be coupled with
 *		lowervp and upperdvp to allow unionfs to create an uppervp
 *		later on.  Only used if lowervp is valid.  The contents
 *		of cnp are only valid for the duration of the call.
 *
 *	docache	Determine whether this node should be entered in the
 *		cache or whether it should be destroyed as soon as possible.
 *
 * all union_nodes are maintained on a singly-linked
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list.  this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  this lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference.  this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */

int
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *dvp;		/* parent union vnode */
	struct vnode *upperdvp;		/* parent vnode of uppervp */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VV_ROOT;
	}

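/*
 * The lookup loop below probes up to three hash chains for an
 * existing node: (uppervp, lowervp), (uppervp, NULLVP) and
 * (NULLVP, lowervp).  The extra probes cover nodes that were hashed
 * when only one of their layers was known, which is also why the
 * match in the loop accepts a NULLVP layer on either side.
 */
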
loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		LIST_FOREACH(un, &unhead[hash], un_cache) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_thread : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */

		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_tag == VT_UNION && scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				VREF(dvp);
				VOP_UNLOCK(dvp, 0, td);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
			un, un->un_vnode, un->un_uppervp,
			(un->un_uppervp ? un->un_uppervp->v_usecount : -99),
			uppervp,
			(uppervp ? uppervp->v_usecount : -99)
		));

		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		/*
		 * If an error occurs, clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	ASSERT_VOP_LOCKED(*vpp, "union_allocvp");
	(*vpp)->v_vflag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	lockinit(&un->un_lock, PVFS, "unlock", VLKTIMEOUT, 0);
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		VREF(dvp);
	un->un_dircache = 0;
	un->un_openl = 0;

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		un->un_path = 0;
		un->un_dirvp = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

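/*
 * union_freevp: disassociate the union_node from (vp), releasing the
 * references it holds on its layer and parent vnodes and freeing the
 * saved pathname and the node itself.  This is the counterpart of
 * union_allocvp(): entries leave the cache here when the vfs reclaim
 * entry is called (see the union_allocvp() comments above).
 */
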
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP) {
		vrele(un->un_pvp);
		un->un_pvp = NULL;
	}
	if (un->un_uppervp != NULLVP) {
		vrele(un->un_uppervp);
		un->un_uppervp = NULL;
	}
	if (un->un_lowervp != NULLVP) {
		vrele(un->un_lowervp);
		un->un_lowervp = NULL;
	}
	if (un->un_dirvp != NULLVP) {
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		free(un->un_path, M_TEMP);
		un->un_path = NULL;
	}
	lockdestroy(&un->un_lock);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  Both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * fvp and tvp are both exclusive locked on call, but their refcounts
 * haven't been bumped at all.
 */
static int
union_copyfile(fvp, tvp, cred, td)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct thread *td;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	bzero(&uio, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_LEASE(fvp, td, cred, LEASE_READ);
	VOP_LEASE(tvp, td, cred, LEASE_WRITE);

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;
		int count;
		int bufoffset;

		/*
		 * Setup for big read.
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
			break;

		/*
		 * Get bytes read, handle read eof case and setup for
		 * write loop.
		 */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;
		bufoffset = 0;

		/*
		 * Write until an error occurs or our buffer has been
		 * exhausted, then update the offset for the next read.
		 */
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
				break;
			bufoffset += (count - bufoffset) - uio.uio_resid;
		}
		uio.uio_offset = offset + bufoffset;
	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * union_copyup: create the file in the upper layer and, if docopy is
 * set, copy the lower file's contents up with union_copyfile().
 * Opens accumulated against the lower vnode (un_openl) are then
 * replayed against the new upper vnode.
 *
 * un's vnode is assumed to be locked on entry and remains locked on exit.
 */

int
union_copyup(un, docopy, cred, td)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct thread *td;
{
	int error;
	struct mount *mp;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
	VOP_UNLOCK(un->un_lowervp, 0, td);
	if (error)
		return (error);

	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_vn_create(&uvp, un, td)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	lvp = un->un_lowervp;

	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE()
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(lvp, FREAD, cred, td);
		if (error == 0 && vn_canvmio(lvp) == TRUE)
			error = vfs_object_create(lvp, td, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, td);
			VOP_UNLOCK(lvp, 0, td);
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
		}
		if (error == 0)
			UDEBUG(("union: copied up %s\n", un->un_path));

	}
	VOP_UNLOCK(uvp, 0, td);
	vn_finished_write(mp);
	union_newupper(un, uvp);
	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	union_vn_close(uvp, FWRITE, cred, td);
	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
			(void) VOP_OPEN(uvp, FREAD, cred, td);
		}
		if (un->un_openl) {
			if (vn_canvmio(uvp) == TRUE)
				error = vfs_object_create(uvp, td, cred);
		}
		un->un_openl = 0;
	}

	return (error);

}

/*
 * union_relookup:
 *
 *	dvp should be locked on entry and will be locked on return.  No
 *	net change in the ref count will occur.
 *
 *	If an error is returned, *vpp will be invalid, otherwise it
 *	will hold a locked, referenced vnode.  If *vpp == dvp then
 *	remember that only one exclusive lock is held.
 */

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn->cn_thread = cnp->cn_thread;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	VOP_UNLOCK(dvp, 0, cnp->cn_thread);

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */

	if ((error = relookup(dvp, vpp, cn)) != 0) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
		return (error);
	}

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 *
	 * We want to return with dvp as it was passed to us, so we get
	 * rid of our reference.
	 */
	vrele(dvp);
	return (0);
}

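/*
 * Note that on success the faked-up componentname from union_relookup()
 * still owns its pathname buffer (HASBUF is set), so callers must
 * release it when they are finished, as union_mkshadow() and
 * union_mkwhiteout() below do:
 *
 *	if (cn.cn_flags & HASBUF) {
 *		uma_zfree(namei_zone, cn.cn_pnbuf);
 *		cn.cn_flags &= ~HASBUF;
 *	}
 */
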
/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory,
 * it is locked (but not ref'd) on entry and return.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked and ref'd.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct thread *td = cnp->cn_thread;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (*vpp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == *vpp)
			vrele(*vpp);
		else
			vput(*vpp);
		vn_finished_write(mp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	/*vput(dvp);*/
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and return.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *wvp;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_finished_write(mp);
		return (error);
	}

	if (wvp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (wvp == dvp)
			vrele(wvp);
		else
			vput(wvp);
		vn_finished_write(mp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vn_finished_write(mp);
	return (error);
}

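/*
 * Whiteout note: a whiteout entry masks a lower-layer name once the
 * object has been removed through the union.  The remove and rmdir
 * paths create one with union_mkwhiteout() above whenever
 * union_dowhiteout() (below) reports that the name is backed by the
 * lower layer or that the upper directory is marked OPAQUE.
 */
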
/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  This function is similar
 * in spirit to calling vn_open() but it avoids calling namei().
 * The problem with calling namei() is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup() is told where to start.
 *
 * On entry, the vnode associated with un is locked.  It remains locked
 * on return.
 *
 * If no error occurs, *vpp contains a locked referenced vnode for your
 * use.  If an error occurs *vpp is undefined.
 */
static int
union_vn_create(vpp, un, td)
	struct vnode **vpp;
	struct union_node *un;
	struct thread *td;
{
	struct vnode *vp;
	struct ucred *cred = td->td_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode;
	struct componentname cn;

	*vpp = NULLVP;
	FILEDESC_LOCK(td->td_proc->p_fd);
	cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK(td->td_proc->p_fd);

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow()).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn.cn_thread = td;
	cn.cn_cred = td->td_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */
	VREF(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	if (error)
		return (error);

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 */
	if (vp) {
		vput(un->un_dirvp);
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (vp == un->un_dirvp)
			vrele(vp);
		else
			vput(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE);
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vput(un->un_dirvp);
	if (error)
		return (error);

	error = VOP_OPEN(vp, fmode, cred, td);
	if (error == 0 && vn_canvmio(vp) == TRUE)
		error = vfs_object_create(vp, td, cred);
	if (error) {
		vput(vp);
		return (error);
	}
	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

static int
union_vn_close(vp, fmode, cred, td)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct thread *td;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, td));
}

XXX 1160 */ 1161 1162 void 1163 union_removed_upper(un) 1164 struct union_node *un; 1165 { 1166 struct thread *td = curthread; /* XXX */ 1167 struct vnode **vpp; 1168 1169 /* 1170 * Do not set the uppervp to NULLVP. If lowervp is NULLVP, 1171 * union node will have neither uppervp nor lowervp. We remove 1172 * the union node from cache, so that it will not be referrenced. 1173 */ 1174 union_newupper(un, NULLVP); 1175 if (un->un_dircache != 0) { 1176 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) 1177 vrele(*vpp); 1178 free(un->un_dircache, M_TEMP); 1179 un->un_dircache = 0; 1180 } 1181 1182 if (un->un_flags & UN_CACHED) { 1183 un->un_flags &= ~UN_CACHED; 1184 LIST_REMOVE(un, un_cache); 1185 } 1186 } 1187 1188 #endif 1189 1190 /* 1191 * determine whether a whiteout is needed 1192 * during a remove/rmdir operation. 1193 */ 1194 int 1195 union_dowhiteout(un, cred, td) 1196 struct union_node *un; 1197 struct ucred *cred; 1198 struct thread *td; 1199 { 1200 struct vattr va; 1201 1202 if (un->un_lowervp != NULLVP) 1203 return (1); 1204 1205 if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 && 1206 (va.va_flags & OPAQUE)) 1207 return (1); 1208 1209 return (0); 1210 } 1211 1212 static void 1213 union_dircache_r(vp, vppp, cntp) 1214 struct vnode *vp; 1215 struct vnode ***vppp; 1216 int *cntp; 1217 { 1218 struct union_node *un; 1219 1220 if (vp->v_op != union_vnodeop_p) { 1221 if (vppp) { 1222 VREF(vp); 1223 *(*vppp)++ = vp; 1224 if (--(*cntp) == 0) 1225 panic("union: dircache table too small"); 1226 } else { 1227 (*cntp)++; 1228 } 1229 1230 return; 1231 } 1232 1233 un = VTOUNION(vp); 1234 if (un->un_uppervp != NULLVP) 1235 union_dircache_r(un->un_uppervp, vppp, cntp); 1236 if (un->un_lowervp != NULLVP) 1237 union_dircache_r(un->un_lowervp, vppp, cntp); 1238 } 1239 1240 struct vnode * 1241 union_dircache(vp, td) 1242 struct vnode *vp; 1243 struct thread *td; 1244 { 1245 int cnt; 1246 struct vnode *nvp; 1247 struct vnode **vpp; 1248 struct vnode **dircache; 1249 struct union_node *un; 1250 int error; 1251 1252 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1253 dircache = VTOUNION(vp)->un_dircache; 1254 1255 nvp = NULLVP; 1256 1257 if (dircache == NULL) { 1258 cnt = 0; 1259 union_dircache_r(vp, 0, &cnt); 1260 cnt++; 1261 dircache = malloc(cnt * sizeof(struct vnode *), 1262 M_TEMP, M_WAITOK); 1263 vpp = dircache; 1264 union_dircache_r(vp, &vpp, &cnt); 1265 *vpp = NULLVP; 1266 vpp = dircache + 1; 1267 } else { 1268 vpp = dircache; 1269 do { 1270 if (*vpp++ == VTOUNION(vp)->un_uppervp) 1271 break; 1272 } while (*vpp != NULLVP); 1273 } 1274 1275 if (*vpp == NULLVP) 1276 goto out; 1277 1278 /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/ 1279 UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); 1280 VREF(*vpp); 1281 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); 1282 UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? 
struct vnode *
union_dircache(vp, td)
	struct vnode *vp;
	struct thread *td;
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == NULL) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = malloc(cnt * sizeof(struct vnode *),
				M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/
	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99)));
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99)));
	if (error)
		goto out;

	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	VOP_UNLOCK(vp, 0, td);
	return (nvp);
}

/*
 * Module glue to remove #ifdef UNION from vfs_syscalls.c.
 */
static int
union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
{
	int error = 0;

	if ((*vp)->v_op == union_vnodeop_p) {
		struct vnode *lvp;

		lvp = union_dircache(*vp, td);
		if (lvp != NULLVP) {
			struct vattr va;

			/*
			 * If the directory is opaque,
			 * then don't show lower entries.
			 */
			error = VOP_GETATTR(*vp, &va, fp->f_cred, td);
			if (va.va_flags & OPAQUE) {
				vput(lvp);
				lvp = NULL;
			}
		}

		if (lvp != NULLVP) {
			error = VOP_OPEN(lvp, FREAD, fp->f_cred, td);
			if (error == 0 && vn_canvmio(lvp) == TRUE)
				error = vfs_object_create(lvp, td, fp->f_cred);
			if (error) {
				vput(lvp);
				return (error);
			}
			VOP_UNLOCK(lvp, 0, td);
			FILE_LOCK(fp);
			fp->f_data = (caddr_t) lvp;
			fp->f_offset = 0;
			FILE_UNLOCK(fp);
			error = vn_close(*vp, FREAD, fp->f_cred, td);
			if (error)
				return (error);
			*vp = lvp;
			return (-1);	/* goto unionread */
		}
	}
	return (error);
}

static int
union_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		union_dircheckp = union_dircheck;
		break;
	case MOD_UNLOAD:
		union_dircheckp = NULL;
		break;
	default:
		break;
	}
	return (0);
}

static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);