1 /* 2 * Copyright (c) 1994 Jan-Simon Pendry 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/fcntl.h> 44 #include <sys/file.h> 45 #include <sys/filedesc.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/module.h> 50 #include <sys/mount.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/stat.h> 54 #include <sys/vnode.h> 55 56 #include <vm/vm.h> 57 #include <vm/vm_extern.h> /* for vnode_pager_setsize */ 58 #include <vm/vm_zone.h> 59 #include <vm/vm_object.h> /* for vm cache coherency */ 60 61 #include <fs/unionfs/union.h> 62 63 #include <sys/proc.h> 64 65 extern int union_init __P((void)); 66 67 /* must be power of two, otherwise change UNION_HASH() */ 68 #define NHASH 32 69 70 /* unsigned int ... */ 71 #define UNION_HASH(u, l) \ 72 (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) 73 74 static LIST_HEAD(unhead, union_node) unhead[NHASH]; 75 static int unvplock[NHASH]; 76 77 static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp, 78 int *cntp)); 79 static int union_list_lock __P((int ix)); 80 static void union_list_unlock __P((int ix)); 81 static int union_relookup __P((struct union_mount *um, struct vnode *dvp, 82 struct vnode **vpp, 83 struct componentname *cnp, 84 struct componentname *cn, char *path, 85 int pathlen)); 86 static void union_updatevp __P((struct union_node *un, 87 struct vnode *uppervp, 88 struct vnode *lowervp)); 89 static void union_newlower __P((struct union_node *, struct vnode *)); 90 static void union_newupper __P((struct union_node *, struct vnode *)); 91 static int union_copyfile __P((struct vnode *, struct vnode *, 92 struct ucred *, struct proc *)); 93 static int union_vn_create __P((struct vnode **, struct union_node *, 94 struct proc *)); 95 static int union_vn_close __P((struct vnode *, int, struct ucred *, 96 struct proc *)); 97 98 int 99 union_init() 100 { 101 int i; 102 103 for (i = 0; i < NHASH; i++) 104 LIST_INIT(&unhead[i]); 105 bzero((caddr_t)unvplock, sizeof(unvplock)); 106 return (0); 107 } 108 109 static int 110 union_list_lock(ix) 111 int ix; 112 { 113 if (unvplock[ix] & UNVP_LOCKED) { 114 unvplock[ix] |= UNVP_WANT; 115 (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); 116 return (1); 117 } 118 unvplock[ix] |= UNVP_LOCKED; 119 return (0); 120 } 121 122 static void 123 union_list_unlock(ix) 124 int ix; 125 { 126 unvplock[ix] &= ~UNVP_LOCKED; 127 128 if (unvplock[ix] & UNVP_WANT) { 129 unvplock[ix] &= ~UNVP_WANT; 130 wakeup((caddr_t) &unvplock[ix]); 131 } 132 } 133 134 /* 135 * union_updatevp: 136 * 137 * The uppervp, if not NULL, must be referenced and not locked by us 138 * The lowervp, if not NULL, must be referenced. 139 * 140 * if uppervp and lowervp match pointers already installed, nothing 141 * happens. The passed vp's (when matching) are not adjusted. This 142 * routine may only be called by union_newupper() and union_newlower(). 143 */ 144 145 static void 146 union_updatevp(un, uppervp, lowervp) 147 struct union_node *un; 148 struct vnode *uppervp; 149 struct vnode *lowervp; 150 { 151 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); 152 int nhash = UNION_HASH(uppervp, lowervp); 153 int docache = (lowervp != NULLVP || uppervp != NULLVP); 154 int lhash, uhash; 155 156 /* 157 * Ensure locking is ordered from lower to higher 158 * to avoid deadlocks. 159 */ 160 if (nhash < ohash) { 161 lhash = nhash; 162 uhash = ohash; 163 } else { 164 lhash = ohash; 165 uhash = nhash; 166 } 167 168 if (lhash != uhash) { 169 while (union_list_lock(lhash)) 170 continue; 171 } 172 173 while (union_list_lock(uhash)) 174 continue; 175 176 if (ohash != nhash || !docache) { 177 if (un->un_flags & UN_CACHED) { 178 un->un_flags &= ~UN_CACHED; 179 LIST_REMOVE(un, un_cache); 180 } 181 } 182 183 if (ohash != nhash) 184 union_list_unlock(ohash); 185 186 if (un->un_lowervp != lowervp) { 187 if (un->un_lowervp) { 188 vrele(un->un_lowervp); 189 if (un->un_path) { 190 free(un->un_path, M_TEMP); 191 un->un_path = 0; 192 } 193 } 194 un->un_lowervp = lowervp; 195 un->un_lowersz = VNOVAL; 196 } 197 198 if (un->un_uppervp != uppervp) { 199 if (un->un_uppervp) 200 vrele(un->un_uppervp); 201 un->un_uppervp = uppervp; 202 un->un_uppersz = VNOVAL; 203 } 204 205 if (docache && (ohash != nhash)) { 206 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); 207 un->un_flags |= UN_CACHED; 208 } 209 210 union_list_unlock(nhash); 211 } 212 213 /* 214 * Set a new lowervp. The passed lowervp must be referenced and will be 215 * stored in the vp in a referenced state. 216 */ 217 218 static void 219 union_newlower(un, lowervp) 220 struct union_node *un; 221 struct vnode *lowervp; 222 { 223 union_updatevp(un, un->un_uppervp, lowervp); 224 } 225 226 /* 227 * Set a new uppervp. The passed uppervp must be locked and will be 228 * stored in the vp in a locked state. The caller should not unlock 229 * uppervp. 230 */ 231 232 static void 233 union_newupper(un, uppervp) 234 struct union_node *un; 235 struct vnode *uppervp; 236 { 237 union_updatevp(un, uppervp, un->un_lowervp); 238 } 239 240 /* 241 * Keep track of size changes in the underlying vnodes. 242 * If the size changes, then callback to the vm layer 243 * giving priority to the upper layer size. 244 */ 245 void 246 union_newsize(vp, uppersz, lowersz) 247 struct vnode *vp; 248 off_t uppersz, lowersz; 249 { 250 struct union_node *un; 251 off_t sz; 252 253 /* only interested in regular files */ 254 if (vp->v_type != VREG) 255 return; 256 257 un = VTOUNION(vp); 258 sz = VNOVAL; 259 260 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { 261 un->un_uppersz = uppersz; 262 if (sz == VNOVAL) 263 sz = un->un_uppersz; 264 } 265 266 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { 267 un->un_lowersz = lowersz; 268 if (sz == VNOVAL) 269 sz = un->un_lowersz; 270 } 271 272 if (sz != VNOVAL) { 273 UDEBUG(("union: %s size now %ld\n", 274 (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); 275 /* 276 * There is no need to change size of non-existent object 277 */ 278 /* vnode_pager_setsize(vp, sz); */ 279 } 280 } 281 282 /* 283 * union_allocvp: allocate a union_node and associate it with a 284 * parent union_node and one or two vnodes. 285 * 286 * vpp Holds the returned vnode locked and referenced if no 287 * error occurs. 288 * 289 * mp Holds the mount point. mp may or may not be busied. 290 * allocvp makes no changes to mp. 291 * 292 * dvp Holds the parent union_node to the one we wish to create. 293 * XXX may only be used to traverse an uncopied lowervp-based 294 * tree? XXX 295 * 296 * dvp may or may not be locked. allocvp makes no changes 297 * to dvp. 298 * 299 * upperdvp Holds the parent vnode to uppervp, generally used along 300 * with path component information to create a shadow of 301 * lowervp when uppervp does not exist. 302 * 303 * upperdvp is referenced but unlocked on entry, and will be 304 * dereferenced on return. 305 * 306 * uppervp Holds the new uppervp vnode to be stored in the 307 * union_node we are allocating. uppervp is referenced but 308 * not locked, and will be dereferenced on return. 309 * 310 * lowervp Holds the new lowervp vnode to be stored in the 311 * union_node we are allocating. lowervp is referenced but 312 * not locked, and will be dereferenced on return. 313 * 314 * cnp Holds path component information to be coupled with 315 * lowervp and upperdvp to allow unionfs to create an uppervp 316 * later on. Only used if lowervp is valid. The conents 317 * of cnp is only valid for the duration of the call. 318 * 319 * docache Determine whether this node should be entered in the 320 * cache or whether it should be destroyed as soon as possible. 321 * 322 * all union_nodes are maintained on a singly-linked 323 * list. new nodes are only allocated when they cannot 324 * be found on this list. entries on the list are 325 * removed when the vfs reclaim entry is called. 326 * 327 * a single lock is kept for the entire list. this is 328 * needed because the getnewvnode() function can block 329 * waiting for a vnode to become free, in which case there 330 * may be more than one process trying to get the same 331 * vnode. this lock is only taken if we are going to 332 * call getnewvnode, since the kernel itself is single-threaded. 333 * 334 * if an entry is found on the list, then call vget() to 335 * take a reference. this is done because there may be 336 * zero references to it and so it needs to removed from 337 * the vnode free list. 338 */ 339 340 int 341 union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) 342 struct vnode **vpp; 343 struct mount *mp; 344 struct vnode *dvp; /* parent union vnode */ 345 struct vnode *upperdvp; /* parent vnode of uppervp */ 346 struct componentname *cnp; /* may be null */ 347 struct vnode *uppervp; /* may be null */ 348 struct vnode *lowervp; /* may be null */ 349 int docache; 350 { 351 int error; 352 struct union_node *un = 0; 353 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 354 struct proc *p = (cnp) ? cnp->cn_proc : curproc; 355 int hash = 0; 356 int vflag; 357 int try; 358 359 if (uppervp == NULLVP && lowervp == NULLVP) 360 panic("union: unidentifiable allocation"); 361 362 if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { 363 vrele(lowervp); 364 lowervp = NULLVP; 365 } 366 367 /* detect the root vnode (and aliases) */ 368 vflag = 0; 369 if ((uppervp == um->um_uppervp) && 370 ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { 371 if (lowervp == NULLVP) { 372 lowervp = um->um_lowervp; 373 if (lowervp != NULLVP) 374 VREF(lowervp); 375 } 376 vflag = VROOT; 377 } 378 379 loop: 380 if (!docache) { 381 un = 0; 382 } else for (try = 0; try < 3; try++) { 383 switch (try) { 384 case 0: 385 if (lowervp == NULLVP) 386 continue; 387 hash = UNION_HASH(uppervp, lowervp); 388 break; 389 390 case 1: 391 if (uppervp == NULLVP) 392 continue; 393 hash = UNION_HASH(uppervp, NULLVP); 394 break; 395 396 case 2: 397 if (lowervp == NULLVP) 398 continue; 399 hash = UNION_HASH(NULLVP, lowervp); 400 break; 401 } 402 403 while (union_list_lock(hash)) 404 continue; 405 406 LIST_FOREACH(un, &unhead[hash], un_cache) { 407 if ((un->un_lowervp == lowervp || 408 un->un_lowervp == NULLVP) && 409 (un->un_uppervp == uppervp || 410 un->un_uppervp == NULLVP) && 411 (UNIONTOV(un)->v_mount == mp)) { 412 if (vget(UNIONTOV(un), 0, 413 cnp ? cnp->cn_proc : NULL)) { 414 union_list_unlock(hash); 415 goto loop; 416 } 417 break; 418 } 419 } 420 421 union_list_unlock(hash); 422 423 if (un) 424 break; 425 } 426 427 if (un) { 428 /* 429 * Obtain a lock on the union_node. Everything is unlocked 430 * except for dvp, so check that case. If they match, our 431 * new un is already locked. Otherwise we have to lock our 432 * new un. 433 * 434 * A potential deadlock situation occurs when we are holding 435 * one lock while trying to get another. We must follow 436 * strict ordering rules to avoid it. We try to locate dvp 437 * by scanning up from un_vnode, since the most likely 438 * scenario is un being under dvp. 439 */ 440 441 if (dvp && un->un_vnode != dvp) { 442 struct vnode *scan = un->un_vnode; 443 444 do { 445 scan = VTOUNION(scan)->un_pvp; 446 } while (scan && scan->v_tag == VT_UNION && scan != dvp); 447 if (scan != dvp) { 448 /* 449 * our new un is above dvp (we never saw dvp 450 * while moving up the tree). 451 */ 452 VREF(dvp); 453 VOP_UNLOCK(dvp, 0, p); 454 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); 455 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); 456 vrele(dvp); 457 } else { 458 /* 459 * our new un is under dvp 460 */ 461 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); 462 } 463 } else if (dvp == NULLVP) { 464 /* 465 * dvp is NULL, we need to lock un. 466 */ 467 error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); 468 } else { 469 /* 470 * dvp == un->un_vnode, we are already locked. 471 */ 472 error = 0; 473 } 474 475 if (error) 476 goto loop; 477 478 /* 479 * At this point, the union_node is locked and referenced. 480 * 481 * uppervp is locked and referenced or NULL, lowervp is 482 * referenced or NULL. 483 */ 484 UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", 485 un, un->un_vnode, un->un_uppervp, 486 (un->un_uppervp ? un->un_uppervp->v_usecount : -99), 487 uppervp, 488 (uppervp ? uppervp->v_usecount : -99) 489 )); 490 491 if (uppervp != un->un_uppervp) { 492 KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); 493 union_newupper(un, uppervp); 494 } else if (uppervp) { 495 KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); 496 vrele(uppervp); 497 } 498 499 /* 500 * Save information about the lower layer. 501 * This needs to keep track of pathname 502 * and directory information which union_vn_create 503 * might need. 504 */ 505 if (lowervp != un->un_lowervp) { 506 union_newlower(un, lowervp); 507 if (cnp && (lowervp != NULLVP)) { 508 un->un_path = malloc(cnp->cn_namelen+1, 509 M_TEMP, M_WAITOK); 510 bcopy(cnp->cn_nameptr, un->un_path, 511 cnp->cn_namelen); 512 un->un_path[cnp->cn_namelen] = '\0'; 513 } 514 } else if (lowervp) { 515 vrele(lowervp); 516 } 517 518 /* 519 * and upperdvp 520 */ 521 if (upperdvp != un->un_dirvp) { 522 if (un->un_dirvp) 523 vrele(un->un_dirvp); 524 un->un_dirvp = upperdvp; 525 } else if (upperdvp) { 526 vrele(upperdvp); 527 } 528 529 *vpp = UNIONTOV(un); 530 return (0); 531 } 532 533 if (docache) { 534 /* 535 * otherwise lock the vp list while we call getnewvnode 536 * since that can block. 537 */ 538 hash = UNION_HASH(uppervp, lowervp); 539 540 if (union_list_lock(hash)) 541 goto loop; 542 } 543 544 /* 545 * Create new node rather then replace old node 546 */ 547 548 error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); 549 if (error) { 550 /* 551 * If an error occurs clear out vnodes. 552 */ 553 if (lowervp) 554 vrele(lowervp); 555 if (uppervp) 556 vrele(uppervp); 557 if (upperdvp) 558 vrele(upperdvp); 559 *vpp = NULL; 560 goto out; 561 } 562 563 MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), 564 M_TEMP, M_WAITOK); 565 566 (*vpp)->v_flag |= vflag; 567 if (uppervp) 568 (*vpp)->v_type = uppervp->v_type; 569 else 570 (*vpp)->v_type = lowervp->v_type; 571 572 un = VTOUNION(*vpp); 573 bzero(un, sizeof(*un)); 574 575 lockinit(&un->un_lock, PVFS, "unlock", 0, 0); 576 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); 577 578 un->un_vnode = *vpp; 579 un->un_uppervp = uppervp; 580 un->un_uppersz = VNOVAL; 581 un->un_lowervp = lowervp; 582 un->un_lowersz = VNOVAL; 583 un->un_dirvp = upperdvp; 584 un->un_pvp = dvp; /* only parent dir in new allocation */ 585 if (dvp != NULLVP) 586 VREF(dvp); 587 un->un_dircache = 0; 588 un->un_openl = 0; 589 590 if (cnp && (lowervp != NULLVP)) { 591 un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); 592 bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); 593 un->un_path[cnp->cn_namelen] = '\0'; 594 } else { 595 un->un_path = 0; 596 un->un_dirvp = NULL; 597 } 598 599 if (docache) { 600 LIST_INSERT_HEAD(&unhead[hash], un, un_cache); 601 un->un_flags |= UN_CACHED; 602 } 603 604 out: 605 if (docache) 606 union_list_unlock(hash); 607 608 return (error); 609 } 610 611 int 612 union_freevp(vp) 613 struct vnode *vp; 614 { 615 struct union_node *un = VTOUNION(vp); 616 617 if (un->un_flags & UN_CACHED) { 618 un->un_flags &= ~UN_CACHED; 619 LIST_REMOVE(un, un_cache); 620 } 621 622 if (un->un_pvp != NULLVP) { 623 vrele(un->un_pvp); 624 un->un_pvp = NULL; 625 } 626 if (un->un_uppervp != NULLVP) { 627 vrele(un->un_uppervp); 628 un->un_uppervp = NULL; 629 } 630 if (un->un_lowervp != NULLVP) { 631 vrele(un->un_lowervp); 632 un->un_lowervp = NULL; 633 } 634 if (un->un_dirvp != NULLVP) { 635 vrele(un->un_dirvp); 636 un->un_dirvp = NULL; 637 } 638 if (un->un_path) { 639 free(un->un_path, M_TEMP); 640 un->un_path = NULL; 641 } 642 lockdestroy(&un->un_lock); 643 644 FREE(vp->v_data, M_TEMP); 645 vp->v_data = 0; 646 647 return (0); 648 } 649 650 /* 651 * copyfile. copy the vnode (fvp) to the vnode (tvp) 652 * using a sequence of reads and writes. both (fvp) 653 * and (tvp) are locked on entry and exit. 654 * 655 * fvp and tvp are both exclusive locked on call, but their refcount's 656 * haven't been bumped at all. 657 */ 658 static int 659 union_copyfile(fvp, tvp, cred, p) 660 struct vnode *fvp; 661 struct vnode *tvp; 662 struct ucred *cred; 663 struct proc *p; 664 { 665 char *buf; 666 struct uio uio; 667 struct iovec iov; 668 int error = 0; 669 670 /* 671 * strategy: 672 * allocate a buffer of size MAXBSIZE. 673 * loop doing reads and writes, keeping track 674 * of the current uio offset. 675 * give up at the first sign of trouble. 676 */ 677 678 bzero(&uio, sizeof(uio)); 679 680 uio.uio_procp = p; 681 uio.uio_segflg = UIO_SYSSPACE; 682 uio.uio_offset = 0; 683 684 VOP_LEASE(fvp, p, cred, LEASE_READ); 685 VOP_LEASE(tvp, p, cred, LEASE_WRITE); 686 687 buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); 688 689 /* ugly loop follows... */ 690 do { 691 off_t offset = uio.uio_offset; 692 int count; 693 int bufoffset; 694 695 /* 696 * Setup for big read 697 */ 698 uio.uio_iov = &iov; 699 uio.uio_iovcnt = 1; 700 iov.iov_base = buf; 701 iov.iov_len = MAXBSIZE; 702 uio.uio_resid = iov.iov_len; 703 uio.uio_rw = UIO_READ; 704 705 if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) 706 break; 707 708 /* 709 * Get bytes read, handle read eof case and setup for 710 * write loop 711 */ 712 if ((count = MAXBSIZE - uio.uio_resid) == 0) 713 break; 714 bufoffset = 0; 715 716 /* 717 * Write until an error occurs or our buffer has been 718 * exhausted, then update the offset for the next read. 719 */ 720 while (bufoffset < count) { 721 uio.uio_iov = &iov; 722 uio.uio_iovcnt = 1; 723 iov.iov_base = buf + bufoffset; 724 iov.iov_len = count - bufoffset; 725 uio.uio_offset = offset + bufoffset; 726 uio.uio_rw = UIO_WRITE; 727 uio.uio_resid = iov.iov_len; 728 729 if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) 730 break; 731 bufoffset += (count - bufoffset) - uio.uio_resid; 732 } 733 uio.uio_offset = offset + bufoffset; 734 } while (error == 0); 735 736 free(buf, M_TEMP); 737 return (error); 738 } 739 740 /* 741 * 742 * un's vnode is assumed to be locked on entry and remains locked on exit. 743 */ 744 745 int 746 union_copyup(un, docopy, cred, p) 747 struct union_node *un; 748 int docopy; 749 struct ucred *cred; 750 struct proc *p; 751 { 752 int error; 753 struct mount *mp; 754 struct vnode *lvp, *uvp; 755 756 /* 757 * If the user does not have read permission, the vnode should not 758 * be copied to upper layer. 759 */ 760 vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p); 761 error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p); 762 VOP_UNLOCK(un->un_lowervp, 0, p); 763 if (error) 764 return (error); 765 766 if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0) 767 return (error); 768 if ((error = union_vn_create(&uvp, un, p)) != 0) { 769 vn_finished_write(mp); 770 return (error); 771 } 772 773 lvp = un->un_lowervp; 774 775 KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); 776 if (docopy) { 777 /* 778 * XX - should not ignore errors 779 * from VOP_CLOSE 780 */ 781 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); 782 error = VOP_OPEN(lvp, FREAD, cred, p); 783 if (error == 0 && vn_canvmio(lvp) == TRUE) 784 error = vfs_object_create(lvp, p, cred); 785 if (error == 0) { 786 error = union_copyfile(lvp, uvp, cred, p); 787 VOP_UNLOCK(lvp, 0, p); 788 (void) VOP_CLOSE(lvp, FREAD, cred, p); 789 } 790 if (error == 0) 791 UDEBUG(("union: copied up %s\n", un->un_path)); 792 793 } 794 VOP_UNLOCK(uvp, 0, p); 795 vn_finished_write(mp); 796 union_newupper(un, uvp); 797 KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); 798 union_vn_close(uvp, FWRITE, cred, p); 799 KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); 800 /* 801 * Subsequent IOs will go to the top layer, so 802 * call close on the lower vnode and open on the 803 * upper vnode to ensure that the filesystem keeps 804 * its references counts right. This doesn't do 805 * the right thing with (cred) and (FREAD) though. 806 * Ignoring error returns is not right, either. 807 */ 808 if (error == 0) { 809 int i; 810 811 for (i = 0; i < un->un_openl; i++) { 812 (void) VOP_CLOSE(lvp, FREAD, cred, p); 813 (void) VOP_OPEN(uvp, FREAD, cred, p); 814 } 815 if (un->un_openl) { 816 if (vn_canvmio(uvp) == TRUE) 817 error = vfs_object_create(uvp, p, cred); 818 } 819 un->un_openl = 0; 820 } 821 822 return (error); 823 824 } 825 826 /* 827 * union_relookup: 828 * 829 * dvp should be locked on entry and will be locked on return. No 830 * net change in the ref count will occur. 831 * 832 * If an error is returned, *vpp will be invalid, otherwise it 833 * will hold a locked, referenced vnode. If *vpp == dvp then 834 * remember that only one exclusive lock is held. 835 */ 836 837 static int 838 union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) 839 struct union_mount *um; 840 struct vnode *dvp; 841 struct vnode **vpp; 842 struct componentname *cnp; 843 struct componentname *cn; 844 char *path; 845 int pathlen; 846 { 847 int error; 848 849 /* 850 * A new componentname structure must be faked up because 851 * there is no way to know where the upper level cnp came 852 * from or what it is being used for. This must duplicate 853 * some of the work done by NDINIT, some of the work done 854 * by namei, some of the work done by lookup and some of 855 * the work done by VOP_LOOKUP when given a CREATE flag. 856 * Conclusion: Horrible. 857 */ 858 cn->cn_namelen = pathlen; 859 cn->cn_pnbuf = zalloc(namei_zone); 860 bcopy(path, cn->cn_pnbuf, cn->cn_namelen); 861 cn->cn_pnbuf[cn->cn_namelen] = '\0'; 862 863 cn->cn_nameiop = CREATE; 864 cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); 865 cn->cn_proc = cnp->cn_proc; 866 if (um->um_op == UNMNT_ABOVE) 867 cn->cn_cred = cnp->cn_cred; 868 else 869 cn->cn_cred = um->um_cred; 870 cn->cn_nameptr = cn->cn_pnbuf; 871 cn->cn_consume = cnp->cn_consume; 872 873 VREF(dvp); 874 VOP_UNLOCK(dvp, 0, cnp->cn_proc); 875 876 /* 877 * Pass dvp unlocked and referenced on call to relookup(). 878 * 879 * If an error occurs, dvp will be returned unlocked and dereferenced. 880 */ 881 882 if ((error = relookup(dvp, vpp, cn)) != 0) { 883 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); 884 return(error); 885 } 886 887 /* 888 * If no error occurs, dvp will be returned locked with the reference 889 * left as before, and vpp will be returned referenced and locked. 890 * 891 * We want to return with dvp as it was passed to us, so we get 892 * rid of our reference. 893 */ 894 vrele(dvp); 895 return (0); 896 } 897 898 /* 899 * Create a shadow directory in the upper layer. 900 * The new vnode is returned locked. 901 * 902 * (um) points to the union mount structure for access to the 903 * the mounting process's credentials. 904 * (dvp) is the directory in which to create the shadow directory, 905 * it is locked (but not ref'd) on entry and return. 906 * (cnp) is the componentname to be created. 907 * (vpp) is the returned newly created shadow directory, which 908 * is returned locked and ref'd 909 */ 910 int 911 union_mkshadow(um, dvp, cnp, vpp) 912 struct union_mount *um; 913 struct vnode *dvp; 914 struct componentname *cnp; 915 struct vnode **vpp; 916 { 917 int error; 918 struct vattr va; 919 struct proc *p = cnp->cn_proc; 920 struct componentname cn; 921 struct mount *mp; 922 923 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) 924 return (error); 925 if ((error = union_relookup(um, dvp, vpp, cnp, &cn, 926 cnp->cn_nameptr, cnp->cn_namelen)) != 0) { 927 vn_finished_write(mp); 928 return (error); 929 } 930 931 if (*vpp) { 932 if (cn.cn_flags & HASBUF) { 933 zfree(namei_zone, cn.cn_pnbuf); 934 cn.cn_flags &= ~HASBUF; 935 } 936 if (dvp == *vpp) 937 vrele(*vpp); 938 else 939 vput(*vpp); 940 vn_finished_write(mp); 941 *vpp = NULLVP; 942 return (EEXIST); 943 } 944 945 /* 946 * policy: when creating the shadow directory in the 947 * upper layer, create it owned by the user who did 948 * the mount, group from parent directory, and mode 949 * 777 modified by umask (ie mostly identical to the 950 * mkdir syscall). (jsp, kb) 951 */ 952 953 VATTR_NULL(&va); 954 va.va_type = VDIR; 955 va.va_mode = um->um_cmode; 956 957 /* VOP_LEASE: dvp is locked */ 958 VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); 959 960 error = VOP_MKDIR(dvp, vpp, &cn, &va); 961 if (cn.cn_flags & HASBUF) { 962 zfree(namei_zone, cn.cn_pnbuf); 963 cn.cn_flags &= ~HASBUF; 964 } 965 /*vput(dvp);*/ 966 vn_finished_write(mp); 967 return (error); 968 } 969 970 /* 971 * Create a whiteout entry in the upper layer. 972 * 973 * (um) points to the union mount structure for access to the 974 * the mounting process's credentials. 975 * (dvp) is the directory in which to create the whiteout. 976 * it is locked on entry and return. 977 * (cnp) is the componentname to be created. 978 */ 979 int 980 union_mkwhiteout(um, dvp, cnp, path) 981 struct union_mount *um; 982 struct vnode *dvp; 983 struct componentname *cnp; 984 char *path; 985 { 986 int error; 987 struct proc *p = cnp->cn_proc; 988 struct vnode *wvp; 989 struct componentname cn; 990 struct mount *mp; 991 992 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) 993 return (error); 994 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); 995 if (error) { 996 vn_finished_write(mp); 997 return (error); 998 } 999 1000 if (wvp) { 1001 if (cn.cn_flags & HASBUF) { 1002 zfree(namei_zone, cn.cn_pnbuf); 1003 cn.cn_flags &= ~HASBUF; 1004 } 1005 if (wvp == dvp) 1006 vrele(wvp); 1007 else 1008 vput(wvp); 1009 vn_finished_write(mp); 1010 return (EEXIST); 1011 } 1012 1013 /* VOP_LEASE: dvp is locked */ 1014 VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); 1015 1016 error = VOP_WHITEOUT(dvp, &cn, CREATE); 1017 if (cn.cn_flags & HASBUF) { 1018 zfree(namei_zone, cn.cn_pnbuf); 1019 cn.cn_flags &= ~HASBUF; 1020 } 1021 vn_finished_write(mp); 1022 return (error); 1023 } 1024 1025 /* 1026 * union_vn_create: creates and opens a new shadow file 1027 * on the upper union layer. this function is similar 1028 * in spirit to calling vn_open but it avoids calling namei(). 1029 * the problem with calling namei is that a) it locks too many 1030 * things, and b) it doesn't start at the "right" directory, 1031 * whereas relookup is told where to start. 1032 * 1033 * On entry, the vnode associated with un is locked. It remains locked 1034 * on return. 1035 * 1036 * If no error occurs, *vpp contains a locked referenced vnode for your 1037 * use. If an error occurs *vpp iis undefined. 1038 */ 1039 static int 1040 union_vn_create(vpp, un, p) 1041 struct vnode **vpp; 1042 struct union_node *un; 1043 struct proc *p; 1044 { 1045 struct vnode *vp; 1046 struct ucred *cred = p->p_ucred; 1047 struct vattr vat; 1048 struct vattr *vap = &vat; 1049 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); 1050 int error; 1051 int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; 1052 struct componentname cn; 1053 1054 *vpp = NULLVP; 1055 1056 /* 1057 * Build a new componentname structure (for the same 1058 * reasons outlines in union_mkshadow). 1059 * The difference here is that the file is owned by 1060 * the current user, rather than by the person who 1061 * did the mount, since the current user needs to be 1062 * able to write the file (that's why it is being 1063 * copied in the first place). 1064 */ 1065 cn.cn_namelen = strlen(un->un_path); 1066 cn.cn_pnbuf = zalloc(namei_zone); 1067 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); 1068 cn.cn_nameiop = CREATE; 1069 cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); 1070 cn.cn_proc = p; 1071 cn.cn_cred = p->p_ucred; 1072 cn.cn_nameptr = cn.cn_pnbuf; 1073 cn.cn_consume = 0; 1074 1075 /* 1076 * Pass dvp unlocked and referenced on call to relookup(). 1077 * 1078 * If an error occurs, dvp will be returned unlocked and dereferenced. 1079 */ 1080 VREF(un->un_dirvp); 1081 error = relookup(un->un_dirvp, &vp, &cn); 1082 if (error) 1083 return (error); 1084 1085 /* 1086 * If no error occurs, dvp will be returned locked with the reference 1087 * left as before, and vpp will be returned referenced and locked. 1088 */ 1089 if (vp) { 1090 vput(un->un_dirvp); 1091 if (cn.cn_flags & HASBUF) { 1092 zfree(namei_zone, cn.cn_pnbuf); 1093 cn.cn_flags &= ~HASBUF; 1094 } 1095 if (vp == un->un_dirvp) 1096 vrele(vp); 1097 else 1098 vput(vp); 1099 return (EEXIST); 1100 } 1101 1102 /* 1103 * Good - there was no race to create the file 1104 * so go ahead and create it. The permissions 1105 * on the file will be 0666 modified by the 1106 * current user's umask. Access to the file, while 1107 * it is unioned, will require access to the top *and* 1108 * bottom files. Access when not unioned will simply 1109 * require access to the top-level file. 1110 * TODO: confirm choice of access permissions. 1111 */ 1112 VATTR_NULL(vap); 1113 vap->va_type = VREG; 1114 vap->va_mode = cmode; 1115 VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); 1116 error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); 1117 if (cn.cn_flags & HASBUF) { 1118 zfree(namei_zone, cn.cn_pnbuf); 1119 cn.cn_flags &= ~HASBUF; 1120 } 1121 vput(un->un_dirvp); 1122 if (error) 1123 return (error); 1124 1125 error = VOP_OPEN(vp, fmode, cred, p); 1126 if (error == 0 && vn_canvmio(vp) == TRUE) 1127 error = vfs_object_create(vp, p, cred); 1128 if (error) { 1129 vput(vp); 1130 return (error); 1131 } 1132 vp->v_writecount++; 1133 *vpp = vp; 1134 return (0); 1135 } 1136 1137 static int 1138 union_vn_close(vp, fmode, cred, p) 1139 struct vnode *vp; 1140 int fmode; 1141 struct ucred *cred; 1142 struct proc *p; 1143 { 1144 1145 if (fmode & FWRITE) 1146 --vp->v_writecount; 1147 return (VOP_CLOSE(vp, fmode, cred, p)); 1148 } 1149 1150 #if 0 1151 1152 /* 1153 * union_removed_upper: 1154 * 1155 * called with union_node unlocked. XXX 1156 */ 1157 1158 void 1159 union_removed_upper(un) 1160 struct union_node *un; 1161 { 1162 struct proc *p = curproc; /* XXX */ 1163 struct vnode **vpp; 1164 1165 /* 1166 * Do not set the uppervp to NULLVP. If lowervp is NULLVP, 1167 * union node will have neither uppervp nor lowervp. We remove 1168 * the union node from cache, so that it will not be referrenced. 1169 */ 1170 union_newupper(un, NULLVP); 1171 if (un->un_dircache != 0) { 1172 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) 1173 vrele(*vpp); 1174 free(un->un_dircache, M_TEMP); 1175 un->un_dircache = 0; 1176 } 1177 1178 if (un->un_flags & UN_CACHED) { 1179 un->un_flags &= ~UN_CACHED; 1180 LIST_REMOVE(un, un_cache); 1181 } 1182 } 1183 1184 #endif 1185 1186 /* 1187 * determine whether a whiteout is needed 1188 * during a remove/rmdir operation. 1189 */ 1190 int 1191 union_dowhiteout(un, cred, p) 1192 struct union_node *un; 1193 struct ucred *cred; 1194 struct proc *p; 1195 { 1196 struct vattr va; 1197 1198 if (un->un_lowervp != NULLVP) 1199 return (1); 1200 1201 if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && 1202 (va.va_flags & OPAQUE)) 1203 return (1); 1204 1205 return (0); 1206 } 1207 1208 static void 1209 union_dircache_r(vp, vppp, cntp) 1210 struct vnode *vp; 1211 struct vnode ***vppp; 1212 int *cntp; 1213 { 1214 struct union_node *un; 1215 1216 if (vp->v_op != union_vnodeop_p) { 1217 if (vppp) { 1218 VREF(vp); 1219 *(*vppp)++ = vp; 1220 if (--(*cntp) == 0) 1221 panic("union: dircache table too small"); 1222 } else { 1223 (*cntp)++; 1224 } 1225 1226 return; 1227 } 1228 1229 un = VTOUNION(vp); 1230 if (un->un_uppervp != NULLVP) 1231 union_dircache_r(un->un_uppervp, vppp, cntp); 1232 if (un->un_lowervp != NULLVP) 1233 union_dircache_r(un->un_lowervp, vppp, cntp); 1234 } 1235 1236 struct vnode * 1237 union_dircache(vp, p) 1238 struct vnode *vp; 1239 struct proc *p; 1240 { 1241 int cnt; 1242 struct vnode *nvp; 1243 struct vnode **vpp; 1244 struct vnode **dircache; 1245 struct union_node *un; 1246 int error; 1247 1248 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 1249 dircache = VTOUNION(vp)->un_dircache; 1250 1251 nvp = NULLVP; 1252 1253 if (dircache == NULL) { 1254 cnt = 0; 1255 union_dircache_r(vp, 0, &cnt); 1256 cnt++; 1257 dircache = malloc(cnt * sizeof(struct vnode *), 1258 M_TEMP, M_WAITOK); 1259 vpp = dircache; 1260 union_dircache_r(vp, &vpp, &cnt); 1261 *vpp = NULLVP; 1262 vpp = dircache + 1; 1263 } else { 1264 vpp = dircache; 1265 do { 1266 if (*vpp++ == VTOUNION(vp)->un_uppervp) 1267 break; 1268 } while (*vpp != NULLVP); 1269 } 1270 1271 if (*vpp == NULLVP) 1272 goto out; 1273 1274 /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ 1275 UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); 1276 VREF(*vpp); 1277 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); 1278 UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); 1279 if (error) 1280 goto out; 1281 1282 VTOUNION(vp)->un_dircache = 0; 1283 un = VTOUNION(nvp); 1284 un->un_dircache = dircache; 1285 1286 out: 1287 VOP_UNLOCK(vp, 0, p); 1288 return (nvp); 1289 } 1290 1291 /* 1292 * Module glue to remove #ifdef UNION from vfs_syscalls.c 1293 */ 1294 static int 1295 union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) 1296 { 1297 int error = 0; 1298 1299 if ((*vp)->v_op == union_vnodeop_p) { 1300 struct vnode *lvp; 1301 1302 lvp = union_dircache(*vp, p); 1303 if (lvp != NULLVP) { 1304 struct vattr va; 1305 1306 /* 1307 * If the directory is opaque, 1308 * then don't show lower entries 1309 */ 1310 error = VOP_GETATTR(*vp, &va, fp->f_cred, p); 1311 if (va.va_flags & OPAQUE) { 1312 vput(lvp); 1313 lvp = NULL; 1314 } 1315 } 1316 1317 if (lvp != NULLVP) { 1318 error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); 1319 if (error == 0 && vn_canvmio(lvp) == TRUE) 1320 error = vfs_object_create(lvp, p, fp->f_cred); 1321 if (error) { 1322 vput(lvp); 1323 return (error); 1324 } 1325 VOP_UNLOCK(lvp, 0, p); 1326 fp->f_data = (caddr_t) lvp; 1327 fp->f_offset = 0; 1328 error = vn_close(*vp, FREAD, fp->f_cred, p); 1329 if (error) 1330 return (error); 1331 *vp = lvp; 1332 return -1; /* goto unionread */ 1333 } 1334 } 1335 return error; 1336 } 1337 1338 static int 1339 union_modevent(module_t mod, int type, void *data) 1340 { 1341 switch (type) { 1342 case MOD_LOAD: 1343 union_dircheckp = union_dircheck; 1344 break; 1345 case MOD_UNLOAD: 1346 union_dircheckp = NULL; 1347 break; 1348 default: 1349 break; 1350 } 1351 return 0; 1352 } 1353 1354 static moduledata_t union_mod = { 1355 "union_dircheck", 1356 union_modevent, 1357 NULL 1358 }; 1359 1360 DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); 1361