1 /* 2 * Copyright (c) 1994 Jan-Simon Pendry 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
 *
 * @(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $Id: union_subr.c,v 1.36 1998/12/14 05:00:59 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
#include <vm/vm_zone.h>
#include <miscfs/union/union.h>

#include <sys/proc.h>

extern int	union_init __P((void));

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/*
 * Hash a (uppervp, lowervp) pair into one of the NHASH cache chains.
 * unsigned int ...
 */
#define UNION_HASH(u, l) \
	(((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))

/* Cache of live union_nodes, chained by (upper, lower) vnode pair hash. */
static LIST_HEAD(unhead, union_node) unhead[NHASH];
/* Per-chain lock words holding UN_LOCKED / UN_WANT bits. */
static int unvplock[NHASH];

static void	union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
				      int *cntp));
static int	union_list_lock __P((int ix));
static void	union_list_unlock __P((int ix));
static int	union_relookup __P((struct union_mount *um, struct vnode *dvp,
				    struct vnode **vpp,
				    struct componentname *cnp,
				    struct componentname *cn, char *path,
				    int pathlen));
static void	union_updatevp __P((struct union_node *un,
				    struct vnode *uppervp,
				    struct vnode *lowervp));
static void union_newlower __P((struct union_node *, struct vnode *));
static void union_newupper __P((struct union_node *, struct vnode *));
static int union_copyfile __P((struct vnode *, struct vnode *,
			       struct ucred *, struct proc *));
static int union_vn_create __P((struct vnode **, struct union_node *,
				struct proc *));
static int union_vn_close __P((struct vnode *, int, struct ucred *,
			       struct proc *));

/*
 * Initialize the union_node cache: empty every hash chain and clear
 * the per-chain lock words.  Always returns 0.
 */
int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t) unvplock, sizeof(unvplock));
	return (0);
}

/*
 * Attempt to lock hash chain (ix).  If the chain is already locked,
 * note that we want it, sleep until the holder wakes us, and return 1
 * so the caller knows it slept and must re-examine any state it read
 * before (callers loop on this).  Returns 0 with the chain locked.
 */
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANT;
		(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

/*
 * Unlock hash chain (ix) and wake any sleepers recorded by
 * union_list_lock().
 */
static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANT) {
		unvplock[ix] &= ~UN_WANT;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

/*
 * Re-point (un) at the (uppervp, lowervp) pair and move it to the hash
 * chain that pair implies.  The node is unhooked from its old chain
 * first; any upper/lower vnode references being replaced are vrele'd,
 * and the saved pathname / parent-directory state tied to an old lower
 * vnode is discarded.  If both new vnodes are nil the node is simply
 * left out of the cache (docache == 0).
 */
static void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			/* pathname/dirvp were captured for the old lower
			 * vnode (see union_allocvp); they go with it. */
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

/*
 * Replace only the lower vnode of (un); the upper vnode is kept.
 */
static void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

/*
 * Replace only the upper vnode of (un); the lower vnode is kept.
 */
static void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;	/* VNOVAL == "no change seen yet" */

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	/* upper layer wins: it is tested first, while sz is still VNOVAL */
	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %ld\n",
			uppersz != VNOVAL ? "upper" : "lower", (long) sz);
#endif
		vnode_pager_setsize(vp, sz);
	}
}

/*
 * allocate a union_node/vnode pair.  the vnode is
 * referenced and locked.  the new vnode is returned
 * via (vpp).  (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time.  (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped.  either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list.  this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference.  this is done because there may be
 * zero references to it and so it needs to removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * If the layers disagree on object type, keep only the upper
	 * layer mapped; remember the lower vnode in xlowervp so its
	 * reference can be dropped on the way out.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	/*
	 * Cache lookup: try three hash keys in turn -- the full
	 * (upper, lower) pair, upper-only, then lower-only -- so a
	 * node cached before one of its layers was known can still
	 * be found.  Any sleep in vget() restarts the whole search.
	 */
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
					un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_proc : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANT;
				(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			/* caller's reference is redundant with ours */
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		/* drop the references we were supposed to consume */
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

/*
 * Tear down a union_node at vnode reclaim time: unhook it from the
 * cache chain, release every vnode reference it still holds (parent,
 * upper, lower, saved directory), free the saved pathname, and free
 * the private data itself.  Always returns 0.
 */
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 */
static int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	/* Each vnode is unlocked for VOP_LEASE and relocked (XXX),
	 * so both are again locked for the copy loop below. */
	VOP_UNLOCK(fvp, 0, p);				/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
	VOP_UNLOCK(tvp, 0, p);				/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			/* write back exactly what the read consumed,
			 * restarting at the offset the read began at */
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			/* a zero-length read means EOF: done */
			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * Copy the lower-layer file of (un) up to a newly created upper-layer
 * file (copy-on-write fault).  If (docopy) is zero only the empty
 * upper file is created.  Open counts accumulated against the lower
 * vnode are migrated to the upper vnode on success.
 *
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p);
	VOP_UNLOCK(un->un_lowervp, 0, p);
	if (error)
		return (error);

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			VOP_UNLOCK(lvp, 0, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	/* close the writer reference, juggling UN_ULOCK across the
	 * unlock/close/relock of the upper vnode */
	un->un_flags &= ~UN_ULOCK;
	VOP_UNLOCK(uvp, 0, p);
	union_vn_close(uvp, FWRITE, cred, p);
	vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
	un->un_flags |= UN_ULOCK;

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
	}

	return (error);

}

/*
 * Fake up a componentname (cn) from (cnp) for the name (path) and run
 * relookup() against (dvp) with CREATE intent, returning the result in
 * (vpp).  Credentials come from the original lookup when mounted above,
 * otherwise from the mount.
 */
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be FREEed by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = zalloc(namei_zone);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	/*
	 * NOTE(review): the VREF(dvp) reference is released here only on
	 * success; presumably relookup() consumes it on failure -- confirm
	 * against relookup()'s contract before changing this.
	 */
	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		zfree(namei_zone, cn->cn_pnbuf);
		cn->cn_pnbuf = NULL;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	/* someone created the directory while we weren't looking */
	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0, p);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	vput(dvp);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0, p);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		/* restore the locked-on-exit contract before failing */
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);
	}

	/* an object by that name already exists: nothing to white out */
	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
static int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlines in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = zalloc(namei_zone);
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	/*
	 * NOTE(review): on relookup() failure the reference taken by
	 * VREF(un->un_dirvp) just below is not released here, and
	 * cn.cn_pnbuf is not freed (contrast union_relookup()) --
	 * confirm against relookup()'s error-path contract.
	 */
	VREF(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	if (error)
		return (error);
	vrele(un->un_dirvp);

	/* the name already exists in the upper layer */
	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	vput(un->un_dirvp);
	if (error)
		return (error);

	error = VOP_OPEN(vp, fmode, cred, p);
	if (error) {
		vput(vp);
		return (error);
	}

	/* account for the write open; undone in union_vn_close() */
	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

/*
 * Close a vnode opened by union_vn_create(), undoing the v_writecount
 * bump taken there when (fmode) includes FWRITE.
 */
static int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

/*
 * The upper-layer object backing (un) has been removed: release the
 * directory cache vnodes, drop the node from the name cache, and give
 * up the upper lock if this node was tracking it via UN_ULOCK.
 */
void
union_removed_upper(un)
	struct union_node *un;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode **vpp;

	/*
	 * Do not set the uppervp to NULLVP.  If lowervp is NULLVP,
	 * union node will have neither uppervp nor lowervp.  We remove
	 * the union node from cache, so that it will not be referrenced.
	 */
#if 0
	union_newupper(un, NULLVP);
#endif
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, p);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	/* a lower object exists under the same name: must white it out */
	if (un->un_lowervp != NULLVP)
		return (1);

	/* upper directory marked opaque also requires a whiteout */
	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

/*
 * Recursive worker for union_dircache(): descend through the union
 * stack collecting the non-union vnodes.  Two-pass usage: with
 * (vppp) == NULL just count into *cntp; otherwise store referenced
 * vnodes through *vppp, decrementing *cntp as a table-overflow check.
 */
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

/*
 * Build (or advance through) the directory cache for (vp): a
 * NULLVP-terminated table of the underlying directory vnodes, used to
 * read union directories layer by layer.  Returns a union vnode for
 * the next layer's directory, or NULLVP when the layers are exhausted
 * or allocation fails.  Ownership of the table moves to the returned
 * node on success.
 */
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		/* first pass counts, second pass fills the table */
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;		/* room for the NULLVP terminator */
		dircache = (struct vnode **)
			malloc(cnt * sizeof(struct vnode *),
				M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		/* resume: skip past the layer we are currently reading */
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (error)
		goto out;

	/* hand the table over to the new node */
	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	VOP_UNLOCK(vp, 0, p);
	return (nvp);
}

/*
 * Module glue to remove #ifdef UNION from vfs_syscalls.c
 */
/*
 * Hook called from getdirentries(): when (*vp) is a union directory
 * whose upper layer is exhausted, swap the open file over to the next
 * lower directory and return -1 so the caller restarts the read.
 * Returns 0 to proceed normally, or a positive errno on failure.
 */
static int
union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
{
	int error = 0;

	if ((*vp)->v_op == union_vnodeop_p) {
		struct vnode *lvp;

		lvp = union_dircache(*vp, p);
		if (lvp != NULLVP) {
			struct vattr va;

			/*
			 * If the directory is opaque,
			 * then don't show lower entries
			 */
			/*
			 * NOTE(review): va is consulted even when
			 * VOP_GETATTR() fails, leaving va undefined --
			 * confirm whether the error should short-circuit.
			 */
			error = VOP_GETATTR(*vp, &va, fp->f_cred, p);
			if (va.va_flags & OPAQUE) {
				vput(lvp);
				lvp = NULL;
			}
		}

		if (lvp != NULLVP) {
			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
			if (error) {
				vput(lvp);
				return (error);
			}
			VOP_UNLOCK(lvp, 0, p);
			/* retarget the open file at the lower directory */
			fp->f_data = (caddr_t) lvp;
			fp->f_offset = 0;
			error = vn_close(*vp, FREAD, fp->f_cred, p);
			if (error)
				return (error);
			*vp = lvp;
			return -1;	/* goto unionread */
		}
	}
	return error;
}

/*
 * Install/remove the union_dircheck hook as the module loads/unloads.
 */
static int
union_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		union_dircheckp = union_dircheck;
		break;
	case MOD_UNLOAD:
		union_dircheckp = NULL;
		break;
	default:
		break;
	}
	return 0;
}

static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);