1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1994 Jan-Simon Pendry 5 * Copyright (c) 1994 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc. 8 * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org> 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Jan-Simon Pendry. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/ktr.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/vnode.h> 52 #include <sys/dirent.h> 53 #include <sys/fcntl.h> 54 #include <sys/filedesc.h> 55 #include <sys/stat.h> 56 #include <sys/sysctl.h> 57 #include <sys/taskqueue.h> 58 #include <sys/resourcevar.h> 59 60 #include <security/mac/mac_framework.h> 61 62 #include <vm/uma.h> 63 64 #include <fs/unionfs/union.h> 65 66 #define NUNIONFSNODECACHE 16 67 #define UNIONFSHASHMASK (NUNIONFSNODECACHE - 1) 68 69 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table"); 70 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part"); 71 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part"); 72 73 static struct task unionfs_deferred_rele_task; 74 static struct mtx unionfs_deferred_rele_lock; 75 static STAILQ_HEAD(, unionfs_node) unionfs_deferred_rele_list = 76 STAILQ_HEAD_INITIALIZER(unionfs_deferred_rele_list); 77 static TASKQUEUE_DEFINE_THREAD(unionfs_rele); 78 79 unsigned int unionfs_ndeferred = 0; 80 SYSCTL_UINT(_vfs, OID_AUTO, unionfs_ndeferred, CTLFLAG_RD, 81 &unionfs_ndeferred, 0, "unionfs deferred vnode release"); 82 83 static void unionfs_deferred_rele(void *, int); 84 85 /* 86 * Initialize 87 */ 88 int 89 unionfs_init(struct vfsconf *vfsp) 90 { 91 UNIONFSDEBUG("unionfs_init\n"); /* printed during system boot */ 92 TASK_INIT(&unionfs_deferred_rele_task, 0, unionfs_deferred_rele, NULL); 93 mtx_init(&unionfs_deferred_rele_lock, "uniondefr", NULL, MTX_DEF); 94 return (0); 95 } 96 97 /* 98 * Uninitialize 99 */ 100 int 101 unionfs_uninit(struct vfsconf *vfsp) 102 { 103 taskqueue_quiesce(taskqueue_unionfs_rele); 104 taskqueue_free(taskqueue_unionfs_rele); 105 mtx_destroy(&unionfs_deferred_rele_lock); 106 return (0); 107 } 108 109 static void 110 unionfs_deferred_rele(void *arg __unused, int pending __unused) 111 { 112 STAILQ_HEAD(, unionfs_node) local_rele_list; 113 struct unionfs_node *unp, *tunp; 114 unsigned int ndeferred; 115 116 ndeferred = 0; 117 STAILQ_INIT(&local_rele_list); 118 mtx_lock(&unionfs_deferred_rele_lock); 119 STAILQ_CONCAT(&local_rele_list, &unionfs_deferred_rele_list); 120 mtx_unlock(&unionfs_deferred_rele_lock); 121 STAILQ_FOREACH_SAFE(unp, &local_rele_list, un_rele, tunp) { 122 ++ndeferred; 123 MPASS(unp->un_dvp != NULL); 124 vrele(unp->un_dvp); 125 free(unp, M_UNIONFSNODE); 126 } 127 128 /* We expect this function to be single-threaded, thus no atomic */ 129 unionfs_ndeferred += ndeferred; 130 } 131 132 static struct unionfs_node_hashhead * 133 unionfs_get_hashhead(struct vnode *dvp, struct vnode *lookup) 134 { 135 struct unionfs_node *unp; 136 137 unp = VTOUNIONFS(dvp); 138 139 return (&(unp->un_hashtbl[vfs_hash_index(lookup) & UNIONFSHASHMASK])); 140 } 141 142 /* 143 * Attempt to lookup a cached unionfs vnode by upper/lower vp 144 * from dvp, with dvp's interlock held. 145 */ 146 static struct vnode * 147 unionfs_get_cached_vnode_locked(struct vnode *lookup, struct vnode *dvp) 148 { 149 struct unionfs_node *unp; 150 struct unionfs_node_hashhead *hd; 151 struct vnode *vp; 152 153 hd = unionfs_get_hashhead(dvp, lookup); 154 155 LIST_FOREACH(unp, hd, un_hash) { 156 if (unp->un_uppervp == lookup || 157 unp->un_lowervp == lookup) { 158 vp = UNIONFSTOV(unp); 159 VI_LOCK_FLAGS(vp, MTX_DUPOK); 160 vp->v_iflag &= ~VI_OWEINACT; 161 if (VN_IS_DOOMED(vp) || 162 ((vp->v_iflag & VI_DOINGINACT) != 0)) { 163 VI_UNLOCK(vp); 164 vp = NULLVP; 165 } else { 166 vrefl(vp); 167 VI_UNLOCK(vp); 168 } 169 return (vp); 170 } 171 } 172 173 return (NULLVP); 174 } 175 176 177 /* 178 * Get the cached vnode. 179 */ 180 static struct vnode * 181 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp, 182 struct vnode *dvp) 183 { 184 struct vnode *vp; 185 186 vp = NULLVP; 187 VI_LOCK(dvp); 188 if (uvp != NULLVP) 189 vp = unionfs_get_cached_vnode_locked(uvp, dvp); 190 else if (lvp != NULLVP) 191 vp = unionfs_get_cached_vnode_locked(lvp, dvp); 192 VI_UNLOCK(dvp); 193 194 return (vp); 195 } 196 197 /* 198 * Add the new vnode into cache. 199 */ 200 static struct vnode * 201 unionfs_ins_cached_vnode(struct unionfs_node *uncp, 202 struct vnode *dvp) 203 { 204 struct unionfs_node_hashhead *hd; 205 struct vnode *vp; 206 207 ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__); 208 ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__); 209 KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR, 210 ("%s: v_type != VDIR", __func__)); 211 KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR, 212 ("%s: v_type != VDIR", __func__)); 213 214 vp = NULLVP; 215 VI_LOCK(dvp); 216 if (uncp->un_uppervp != NULL) 217 vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp); 218 else if (uncp->un_lowervp != NULL) 219 vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp); 220 if (vp == NULLVP) { 221 hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ? 222 uncp->un_uppervp : uncp->un_lowervp)); 223 LIST_INSERT_HEAD(hd, uncp, un_hash); 224 } 225 VI_UNLOCK(dvp); 226 227 return (vp); 228 } 229 230 /* 231 * Remove the vnode. 232 */ 233 static void 234 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp) 235 { 236 KASSERT(unp != NULL, ("%s: null node", __func__)); 237 KASSERT(dvp != NULLVP, 238 ("%s: null parent vnode", __func__)); 239 240 VI_LOCK(dvp); 241 if (unp->un_hash.le_prev != NULL) { 242 LIST_REMOVE(unp, un_hash); 243 unp->un_hash.le_next = NULL; 244 unp->un_hash.le_prev = NULL; 245 } 246 VI_UNLOCK(dvp); 247 } 248 249 /* 250 * Common cleanup handling for unionfs_nodeget 251 * Upper, lower, and parent directory vnodes are expected to be referenced by 252 * the caller. Upper and lower vnodes, if non-NULL, are also expected to be 253 * exclusively locked by the caller. 254 * This function will return with the caller's locks and references undone. 255 */ 256 static void 257 unionfs_nodeget_cleanup(struct vnode *vp, void *arg) 258 { 259 struct unionfs_node *unp; 260 261 /* 262 * Lock and reset the default vnode lock; vgone() expects a locked 263 * vnode, and we're going to reset the vnode ops. 264 */ 265 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 266 267 /* 268 * Clear out private data and reset the vnode ops to avoid use of 269 * unionfs vnode ops on a partially constructed vnode. 270 */ 271 VI_LOCK(vp); 272 vp->v_data = NULL; 273 vp->v_vnlock = &vp->v_lock; 274 vp->v_op = &dead_vnodeops; 275 VI_UNLOCK(vp); 276 vgone(vp); 277 vput(vp); 278 279 unp = arg; 280 if (unp->un_dvp != NULLVP) 281 vrele(unp->un_dvp); 282 if (unp->un_uppervp != NULLVP) 283 vput(unp->un_uppervp); 284 if (unp->un_lowervp != NULLVP) 285 vput(unp->un_lowervp); 286 if (unp->un_hashtbl != NULL) 287 hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK); 288 free(unp->un_path, M_UNIONFSPATH); 289 free(unp, M_UNIONFSNODE); 290 } 291 292 /* 293 * Make a new or get existing unionfs node. 294 * 295 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is 296 * locked, uppervp or lowervp is locked too. In order to prevent dead lock, 297 * you should not lock plurality simultaneously. 298 */ 299 int 300 unionfs_nodeget(struct mount *mp, struct vnode *uppervp, 301 struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp, 302 struct componentname *cnp) 303 { 304 char *path; 305 struct unionfs_mount *ump; 306 struct unionfs_node *unp; 307 struct vnode *vp; 308 u_long hashmask; 309 int error; 310 int lkflags; 311 enum vtype vt; 312 313 error = 0; 314 ump = MOUNTTOUNIONFSMOUNT(mp); 315 lkflags = (cnp ? cnp->cn_lkflags : 0); 316 path = (cnp ? cnp->cn_nameptr : NULL); 317 *vpp = NULLVP; 318 319 if (uppervp == NULLVP && lowervp == NULLVP) 320 panic("%s: upper and lower is null", __func__); 321 322 vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type); 323 324 /* If it has no ISLASTCN flag, path check is skipped. */ 325 if (cnp && !(cnp->cn_flags & ISLASTCN)) 326 path = NULL; 327 328 /* check the cache */ 329 if (dvp != NULLVP && vt == VDIR) { 330 vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp); 331 if (vp != NULLVP) { 332 *vpp = vp; 333 goto unionfs_nodeget_out; 334 } 335 } 336 337 if ((uppervp == NULLVP || ump->um_uppervp != uppervp) || 338 (lowervp == NULLVP || ump->um_lowervp != lowervp)) { 339 /* dvp will be NULLVP only in case of root vnode. */ 340 if (dvp == NULLVP) 341 return (EINVAL); 342 } 343 unp = malloc(sizeof(struct unionfs_node), 344 M_UNIONFSNODE, M_WAITOK | M_ZERO); 345 346 error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp); 347 if (error != 0) { 348 free(unp, M_UNIONFSNODE); 349 return (error); 350 } 351 if (dvp != NULLVP) 352 vref(dvp); 353 if (uppervp != NULLVP) 354 vref(uppervp); 355 if (lowervp != NULLVP) 356 vref(lowervp); 357 358 if (vt == VDIR) { 359 unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, 360 &hashmask); 361 KASSERT(hashmask == UNIONFSHASHMASK, 362 ("unexpected unionfs hash mask 0x%lx", hashmask)); 363 } 364 365 unp->un_vnode = vp; 366 unp->un_uppervp = uppervp; 367 unp->un_lowervp = lowervp; 368 unp->un_dvp = dvp; 369 if (uppervp != NULLVP) 370 vp->v_vnlock = uppervp->v_vnlock; 371 else 372 vp->v_vnlock = lowervp->v_vnlock; 373 374 if (path != NULL) { 375 unp->un_path = malloc(cnp->cn_namelen + 1, 376 M_UNIONFSPATH, M_WAITOK | M_ZERO); 377 bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen); 378 unp->un_path[cnp->cn_namelen] = '\0'; 379 unp->un_pathlen = cnp->cn_namelen; 380 } 381 vp->v_type = vt; 382 vp->v_data = unp; 383 384 if ((uppervp != NULLVP && ump->um_uppervp == uppervp) && 385 (lowervp != NULLVP && ump->um_lowervp == lowervp)) 386 vp->v_vflag |= VV_ROOT; 387 388 vn_lock_pair(lowervp, false, uppervp, false); 389 error = insmntque1(vp, mp, unionfs_nodeget_cleanup, unp); 390 if (error != 0) 391 return (error); 392 if (lowervp != NULL && VN_IS_DOOMED(lowervp)) { 393 vput(lowervp); 394 unp->un_lowervp = NULL; 395 } 396 if (uppervp != NULL && VN_IS_DOOMED(uppervp)) { 397 vput(uppervp); 398 unp->un_uppervp = NULL; 399 } 400 if (unp->un_lowervp == NULL && unp->un_uppervp == NULL) { 401 unionfs_nodeget_cleanup(vp, unp); 402 return (ENOENT); 403 } 404 405 if (dvp != NULLVP && vt == VDIR) 406 *vpp = unionfs_ins_cached_vnode(unp, dvp); 407 if (*vpp != NULLVP) { 408 unionfs_nodeget_cleanup(vp, unp); 409 vp = *vpp; 410 } else { 411 if (uppervp != NULL) 412 VOP_UNLOCK(uppervp); 413 if (lowervp != NULL) 414 VOP_UNLOCK(lowervp); 415 *vpp = vp; 416 } 417 418 unionfs_nodeget_out: 419 if (lkflags & LK_TYPE_MASK) 420 vn_lock(vp, lkflags | LK_RETRY); 421 422 return (0); 423 } 424 425 /* 426 * Clean up the unionfs node. 427 */ 428 void 429 unionfs_noderem(struct vnode *vp) 430 { 431 struct unionfs_node *unp, *unp_t1, *unp_t2; 432 struct unionfs_node_hashhead *hd; 433 struct unionfs_node_status *unsp, *unsp_tmp; 434 struct vnode *lvp; 435 struct vnode *uvp; 436 struct vnode *dvp; 437 int count; 438 439 KASSERT(vp->v_vnlock->lk_recurse == 0, 440 ("%s: vnode %p locked recursively", __func__, vp)); 441 if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) 442 panic("%s: failed to acquire lock for vnode lock", __func__); 443 444 /* 445 * Use the interlock to protect the clearing of v_data to 446 * prevent faults in unionfs_lock(). 447 */ 448 VI_LOCK(vp); 449 unp = VTOUNIONFS(vp); 450 lvp = unp->un_lowervp; 451 uvp = unp->un_uppervp; 452 dvp = unp->un_dvp; 453 unp->un_lowervp = unp->un_uppervp = NULLVP; 454 vp->v_vnlock = &(vp->v_lock); 455 vp->v_data = NULL; 456 vp->v_object = NULL; 457 if (vp->v_writecount > 0) { 458 if (uvp != NULL) 459 VOP_ADD_WRITECOUNT(uvp, -vp->v_writecount); 460 else if (lvp != NULL) 461 VOP_ADD_WRITECOUNT(lvp, -vp->v_writecount); 462 } else if (vp->v_writecount < 0) 463 vp->v_writecount = 0; 464 if (unp->un_hashtbl != NULL) { 465 /* 466 * Clear out any cached child vnodes. This should only 467 * be necessary during forced unmount, when the vnode may 468 * be reclaimed with a non-zero use count. Otherwise the 469 * reference held by each child should prevent reclamation. 470 */ 471 for (count = 0; count <= UNIONFSHASHMASK; count++) { 472 hd = unp->un_hashtbl + count; 473 LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) { 474 LIST_REMOVE(unp_t1, un_hash); 475 unp_t1->un_hash.le_next = NULL; 476 unp_t1->un_hash.le_prev = NULL; 477 } 478 } 479 } 480 VI_UNLOCK(vp); 481 482 if (lvp != NULLVP) 483 VOP_UNLOCK(lvp); 484 if (uvp != NULLVP) 485 VOP_UNLOCK(uvp); 486 487 if (dvp != NULLVP) 488 unionfs_rem_cached_vnode(unp, dvp); 489 490 if (lvp != NULLVP) 491 vrele(lvp); 492 if (uvp != NULLVP) 493 vrele(uvp); 494 if (unp->un_path != NULL) { 495 free(unp->un_path, M_UNIONFSPATH); 496 unp->un_path = NULL; 497 unp->un_pathlen = 0; 498 } 499 500 if (unp->un_hashtbl != NULL) { 501 hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK); 502 } 503 504 LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) { 505 LIST_REMOVE(unsp, uns_list); 506 free(unsp, M_TEMP); 507 } 508 if (dvp != NULLVP) { 509 mtx_lock(&unionfs_deferred_rele_lock); 510 STAILQ_INSERT_TAIL(&unionfs_deferred_rele_list, unp, un_rele); 511 mtx_unlock(&unionfs_deferred_rele_lock); 512 taskqueue_enqueue(taskqueue_unionfs_rele, 513 &unionfs_deferred_rele_task); 514 } else 515 free(unp, M_UNIONFSNODE); 516 } 517 518 /* 519 * Get the unionfs node status. 520 * You need exclusive lock this vnode. 521 */ 522 void 523 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td, 524 struct unionfs_node_status **unspp) 525 { 526 struct unionfs_node_status *unsp; 527 pid_t pid; 528 529 pid = td->td_proc->p_pid; 530 531 KASSERT(NULL != unspp, ("%s: NULL status", __func__)); 532 ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__); 533 534 LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) { 535 if (unsp->uns_pid == pid) { 536 *unspp = unsp; 537 return; 538 } 539 } 540 541 /* create a new unionfs node status */ 542 unsp = malloc(sizeof(struct unionfs_node_status), 543 M_TEMP, M_WAITOK | M_ZERO); 544 545 unsp->uns_pid = pid; 546 LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list); 547 548 *unspp = unsp; 549 } 550 551 /* 552 * Remove the unionfs node status, if you can. 553 * You need exclusive lock this vnode. 554 */ 555 void 556 unionfs_tryrem_node_status(struct unionfs_node *unp, 557 struct unionfs_node_status *unsp) 558 { 559 KASSERT(NULL != unsp, ("%s: NULL status", __func__)); 560 ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__); 561 562 if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt) 563 return; 564 565 LIST_REMOVE(unsp, uns_list); 566 free(unsp, M_TEMP); 567 } 568 569 /* 570 * Create upper node attr. 571 */ 572 void 573 unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva, 574 struct vattr *uva, struct thread *td) 575 { 576 VATTR_NULL(uva); 577 uva->va_type = lva->va_type; 578 uva->va_atime = lva->va_atime; 579 uva->va_mtime = lva->va_mtime; 580 uva->va_ctime = lva->va_ctime; 581 582 switch (ump->um_copymode) { 583 case UNIONFS_TRANSPARENT: 584 uva->va_mode = lva->va_mode; 585 uva->va_uid = lva->va_uid; 586 uva->va_gid = lva->va_gid; 587 break; 588 case UNIONFS_MASQUERADE: 589 if (ump->um_uid == lva->va_uid) { 590 uva->va_mode = lva->va_mode & 077077; 591 uva->va_mode |= (lva->va_type == VDIR ? 592 ump->um_udir : ump->um_ufile) & 0700; 593 uva->va_uid = lva->va_uid; 594 uva->va_gid = lva->va_gid; 595 } else { 596 uva->va_mode = (lva->va_type == VDIR ? 597 ump->um_udir : ump->um_ufile); 598 uva->va_uid = ump->um_uid; 599 uva->va_gid = ump->um_gid; 600 } 601 break; 602 default: /* UNIONFS_TRADITIONAL */ 603 uva->va_mode = 0777 & ~td->td_proc->p_pd->pd_cmask; 604 uva->va_uid = ump->um_uid; 605 uva->va_gid = ump->um_gid; 606 break; 607 } 608 } 609 610 /* 611 * Create upper node attr. 612 */ 613 int 614 unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp, 615 struct vattr *uva, struct ucred *cred, struct thread *td) 616 { 617 struct vattr lva; 618 int error; 619 620 if ((error = VOP_GETATTR(lvp, &lva, cred))) 621 return (error); 622 623 unionfs_create_uppervattr_core(ump, &lva, uva, td); 624 625 return (error); 626 } 627 628 /* 629 * relookup 630 * 631 * dvp should be locked on entry and will be locked on return. 632 * 633 * If an error is returned, *vpp will be invalid, otherwise it will hold a 634 * locked, referenced vnode. If *vpp == dvp then remember that only one 635 * LK_EXCLUSIVE lock is held. 636 */ 637 int 638 unionfs_relookup(struct vnode *dvp, struct vnode **vpp, 639 struct componentname *cnp, struct componentname *cn, struct thread *td, 640 char *path, int pathlen, u_long nameiop) 641 { 642 int error; 643 644 cn->cn_namelen = pathlen; 645 cn->cn_pnbuf = path; 646 cn->cn_nameiop = nameiop; 647 cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN); 648 cn->cn_lkflags = LK_EXCLUSIVE; 649 cn->cn_cred = cnp->cn_cred; 650 cn->cn_nameptr = cn->cn_pnbuf; 651 652 if (nameiop == DELETE) 653 cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART)); 654 else if (RENAME == nameiop) 655 cn->cn_flags |= (cnp->cn_flags & SAVESTART); 656 else if (nameiop == CREATE) 657 cn->cn_flags |= NOCACHE; 658 659 vref(dvp); 660 VOP_UNLOCK(dvp); 661 662 if ((error = relookup(dvp, vpp, cn))) { 663 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 664 } else 665 vrele(dvp); 666 667 KASSERT((cn->cn_flags & HASBUF) != 0, 668 ("%s: HASBUF cleared", __func__)); 669 KASSERT((cn->cn_flags & SAVENAME) != 0, 670 ("%s: SAVENAME cleared", __func__)); 671 KASSERT(cn->cn_pnbuf == path, ("%s: cn_pnbuf changed", __func__)); 672 673 return (error); 674 } 675 676 /* 677 * relookup for CREATE namei operation. 678 * 679 * dvp is unionfs vnode. dvp should be locked. 680 * 681 * If it called 'unionfs_copyfile' function by unionfs_link etc, 682 * VOP_LOOKUP information is broken. 683 * So it need relookup in order to create link etc. 684 */ 685 int 686 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, 687 struct thread *td) 688 { 689 struct vnode *udvp; 690 struct vnode *vp; 691 struct componentname cn; 692 int error; 693 694 udvp = UNIONFSVPTOUPPERVP(dvp); 695 vp = NULLVP; 696 697 KASSERT((cnp->cn_flags & HASBUF) != 0, 698 ("%s called without HASBUF", __func__)); 699 error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, 700 cnp->cn_namelen, CREATE); 701 if (error) 702 return (error); 703 704 if (vp != NULLVP) { 705 if (udvp == vp) 706 vrele(vp); 707 else 708 vput(vp); 709 710 error = EEXIST; 711 } 712 713 return (error); 714 } 715 716 /* 717 * relookup for DELETE namei operation. 718 * 719 * dvp is unionfs vnode. dvp should be locked. 720 */ 721 int 722 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, 723 struct thread *td) 724 { 725 struct vnode *udvp; 726 struct vnode *vp; 727 struct componentname cn; 728 int error; 729 730 udvp = UNIONFSVPTOUPPERVP(dvp); 731 vp = NULLVP; 732 733 KASSERT((cnp->cn_flags & HASBUF) != 0, 734 ("%s called without HASBUF", __func__)); 735 error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, 736 cnp->cn_namelen, DELETE); 737 if (error) 738 return (error); 739 740 if (vp == NULLVP) 741 error = ENOENT; 742 else { 743 if (udvp == vp) 744 vrele(vp); 745 else 746 vput(vp); 747 } 748 749 return (error); 750 } 751 752 /* 753 * relookup for RENAME namei operation. 754 * 755 * dvp is unionfs vnode. dvp should be locked. 756 */ 757 int 758 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, 759 struct thread *td) 760 { 761 struct vnode *udvp; 762 struct vnode *vp; 763 struct componentname cn; 764 int error; 765 766 udvp = UNIONFSVPTOUPPERVP(dvp); 767 vp = NULLVP; 768 769 KASSERT((cnp->cn_flags & HASBUF) != 0, 770 ("%s called without HASBUF", __func__)); 771 error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, 772 cnp->cn_namelen, RENAME); 773 if (error) 774 return (error); 775 776 if (vp != NULLVP) { 777 if (udvp == vp) 778 vrele(vp); 779 else 780 vput(vp); 781 } 782 783 return (error); 784 } 785 786 /* 787 * Update the unionfs_node. 788 * 789 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the 790 * uvp's lock and lower's lock will be unlocked. 791 */ 792 static void 793 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp, 794 struct thread *td) 795 { 796 struct unionfs_node_hashhead *hd; 797 struct vnode *vp; 798 struct vnode *lvp; 799 struct vnode *dvp; 800 unsigned count, lockrec; 801 802 vp = UNIONFSTOV(unp); 803 lvp = unp->un_lowervp; 804 ASSERT_VOP_ELOCKED(lvp, __func__); 805 ASSERT_VOP_ELOCKED(uvp, __func__); 806 dvp = unp->un_dvp; 807 808 /* 809 * Uppdate the upper vnode's lock state to match the lower vnode, 810 * and then switch the unionfs vnode's lock to the upper vnode. 811 */ 812 lockrec = lvp->v_vnlock->lk_recurse; 813 for (count = 0; count < lockrec; count++) 814 vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY); 815 VI_LOCK(vp); 816 unp->un_uppervp = uvp; 817 vp->v_vnlock = uvp->v_vnlock; 818 VI_UNLOCK(vp); 819 820 /* 821 * Re-cache the unionfs vnode against the upper vnode 822 */ 823 if (dvp != NULLVP && vp->v_type == VDIR) { 824 VI_LOCK(dvp); 825 if (unp->un_hash.le_prev != NULL) { 826 LIST_REMOVE(unp, un_hash); 827 hd = unionfs_get_hashhead(dvp, uvp); 828 LIST_INSERT_HEAD(hd, unp, un_hash); 829 } 830 VI_UNLOCK(unp->un_dvp); 831 } 832 } 833 834 /* 835 * Create a new shadow dir. 836 * 837 * udvp should be locked on entry and will be locked on return. 838 * 839 * If no error returned, unp will be updated. 840 */ 841 int 842 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp, 843 struct unionfs_node *unp, struct componentname *cnp, struct thread *td) 844 { 845 struct vnode *lvp; 846 struct vnode *uvp; 847 struct vattr va; 848 struct vattr lva; 849 struct nameidata nd; 850 struct mount *mp; 851 struct ucred *cred; 852 struct ucred *credbk; 853 struct uidinfo *rootinfo; 854 int error; 855 856 if (unp->un_uppervp != NULLVP) 857 return (EEXIST); 858 859 lvp = unp->un_lowervp; 860 uvp = NULLVP; 861 credbk = cnp->cn_cred; 862 863 /* Authority change to root */ 864 rootinfo = uifind((uid_t)0); 865 cred = crdup(cnp->cn_cred); 866 /* 867 * The calls to chgproccnt() are needed to compensate for change_ruid() 868 * calling chgproccnt(). 869 */ 870 chgproccnt(cred->cr_ruidinfo, 1, 0); 871 change_euid(cred, rootinfo); 872 change_ruid(cred, rootinfo); 873 change_svuid(cred, (uid_t)0); 874 uifree(rootinfo); 875 cnp->cn_cred = cred; 876 877 memset(&nd.ni_cnd, 0, sizeof(struct componentname)); 878 NDPREINIT(&nd); 879 880 if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred))) 881 goto unionfs_mkshadowdir_abort; 882 883 if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td, 884 cnp->cn_nameptr, cnp->cn_namelen, CREATE))) 885 goto unionfs_mkshadowdir_abort; 886 if (uvp != NULLVP) { 887 if (udvp == uvp) 888 vrele(uvp); 889 else 890 vput(uvp); 891 892 error = EEXIST; 893 goto unionfs_mkshadowdir_abort; 894 } 895 896 if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH))) 897 goto unionfs_mkshadowdir_abort; 898 unionfs_create_uppervattr_core(ump, &lva, &va, td); 899 900 error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va); 901 902 if (!error) { 903 unionfs_node_update(unp, uvp, td); 904 905 /* 906 * XXX The bug which cannot set uid/gid was corrected. 907 * Ignore errors. 908 */ 909 va.va_type = VNON; 910 VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred); 911 } 912 vn_finished_write(mp); 913 914 unionfs_mkshadowdir_abort: 915 cnp->cn_cred = credbk; 916 chgproccnt(cred->cr_ruidinfo, -1, 0); 917 crfree(cred); 918 919 return (error); 920 } 921 922 /* 923 * Create a new whiteout. 924 * 925 * dvp should be locked on entry and will be locked on return. 926 */ 927 int 928 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, 929 struct thread *td, char *path, int pathlen) 930 { 931 struct vnode *wvp; 932 struct nameidata nd; 933 struct mount *mp; 934 int error; 935 936 wvp = NULLVP; 937 NDPREINIT(&nd); 938 if ((error = unionfs_relookup(dvp, &wvp, cnp, &nd.ni_cnd, td, path, 939 pathlen, CREATE))) { 940 return (error); 941 } 942 if (wvp != NULLVP) { 943 if (dvp == wvp) 944 vrele(wvp); 945 else 946 vput(wvp); 947 948 return (EEXIST); 949 } 950 951 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH))) 952 goto unionfs_mkwhiteout_free_out; 953 error = VOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE); 954 955 vn_finished_write(mp); 956 957 unionfs_mkwhiteout_free_out: 958 return (error); 959 } 960 961 /* 962 * Create a new vnode for create a new shadow file. 963 * 964 * If an error is returned, *vpp will be invalid, otherwise it will hold a 965 * locked, referenced and opened vnode. 966 * 967 * unp is never updated. 968 */ 969 static int 970 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp, 971 struct unionfs_node *unp, struct vattr *uvap, struct thread *td) 972 { 973 struct unionfs_mount *ump; 974 struct vnode *vp; 975 struct vnode *lvp; 976 struct ucred *cred; 977 struct vattr lva; 978 struct nameidata nd; 979 int fmode; 980 int error; 981 982 ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount); 983 vp = NULLVP; 984 lvp = unp->un_lowervp; 985 cred = td->td_ucred; 986 fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL); 987 error = 0; 988 989 if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0) 990 return (error); 991 unionfs_create_uppervattr_core(ump, &lva, uvap, td); 992 993 if (unp->un_path == NULL) 994 panic("%s: NULL un_path", __func__); 995 996 nd.ni_cnd.cn_namelen = unp->un_pathlen; 997 nd.ni_cnd.cn_pnbuf = unp->un_path; 998 nd.ni_cnd.cn_nameiop = CREATE; 999 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | 1000 ISLASTCN; 1001 nd.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 1002 nd.ni_cnd.cn_cred = cred; 1003 nd.ni_cnd.cn_nameptr = nd.ni_cnd.cn_pnbuf; 1004 NDPREINIT(&nd); 1005 1006 vref(udvp); 1007 if ((error = relookup(udvp, &vp, &nd.ni_cnd)) != 0) 1008 goto unionfs_vn_create_on_upper_free_out2; 1009 vrele(udvp); 1010 1011 if (vp != NULLVP) { 1012 if (vp == udvp) 1013 vrele(vp); 1014 else 1015 vput(vp); 1016 error = EEXIST; 1017 goto unionfs_vn_create_on_upper_free_out1; 1018 } 1019 1020 if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0) 1021 goto unionfs_vn_create_on_upper_free_out1; 1022 1023 if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) { 1024 vput(vp); 1025 goto unionfs_vn_create_on_upper_free_out1; 1026 } 1027 error = VOP_ADD_WRITECOUNT(vp, 1); 1028 CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", 1029 __func__, vp, vp->v_writecount); 1030 if (error == 0) { 1031 *vpp = vp; 1032 } else { 1033 VOP_CLOSE(vp, fmode, cred, td); 1034 } 1035 1036 unionfs_vn_create_on_upper_free_out1: 1037 VOP_UNLOCK(udvp); 1038 1039 unionfs_vn_create_on_upper_free_out2: 1040 KASSERT((nd.ni_cnd.cn_flags & HASBUF) != 0, 1041 ("%s: HASBUF cleared", __func__)); 1042 KASSERT((nd.ni_cnd.cn_flags & SAVENAME) != 0, 1043 ("%s: SAVENAME cleared", __func__)); 1044 KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path, 1045 ("%s: cn_pnbuf changed", __func__)); 1046 1047 return (error); 1048 } 1049 1050 /* 1051 * Copy from lvp to uvp. 1052 * 1053 * lvp and uvp should be locked and opened on entry and will be locked and 1054 * opened on return. 1055 */ 1056 static int 1057 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp, 1058 struct ucred *cred, struct thread *td) 1059 { 1060 char *buf; 1061 struct uio uio; 1062 struct iovec iov; 1063 off_t offset; 1064 int count; 1065 int error; 1066 int bufoffset; 1067 1068 error = 0; 1069 memset(&uio, 0, sizeof(uio)); 1070 1071 uio.uio_td = td; 1072 uio.uio_segflg = UIO_SYSSPACE; 1073 uio.uio_offset = 0; 1074 1075 buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); 1076 1077 while (error == 0) { 1078 offset = uio.uio_offset; 1079 1080 uio.uio_iov = &iov; 1081 uio.uio_iovcnt = 1; 1082 iov.iov_base = buf; 1083 iov.iov_len = MAXBSIZE; 1084 uio.uio_resid = iov.iov_len; 1085 uio.uio_rw = UIO_READ; 1086 1087 if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0) 1088 break; 1089 if ((count = MAXBSIZE - uio.uio_resid) == 0) 1090 break; 1091 1092 bufoffset = 0; 1093 while (bufoffset < count) { 1094 uio.uio_iov = &iov; 1095 uio.uio_iovcnt = 1; 1096 iov.iov_base = buf + bufoffset; 1097 iov.iov_len = count - bufoffset; 1098 uio.uio_offset = offset + bufoffset; 1099 uio.uio_resid = iov.iov_len; 1100 uio.uio_rw = UIO_WRITE; 1101 1102 if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0) 1103 break; 1104 1105 bufoffset += (count - bufoffset) - uio.uio_resid; 1106 } 1107 1108 uio.uio_offset = offset + bufoffset; 1109 } 1110 1111 free(buf, M_TEMP); 1112 1113 return (error); 1114 } 1115 1116 /* 1117 * Copy file from lower to upper. 1118 * 1119 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to 1120 * docopy. 1121 * 1122 * If no error returned, unp will be updated. 1123 */ 1124 int 1125 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, 1126 struct thread *td) 1127 { 1128 struct mount *mp; 1129 struct vnode *udvp; 1130 struct vnode *lvp; 1131 struct vnode *uvp; 1132 struct vattr uva; 1133 int error; 1134 1135 lvp = unp->un_lowervp; 1136 uvp = NULLVP; 1137 1138 if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY)) 1139 return (EROFS); 1140 if (unp->un_dvp == NULLVP) 1141 return (EINVAL); 1142 if (unp->un_uppervp != NULLVP) 1143 return (EEXIST); 1144 udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp; 1145 if (udvp == NULLVP) 1146 return (EROFS); 1147 if ((udvp->v_mount->mnt_flag & MNT_RDONLY)) 1148 return (EROFS); 1149 1150 error = VOP_ACCESS(lvp, VREAD, cred, td); 1151 if (error != 0) 1152 return (error); 1153 1154 if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0) 1155 return (error); 1156 error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td); 1157 if (error != 0) { 1158 vn_finished_write(mp); 1159 return (error); 1160 } 1161 1162 if (docopy != 0) { 1163 error = VOP_OPEN(lvp, FREAD, cred, td, NULL); 1164 if (error == 0) { 1165 error = unionfs_copyfile_core(lvp, uvp, cred, td); 1166 VOP_CLOSE(lvp, FREAD, cred, td); 1167 } 1168 } 1169 VOP_CLOSE(uvp, FWRITE, cred, td); 1170 VOP_ADD_WRITECOUNT_CHECKED(uvp, -1); 1171 CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", 1172 __func__, uvp, uvp->v_writecount); 1173 1174 vn_finished_write(mp); 1175 1176 if (error == 0) { 1177 /* Reset the attributes. Ignore errors. */ 1178 uva.va_type = VNON; 1179 VOP_SETATTR(uvp, &uva, cred); 1180 } 1181 1182 unionfs_node_update(unp, uvp, td); 1183 1184 return (error); 1185 } 1186 1187 /* 1188 * It checks whether vp can rmdir. (check empty) 1189 * 1190 * vp is unionfs vnode. 1191 * vp should be locked. 1192 */ 1193 int 1194 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td) 1195 { 1196 struct vnode *uvp; 1197 struct vnode *lvp; 1198 struct vnode *tvp; 1199 struct dirent *dp; 1200 struct dirent *edp; 1201 struct componentname cn; 1202 struct iovec iov; 1203 struct uio uio; 1204 struct vattr va; 1205 int error; 1206 int eofflag; 1207 int lookuperr; 1208 1209 /* 1210 * The size of buf needs to be larger than DIRBLKSIZ. 1211 */ 1212 char buf[256 * 6]; 1213 1214 ASSERT_VOP_ELOCKED(vp, __func__); 1215 1216 eofflag = 0; 1217 uvp = UNIONFSVPTOUPPERVP(vp); 1218 lvp = UNIONFSVPTOLOWERVP(vp); 1219 1220 /* check opaque */ 1221 if ((error = VOP_GETATTR(uvp, &va, cred)) != 0) 1222 return (error); 1223 if (va.va_flags & OPAQUE) 1224 return (0); 1225 1226 /* open vnode */ 1227 #ifdef MAC 1228 if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0) 1229 return (error); 1230 #endif 1231 if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0) 1232 return (error); 1233 if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0) 1234 return (error); 1235 1236 uio.uio_rw = UIO_READ; 1237 uio.uio_segflg = UIO_SYSSPACE; 1238 uio.uio_td = td; 1239 uio.uio_offset = 0; 1240 1241 #ifdef MAC 1242 error = mac_vnode_check_readdir(td->td_ucred, lvp); 1243 #endif 1244 while (!error && !eofflag) { 1245 iov.iov_base = buf; 1246 iov.iov_len = sizeof(buf); 1247 uio.uio_iov = &iov; 1248 uio.uio_iovcnt = 1; 1249 uio.uio_resid = iov.iov_len; 1250 1251 error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL); 1252 if (error != 0) 1253 break; 1254 KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf), 1255 ("%s: empty read from lower FS", __func__)); 1256 1257 edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid]; 1258 for (dp = (struct dirent*)buf; !error && dp < edp; 1259 dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) { 1260 if (dp->d_type == DT_WHT || dp->d_fileno == 0 || 1261 (dp->d_namlen == 1 && dp->d_name[0] == '.') || 1262 (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2))) 1263 continue; 1264 1265 cn.cn_namelen = dp->d_namlen; 1266 cn.cn_pnbuf = NULL; 1267 cn.cn_nameptr = dp->d_name; 1268 cn.cn_nameiop = LOOKUP; 1269 cn.cn_flags = LOCKPARENT | LOCKLEAF | SAVENAME | 1270 RDONLY | ISLASTCN; 1271 cn.cn_lkflags = LK_EXCLUSIVE; 1272 cn.cn_cred = cred; 1273 1274 /* 1275 * check entry in lower. 1276 * Sometimes, readdir function returns 1277 * wrong entry. 1278 */ 1279 lookuperr = VOP_LOOKUP(lvp, &tvp, &cn); 1280 1281 if (!lookuperr) 1282 vput(tvp); 1283 else 1284 continue; /* skip entry */ 1285 1286 /* 1287 * check entry 1288 * If it has no exist/whiteout entry in upper, 1289 * directory is not empty. 1290 */ 1291 cn.cn_flags = LOCKPARENT | LOCKLEAF | SAVENAME | 1292 RDONLY | ISLASTCN; 1293 lookuperr = VOP_LOOKUP(uvp, &tvp, &cn); 1294 1295 if (!lookuperr) 1296 vput(tvp); 1297 1298 /* ignore exist or whiteout entry */ 1299 if (!lookuperr || 1300 (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT))) 1301 continue; 1302 1303 error = ENOTEMPTY; 1304 } 1305 } 1306 1307 /* close vnode */ 1308 VOP_CLOSE(vp, FREAD, cred, td); 1309 1310 return (error); 1311 } 1312 1313