1 /* $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11 * 2005 program. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * tmpfs vnode interface. 
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/limits.h>
#include <sys/lockf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smr.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <fs/tmpfs/tmpfs_vnops.h>
#include <fs/tmpfs/tmpfs.h>

SYSCTL_DECL(_vfs_tmpfs);
VFS_SMR_DECLARE;

/* Statistics: how often tmpfs_rename() had to restart its locking dance. */
static volatile int tmpfs_rename_restarts;
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
    "Times rename had to restart due to lock contention");

/*
 * Callback for vn_vget_ino_gen(): allocate (or find) the vnode for the
 * tmpfs node passed in 'arg'.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{

	return (tmpfs_alloc_vp(mp, arg, lkflags, rvp));
}

/*
 * Common lookup routine shared by tmpfs_lookup() and
 * tmpfs_cached_lookup().  Resolves the component 'cnp' in directory
 * 'dvp' and returns the result, locked, in '*vpp'.  Returns EJUSTRETURN
 * for a missing last component of a CREATE/RENAME (or whiteout DELETE)
 * operation, per the VOP_LOOKUP(9) contract.
 */
static int
tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode, *pnode;
	struct tmpfs_mount *tm;
	int error;

	/* Caller assumes responsibility for ensuring access (VEXEC). */
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULLVP;

	/* We cannot be requesting the parent directory of the root node. */
	MPASS(IMPLIES(dnode->tn_type == VDIR &&
	    dnode->tn_dir.tn_parent == dnode,
	    !(cnp->cn_flags & ISDOTDOT)));

	TMPFS_ASSERT_LOCKED(dnode);
	if (dnode->tn_dir.tn_parent == NULL) {
		/* The directory itself has been removed. */
		error = ENOENT;
		goto out;
	}
	if (cnp->cn_flags & ISDOTDOT) {
		tm = VFS_TO_TMPFS(dvp->v_mount);
		pnode = dnode->tn_dir.tn_parent;
		/*
		 * Hold a reference on the parent node so it cannot go
		 * away while vn_vget_ino_gen() temporarily drops the
		 * lock on dvp.
		 */
		tmpfs_ref_node(pnode);
		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
		    pnode, cnp->cn_lkflags, vpp);
		tmpfs_free_node(tm, pnode);
		if (error != 0)
			goto out;
	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/* Lookup of "." is just a new reference on dvp itself. */
		VREF(dvp);
		*vpp = dvp;
		error = 0;
	} else {
		de = tmpfs_dir_lookup(dnode, NULL, cnp);
		if (de != NULL && de->td_node == NULL)
			cnp->cn_flags |= ISWHITEOUT;
		if (de == NULL || de->td_node == NULL) {
			/*
			 * The entry was not found in the directory.
			 * This is OK if we are creating or renaming an
			 * entry and are working on the last component of
			 * the path name.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == CREATE ||
			    cnp->cn_nameiop == RENAME ||
			    (cnp->cn_nameiop == DELETE &&
			    cnp->cn_flags & DOWHITEOUT &&
			    cnp->cn_flags & ISWHITEOUT))) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    curthread);
				if (error != 0)
					goto out;

				error = EJUSTRETURN;
			} else
				error = ENOENT;
		} else {
			struct tmpfs_node *tnode;

			/*
			 * The entry was found, so get its associated
			 * tmpfs_node.
			 */
			tnode = de->td_node;

			/*
			 * If we are not at the last path component and
			 * found a non-directory or non-link entry (which
			 * may itself be pointing to a directory), raise
			 * an error.
			 */
			if ((tnode->tn_type != VDIR &&
			    tnode->tn_type != VLNK) &&
			    !(cnp->cn_flags & ISLASTCN)) {
				error = ENOTDIR;
				goto out;
			}

			/*
			 * If we are deleting or renaming the entry, keep
			 * track of its tmpfs_dirent so that it can be
			 * easily deleted later.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == DELETE ||
			    cnp->cn_nameiop == RENAME)) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    curthread);
				if (error != 0)
					goto out;

				/* Allocate a new vnode on the matching entry. */
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;

				/*
				 * Sticky directory semantics: deletion or
				 * rename requires ownership of the
				 * directory or the entry (VADMIN on either).
				 */
				if ((dnode->tn_mode & S_ISTXT) &&
				    VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
				    curthread) && VOP_ACCESS(*vpp, VADMIN,
				    cnp->cn_cred, curthread)) {
					error = EPERM;
					vput(*vpp);
					*vpp = NULL;
					goto out;
				}
			} else {
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;
			}
		}
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);

out:
	/*
	 * If there were no errors, *vpp cannot be null and it must be
	 * locked.
	 */
	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));

	return (error);
}

/* VOP_CACHEDLOOKUP: namecache-miss path; the access check was already done. */
static int
tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
{

	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
}

/* VOP_LOOKUP: direct lookup path used when the namecache is bypassed. */
static int
tmpfs_lookup(struct vop_lookup_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	int error;

	/* Check accessibility of requested node as a first step.
 */
	error = vn_dir_check_exec(dvp, cnp);
	if (error != 0)
		return (error);

	return (tmpfs_lookup1(dvp, vpp, cnp));
}

/*
 * VOP_CREATE: create a regular file (or socket) named by 'cnp' in
 * directory 'dvp' with the attributes in 'vap'.
 */
static int
tmpfs_create(struct vop_create_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;
	int error;

	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);

	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);
	return (error);
}

/* VOP_MKNOD: create a block/character device node or a fifo. */
static int
tmpfs_mknod(struct vop_mknod_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;

	if (vap->va_type != VBLK && vap->va_type != VCHR &&
	    vap->va_type != VFIFO)
		return (EINVAL);

	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
}

/* fileops used for regular tmpfs files opened through a struct file. */
struct fileops tmpfs_fnops;

/*
 * VOP_OPEN: validate the open mode against the node flags and, for
 * regular files, attach the tmpfs node to the struct file.
 */
static int
tmpfs_open(struct vop_open_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	struct file *fp;
	int error, mode;

	vp = v->a_vp;
	mode = v->a_mode;
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * The file is still active but all its names have been removed
	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
	 * it is about to die.
	 */
	if (node->tn_links < 1)
		return (ENOENT);

	/* If the file is marked append-only, deny write requests. */
	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
		error = EPERM;
	else {
		error = 0;
		/* For regular files, the call below is nop.
 */
	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
		return (0);

	switch (vp->v_type) {
	case VDIR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* Writes to these types require a writable filesystem. */
		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
			error = EROFS;
			goto out;
		}
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		/* FALLTHROUGH */
	case VFIFO:
		break;

	default:
		error = EINVAL;
		goto out;
	}

	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
		error = EPERM;
		goto out;
	}

	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
	    accmode, cred);

out:
	MPASS(VOP_ISLOCKED(vp));

	return (error);
}

/*
 * VOP_STAT: fill in '*sb' from the tmpfs node backing 'vp'.
 */
int
tmpfs_stat(struct vop_stat_args *v)
{
	struct vnode *vp = v->a_vp;
	struct stat *sb = v->a_sb;
	struct tmpfs_node *node;
	int error;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update_getattr(vp);

	error = vop_stat_helper_pre(v);
	if (__predict_false(error))
		return (error);

	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = node->tn_id;
	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
	sb->st_nlink = node->tn_links;
	sb->st_uid = node->tn_uid;
	sb->st_gid = node->tn_gid;
	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
	    node->tn_rdev : NODEV;
	sb->st_size = node->tn_size;
	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
	sb->st_blksize = PAGE_SIZE;
	sb->st_flags = node->tn_flags;
	sb->st_gen = node->tn_gen;
	if (vp->v_type == VREG) {
#ifdef __ILP32__
		vm_object_t obj = node->tn_reg.tn_aobj;

		/* Handle torn read */
		VM_OBJECT_RLOCK(obj);
#endif
		sb->st_blocks = ptoa(node->tn_reg.tn_pages);
#ifdef __ILP32__
		VM_OBJECT_RUNLOCK(obj);
#endif
	} else {
		sb->st_blocks = node->tn_size;
	}
	sb->st_blocks /= S_BLKSIZE;
	return (vop_stat_helper_post(v, error));
}

/*
 * VOP_GETATTR: fill in '*vap' from the tmpfs node backing 'vp'.
 */
int
tmpfs_getattr(struct vop_getattr_args *v)
{
	struct vnode *vp = v->a_vp;
	struct vattr *vap = v->a_vap;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update_getattr(vp);

	vap->va_type = vp->v_type;
	vap->va_mode = node->tn_mode;
	vap->va_nlink = node->tn_links;
	vap->va_uid = node->tn_uid;
	vap->va_gid = node->tn_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->tn_id;
	vap->va_size = node->tn_size;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_atime = node->tn_atime;
	vap->va_mtime = node->tn_mtime;
	vap->va_ctime = node->tn_ctime;
	vap->va_birthtime = node->tn_birthtime;
	vap->va_gen = node->tn_gen;
	vap->va_flags = node->tn_flags;
	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
	    node->tn_rdev : NODEV;
	if (vp->v_type == VREG) {
#ifdef __ILP32__
		vm_object_t obj = node->tn_reg.tn_aobj;

		/* Handle torn read of the page count on 32-bit platforms. */
		VM_OBJECT_RLOCK(obj);
#endif
		vap->va_bytes = ptoa(node->tn_reg.tn_pages);
#ifdef __ILP32__
		VM_OBJECT_RUNLOCK(obj);
#endif
	} else {
		vap->va_bytes = node->tn_size;
	}
	vap->va_filerev = 0;

	return (0);
}

/*
 * VOP_SETATTR: apply the settable attributes in '*vap' to 'vp',
 * delegating to the tmpfs_ch*() helpers per attribute class.
 */
int
tmpfs_setattr(struct vop_setattr_args *v)
{
	struct vnode *vp = v->a_vp;
	struct vattr *vap = v->a_vap;
	struct ucred *cred = v->a_cred;
	struct thread *td = curthread;

	int error;

	MPASS(VOP_ISLOCKED(vp));
	ASSERT_VOP_IN_SEQC(vp);

	error = 0;

	/* Abort if any unsettable attribute is given. */
	if (vap->va_type != VNON ||
	    vap->va_nlink != VNOVAL ||
	    vap->va_fsid != VNOVAL ||
	    vap->va_fileid != VNOVAL ||
	    vap->va_blocksize != VNOVAL ||
	    vap->va_gen != VNOVAL ||
	    vap->va_rdev != VNOVAL ||
	    vap->va_bytes != VNOVAL)
		error = EINVAL;

	if (error == 0 && (vap->va_flags != VNOVAL))
		error = tmpfs_chflags(vp, vap->va_flags, cred, td);

	if (error == 0 && (vap->va_size != VNOVAL))
		error = tmpfs_chsize(vp, vap->va_size, cred, td);

	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);

	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
		error = tmpfs_chmod(vp, vap->va_mode, cred, td);

	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
	    vap->va_atime.tv_nsec != VNOVAL) ||
	    (vap->va_mtime.tv_sec != VNOVAL &&
	    vap->va_mtime.tv_nsec != VNOVAL) ||
	    (vap->va_birthtime.tv_sec != VNOVAL &&
	    vap->va_birthtime.tv_nsec != VNOVAL)))
		error = tmpfs_chtimes(vp, vap, cred, td);

	/*
	 * Update the node times.  We give preference to the error codes
	 * generated by this function rather than the ones that may arise
	 * from tmpfs_update.
 */
	tmpfs_update(vp);

	MPASS(VOP_ISLOCKED(vp));

	return (error);
}

/*
 * VOP_READ: copy data from the node's backing VM object into 'uio'.
 */
static int
tmpfs_read(struct vop_read_args *v)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_node *node;

	vp = v->a_vp;
	if (vp->v_type != VREG)
		return (EISDIR);
	uio = v->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);
	node = VP_TO_TMPFS_NODE(vp);
	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
}

/*
 * VOP_READ_PGCACHE: lockless read path.  Resolves the node and its
 * backing object under vfs_smr protection; returns EJUSTRETURN to make
 * the caller fall back to the locked tmpfs_read() path whenever the
 * lockless read cannot proceed.
 */
static int
tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	vm_object_t object;
	off_t size;
	int error;

	vp = v->a_vp;
	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);

	if (v->a_uio->uio_offset < 0)
		return (EINVAL);

	error = EJUSTRETURN;
	vfs_smr_enter();

	node = VP_TO_TMPFS_NODE_SMR(vp);
	if (node == NULL)
		goto out_smr;
	MPASS(node->tn_type == VREG);
	MPASS(node->tn_refcount >= 1);
	object = node->tn_reg.tn_aobj;
	if (object == NULL)
		goto out_smr;

	MPASS(object->type == tmpfs_pager_type);
	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
	    OBJ_SWAP);
	if (!VN_IS_DOOMED(vp)) {
		/* size cannot become shorter due to rangelock.
 */
		size = node->tn_size;
		tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
		vfs_smr_exit();
		error = uiomove_object(object, size, v->a_uio);
		return (error);
	}
out_smr:
	vfs_smr_exit();
	return (error);
}

/*
 * VOP_WRITE: copy data from 'uio' into the backing VM object, growing
 * the file first when the write extends past the current end.
 */
static int
tmpfs_write(struct vop_write_args *v)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_node *node;
	off_t oldsize;
	ssize_t r;
	int error, ioflag;
	mode_t newmode;

	vp = v->a_vp;
	uio = v->a_uio;
	ioflag = v->a_ioflag;
	error = 0;
	node = VP_TO_TMPFS_NODE(vp);
	oldsize = node->tn_size;

	if (uio->uio_offset < 0 || vp->v_type != VREG)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	if (ioflag & IO_APPEND)
		uio->uio_offset = node->tn_size;
	error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)->
	    tm_maxfilesize, &r, uio->uio_td);
	if (error != 0) {
		vn_rlimit_fsizex_res(uio, r);
		return (error);
	}

	/* Extend the file if the write goes past the current EOF. */
	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
		    FALSE);
		if (error != 0)
			goto out;
	}

	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
	node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
	node->tn_accessed = true;
	if (node->tn_mode & (S_ISUID | S_ISGID)) {
		/* Strip setuid/setgid on write by unprivileged credentials. */
		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
			vn_seqc_write_begin(vp);
			atomic_store_short(&node->tn_mode, newmode);
			vn_seqc_write_end(vp);
		}
	}
	/* On failure, undo the resize performed above. */
	if (error != 0)
		(void)tmpfs_reg_resize(vp, oldsize, TRUE);

out:
	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));

	vn_rlimit_fsizex_res(uio, r);
	return (error);
}

/* VOP_DEALLOCATE: punch a hole in the given range of the file. */
static int
tmpfs_deallocate(struct vop_deallocate_args *v)
{
	return (tmpfs_reg_punch_hole(v->a_vp, v->a_offset, v->a_len));
}

/*
 * VOP_FSYNC: tmpfs has no backing store to flush; just reconcile the
 * mtime with the VM object and update node times.
 */
static int
tmpfs_fsync(struct vop_fsync_args *v)
{
	struct vnode *vp = v->a_vp;

	MPASS(VOP_ISLOCKED(vp));

	tmpfs_check_mtime(vp);
	tmpfs_update(vp);

	return (0);
}

/*
 * VOP_REMOVE: detach the directory entry for 'vp' from 'dvp'.  The
 * node itself survives until the vnode is reclaimed.
 */
static int
tmpfs_remove(struct vop_remove_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(VOP_ISLOCKED(vp));

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
	MPASS(de != NULL);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dvp, de);
	if (v->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, v->a_cnp);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	node->tn_status |= TMPFS_NODE_CHANGED;
	node->tn_accessed = true;
	error = 0;

out:
	return (error);
}

/*
 * VOP_LINK: create a new hard link to the node backing 'vp' in
 * directory 'tdvp' under the name in 'cnp'.
 */
static int
tmpfs_link(struct vop_link_args *v)
{
	struct vnode *dvp = v->a_tdvp;
	struct vnode *vp = v->a_vp;
	struct componentname *cnp = v->a_cnp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(dvp != vp); /* XXX When can this be false?
 */
	node = VP_TO_TMPFS_NODE(vp);

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	MPASS(node->tn_links <= TMPFS_LINK_MAX);
	if (node->tn_links == TMPFS_LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
	    cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);

	/* vp link count has changed, so update node times. */
	node->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(vp);

	error = 0;

out:
	return (error);
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 */
static int
tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
    struct vnode *tdvp, struct vnode **tvpp,
    struct componentname *fcnp, struct componentname *tcnp)
{
	struct vnode *nvp;
	struct mount *mp;
	struct tmpfs_dirent *de;
	int error, restarts = 0;

	VOP_UNLOCK(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp);
	mp = fdvp->v_mount;

relock:
	restarts += 1;
	/* fdvp is the only vnode taken with a blocking acquire. */
	error = vn_lock(fdvp, LK_EXCLUSIVE);
	if (error)
		goto releout;
	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		VOP_UNLOCK(fdvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto releout;
		/* Drop and retry so locks are taken in a consistent order. */
		VOP_UNLOCK(tdvp);
		goto relock;
	}
	/*
	 * Re-resolve fvp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
	if (de == NULL) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto releout;
	}
	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
	if (error != 0) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if (error != EBUSY)
			goto releout;
		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
		if (error != 0)
			goto releout;
		VOP_UNLOCK(nvp);
		/*
		 * Concurrent rename race.
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = EINVAL;
			goto releout;
		}
		vrele(*fvpp);
		*fvpp = nvp;
		goto relock;
	}
	vrele(*fvpp);
	*fvpp = nvp;
	VOP_UNLOCK(*fvpp);
	/*
	 * Re-resolve tvp and acquire the vnode lock if present.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
	/*
	 * If tvp disappeared we just carry on.
917 */ 918 if (de == NULL && *tvpp != NULL) { 919 vrele(*tvpp); 920 *tvpp = NULL; 921 } 922 /* 923 * Get the tvp ino if the lookup succeeded. We may have to restart 924 * if the non-blocking acquire fails. 925 */ 926 if (de != NULL) { 927 nvp = NULL; 928 error = tmpfs_alloc_vp(mp, de->td_node, 929 LK_EXCLUSIVE | LK_NOWAIT, &nvp); 930 if (*tvpp != NULL) 931 vrele(*tvpp); 932 *tvpp = nvp; 933 if (error != 0) { 934 VOP_UNLOCK(fdvp); 935 VOP_UNLOCK(tdvp); 936 if (error != EBUSY) 937 goto releout; 938 error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, 939 &nvp); 940 if (error != 0) 941 goto releout; 942 VOP_UNLOCK(nvp); 943 /* 944 * fdvp contains fvp, thus tvp (=fdvp) is not empty. 945 */ 946 if (nvp == fdvp) { 947 error = ENOTEMPTY; 948 goto releout; 949 } 950 goto relock; 951 } 952 } 953 tmpfs_rename_restarts += restarts; 954 955 return (0); 956 957 releout: 958 vrele(fdvp); 959 vrele(*fvpp); 960 vrele(tdvp); 961 if (*tvpp != NULL) 962 vrele(*tvpp); 963 tmpfs_rename_restarts += restarts; 964 965 return (error); 966 } 967 968 static int 969 tmpfs_rename(struct vop_rename_args *v) 970 { 971 struct vnode *fdvp = v->a_fdvp; 972 struct vnode *fvp = v->a_fvp; 973 struct componentname *fcnp = v->a_fcnp; 974 struct vnode *tdvp = v->a_tdvp; 975 struct vnode *tvp = v->a_tvp; 976 struct componentname *tcnp = v->a_tcnp; 977 char *newname; 978 struct tmpfs_dirent *de; 979 struct tmpfs_mount *tmp; 980 struct tmpfs_node *fdnode; 981 struct tmpfs_node *fnode; 982 struct tmpfs_node *tnode; 983 struct tmpfs_node *tdnode; 984 int error; 985 bool want_seqc_end; 986 987 MPASS(VOP_ISLOCKED(tdvp)); 988 MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); 989 990 want_seqc_end = false; 991 992 /* 993 * Disallow cross-device renames. 994 * XXX Why isn't this done by the caller? 995 */ 996 if (fvp->v_mount != tdvp->v_mount || 997 (tvp != NULL && fvp->v_mount != tvp->v_mount)) { 998 error = EXDEV; 999 goto out; 1000 } 1001 1002 /* If source and target are the same file, there is nothing to do. 
*/ 1003 if (fvp == tvp) { 1004 error = 0; 1005 goto out; 1006 } 1007 1008 /* 1009 * If we need to move the directory between entries, lock the 1010 * source so that we can safely operate on it. 1011 */ 1012 if (fdvp != tdvp && fdvp != tvp) { 1013 if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1014 error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp, 1015 fcnp, tcnp); 1016 if (error != 0) 1017 return (error); 1018 ASSERT_VOP_ELOCKED(fdvp, 1019 "tmpfs_rename: fdvp not locked"); 1020 ASSERT_VOP_ELOCKED(tdvp, 1021 "tmpfs_rename: tdvp not locked"); 1022 if (tvp != NULL) 1023 ASSERT_VOP_ELOCKED(tvp, 1024 "tmpfs_rename: tvp not locked"); 1025 if (fvp == tvp) { 1026 error = 0; 1027 goto out_locked; 1028 } 1029 } 1030 } 1031 1032 if (tvp != NULL) 1033 vn_seqc_write_begin(tvp); 1034 vn_seqc_write_begin(tdvp); 1035 vn_seqc_write_begin(fvp); 1036 vn_seqc_write_begin(fdvp); 1037 want_seqc_end = true; 1038 1039 tmp = VFS_TO_TMPFS(tdvp->v_mount); 1040 tdnode = VP_TO_TMPFS_DIR(tdvp); 1041 tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); 1042 fdnode = VP_TO_TMPFS_DIR(fdvp); 1043 fnode = VP_TO_TMPFS_NODE(fvp); 1044 de = tmpfs_dir_lookup(fdnode, fnode, fcnp); 1045 1046 /* 1047 * Entry can disappear before we lock fdvp, 1048 * also avoid manipulating '.' and '..' entries. 1049 */ 1050 if (de == NULL) { 1051 if ((fcnp->cn_flags & ISDOTDOT) != 0 || 1052 (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.')) 1053 error = EINVAL; 1054 else 1055 error = ENOENT; 1056 goto out_locked; 1057 } 1058 MPASS(de->td_node == fnode); 1059 1060 /* 1061 * If re-naming a directory to another preexisting directory 1062 * ensure that the target directory is empty so that its 1063 * removal causes no side effects. 1064 * Kern_rename guarantees the destination to be a directory 1065 * if the source is one. 
1066 */ 1067 if (tvp != NULL) { 1068 MPASS(tnode != NULL); 1069 1070 if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1071 (tdnode->tn_flags & (APPEND | IMMUTABLE))) { 1072 error = EPERM; 1073 goto out_locked; 1074 } 1075 1076 if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) { 1077 if (tnode->tn_size > 0) { 1078 error = ENOTEMPTY; 1079 goto out_locked; 1080 } 1081 } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) { 1082 error = ENOTDIR; 1083 goto out_locked; 1084 } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) { 1085 error = EISDIR; 1086 goto out_locked; 1087 } else { 1088 MPASS(fnode->tn_type != VDIR && 1089 tnode->tn_type != VDIR); 1090 } 1091 } 1092 1093 if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) 1094 || (fdnode->tn_flags & (APPEND | IMMUTABLE))) { 1095 error = EPERM; 1096 goto out_locked; 1097 } 1098 1099 /* 1100 * Ensure that we have enough memory to hold the new name, if it 1101 * has to be changed. 1102 */ 1103 if (fcnp->cn_namelen != tcnp->cn_namelen || 1104 bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) { 1105 newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK); 1106 } else 1107 newname = NULL; 1108 1109 /* 1110 * If the node is being moved to another directory, we have to do 1111 * the move. 1112 */ 1113 if (fdnode != tdnode) { 1114 /* 1115 * In case we are moving a directory, we have to adjust its 1116 * parent to point to the new parent. 1117 */ 1118 if (de->td_node->tn_type == VDIR) { 1119 struct tmpfs_node *n; 1120 1121 /* 1122 * Ensure the target directory is not a child of the 1123 * directory being moved. Otherwise, we'd end up 1124 * with stale nodes. 1125 */ 1126 n = tdnode; 1127 /* 1128 * TMPFS_LOCK guaranties that no nodes are freed while 1129 * traversing the list. Nodes can only be marked as 1130 * removed: tn_parent == NULL. 
1131 */ 1132 TMPFS_LOCK(tmp); 1133 TMPFS_NODE_LOCK(n); 1134 while (n != n->tn_dir.tn_parent) { 1135 struct tmpfs_node *parent; 1136 1137 if (n == fnode) { 1138 TMPFS_NODE_UNLOCK(n); 1139 TMPFS_UNLOCK(tmp); 1140 error = EINVAL; 1141 if (newname != NULL) 1142 free(newname, M_TMPFSNAME); 1143 goto out_locked; 1144 } 1145 parent = n->tn_dir.tn_parent; 1146 TMPFS_NODE_UNLOCK(n); 1147 if (parent == NULL) { 1148 n = NULL; 1149 break; 1150 } 1151 TMPFS_NODE_LOCK(parent); 1152 if (parent->tn_dir.tn_parent == NULL) { 1153 TMPFS_NODE_UNLOCK(parent); 1154 n = NULL; 1155 break; 1156 } 1157 n = parent; 1158 } 1159 TMPFS_UNLOCK(tmp); 1160 if (n == NULL) { 1161 error = EINVAL; 1162 if (newname != NULL) 1163 free(newname, M_TMPFSNAME); 1164 goto out_locked; 1165 } 1166 TMPFS_NODE_UNLOCK(n); 1167 1168 /* Adjust the parent pointer. */ 1169 TMPFS_VALIDATE_DIR(fnode); 1170 TMPFS_NODE_LOCK(de->td_node); 1171 de->td_node->tn_dir.tn_parent = tdnode; 1172 TMPFS_NODE_UNLOCK(de->td_node); 1173 1174 /* 1175 * As a result of changing the target of the '..' 1176 * entry, the link count of the source and target 1177 * directories has to be adjusted. 1178 */ 1179 TMPFS_NODE_LOCK(tdnode); 1180 TMPFS_ASSERT_LOCKED(tdnode); 1181 tdnode->tn_links++; 1182 TMPFS_NODE_UNLOCK(tdnode); 1183 1184 TMPFS_NODE_LOCK(fdnode); 1185 TMPFS_ASSERT_LOCKED(fdnode); 1186 fdnode->tn_links--; 1187 TMPFS_NODE_UNLOCK(fdnode); 1188 } 1189 } 1190 1191 /* 1192 * Do the move: just remove the entry from the source directory 1193 * and insert it into the target one. 1194 */ 1195 tmpfs_dir_detach(fdvp, de); 1196 1197 if (fcnp->cn_flags & DOWHITEOUT) 1198 tmpfs_dir_whiteout_add(fdvp, fcnp); 1199 if (tcnp->cn_flags & ISWHITEOUT) 1200 tmpfs_dir_whiteout_remove(tdvp, tcnp); 1201 1202 /* 1203 * If the name has changed, we need to make it effective by changing 1204 * it in the directory entry. 
	 */
	if (newname != NULL) {
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		/*
		 * Install the buffer allocated earlier as the dirent's
		 * name storage, then copy the new component in.
		 */
		free(de->ud.td_name, M_TMPFSNAME);
		de->ud.td_name = newname;
		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		struct tmpfs_dirent *tde;

		/* Remove the old entry from the target directory. */
		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
		tmpfs_dir_detach(tdvp, tde);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode
		 * is really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
	}

	/* Insert the moved (and possibly renamed) entry into the target. */
	tmpfs_dir_attach(tdvp, de);

	if (tmpfs_use_nc(fvp)) {
		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
	}

	error = 0;

out_locked:
	/* fdvp was locked separately only when distinct from tdvp/tvp. */
	if (fdvp != tdvp && fdvp != tvp)
		VOP_UNLOCK(fdvp);

out:
	/* End the seqc write sections opened before the rename started. */
	if (want_seqc_end) {
		if (tvp != NULL)
			vn_seqc_write_end(tvp);
		vn_seqc_write_end(tdvp);
		vn_seqc_write_end(fvp);
		vn_seqc_write_end(fdvp);
	}

	/*
	 * Release target nodes.
	 * XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this...
	 */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes.
	 */
	vrele(fdvp);
	vrele(fvp);

	return (error);
}

/*
 * VOP_MKDIR: create a directory under dvp; the actual allocation is
 * delegated to tmpfs_alloc_file().
 */
static int
tmpfs_mkdir(struct vop_mkdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;

	MPASS(vap->va_type == VDIR);

	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
}

/*
 * VOP_RMDIR: remove the empty directory vp from its parent dvp.
 * Both vnodes are expected locked by the caller.
 */
static int
tmpfs_rmdir(struct vop_rmdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(VOP_ISLOCKED(vp));

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/* Directories with more than two entries ('.' and '..') cannot be
	 * removed. */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/* This invariant holds only if we are not trying to remove "..".
	 * We checked for that above so this is safe now. */
	MPASS(node->tn_dir.tn_parent == dnode);

	/* Get the directory entry associated with node (vp).  This was
	 * filled by tmpfs_lookup while looking up the entry. */
	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
	MPASS(TMPFS_DIRENT_MATCHES(de,
	    v->a_cnp->cn_nameptr,
	    v->a_cnp->cn_namelen));

	/*
	 * Check flags to see if we are allowed to remove the directory.
	 * NOTE(review): this duplicates the APPEND/NOUNLINK/IMMUTABLE
	 * check made just after the emptiness test above; the second
	 * check can never fire.
	 */
	if ((dnode->tn_flags & APPEND) != 0 ||
	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
		error = EPERM;
		goto out;
	}

	/* Detach the directory entry from the directory (dnode).
	 */
	tmpfs_dir_detach(dvp, de);
	if (v->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, v->a_cnp);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(node);
	/*
	 * Drop the victim's link count and detach it from its parent;
	 * tn_parent == NULL marks the node as removed.
	 */
	node->tn_links--;
	node->tn_dir.tn_parent = NULL;
	node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	node->tn_accessed = true;

	TMPFS_NODE_UNLOCK(node);

	/* The parent also loses a link (the victim's '..' reference). */
	TMPFS_NODE_LOCK(dnode);
	dnode->tn_links--;
	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	dnode->tn_accessed = true;
	TMPFS_NODE_UNLOCK(dnode);

	if (tmpfs_use_nc(dvp)) {
		cache_vop_rmdir(dvp, vp);
	}

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */

	dnode->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(dvp);

	error = 0;

out:
	return (error);
}

/*
 * VOP_SYMLINK: create a symbolic link named by cnp under dvp, with
 * contents 'target'; delegated to tmpfs_alloc_file().
 */
static int
tmpfs_symlink(struct vop_symlink_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;
	const char *target = v->a_target;

#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
	MPASS(vap->va_type == VLNK);
#else
	vap->va_type = VLNK;
#endif

	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
}

/*
 * VOP_READDIR: copy directory entries into uio, optionally producing
 * seek cookies for NFS and compat consumers.
 */
static int
tmpfs_readdir(struct vop_readdir_args *va)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_mount *tm;
	struct tmpfs_node *node;
	uint64_t **cookies;
	int *eofflag, *ncookies;
	ssize_t startresid;
	int error, maxcookies;

	vp = va->a_vp;
	uio = va->a_uio;
	eofflag =
	    va->a_eofflag;
	cookies = va->a_cookies;
	ncookies = va->a_ncookies;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR)
		return (ENOTDIR);

	maxcookies = 0;
	node = VP_TO_TMPFS_DIR(vp);
	tm = VFS_TO_TMPFS(vp->v_mount);

	startresid = uio->uio_resid;

	/* Allocate cookies for NFS and compat modules. */
	if (cookies != NULL && ncookies != NULL) {
		/* One cookie per potential entry, plus "." and "..". */
		maxcookies = howmany(node->tn_size,
		    sizeof(struct tmpfs_dirent)) + 2;
		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
		    M_WAITOK);
		*ncookies = 0;
	}

	if (cookies == NULL)
		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
	else
		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
		    ncookies);

	/*
	 * Buffer was filled without hitting EOF.  EINVAL means the buffer
	 * was too small even for a single entry.
	 */
	if (error == EJUSTRETURN)
		error = (uio->uio_resid != startresid) ? 0 : EINVAL;

	/* On error, the caller must not see a stale cookie array. */
	if (error != 0 && cookies != NULL && ncookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	return (error);
}

/*
 * VOP_READLINK: copy the symlink target into uio and mark the node
 * accessed.
 */
static int
tmpfs_readlink(struct vop_readlink_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;

	int error;
	struct tmpfs_node *node;

	MPASS(uio->uio_offset == 0);
	MPASS(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);

	error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
	    uio);
	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);

	return (error);
}

/*
 * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
 */
static int
tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	char *symlink;

	vp = v->a_vp;
	/*
	 * Lockless (SMR-protected) path: bail out with EAGAIN whenever
	 * any state needed for a safe resolve is not stable.
	 */
	node = VP_TO_TMPFS_NODE_SMR(vp);
	if (__predict_false(node == NULL))
		return (EAGAIN);
	if (!atomic_load_char(&node->tn_link_smr))
		return (EAGAIN);
	symlink = atomic_load_ptr(&node->tn_link_target);
	if (symlink == NULL)
		return (EAGAIN);

	return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
}

/*
 * VOP_INACTIVE: recycle the vnode right away if the node was deleted
 * (no links left); otherwise just reconcile mtime state.
 */
static int
tmpfs_inactive(struct vop_inactive_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	if (node->tn_links == 0)
		vrecycle(vp);
	else
		tmpfs_check_mtime(vp);
	return (0);
}

/*
 * VOP_NEED_INACTIVE: inactivation is required when the node has been
 * deleted, or when a regular file's VM object generation differs from
 * its clean generation (i.e. it may have been dirtied).
 */
static int
tmpfs_need_inactive(struct vop_need_inactive_args *ap)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	struct vm_object *obj;

	vp = ap->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	if (node->tn_links == 0)
		goto need;
	if (vp->v_type == VREG) {
		obj = vp->v_object;
		if (obj->generation != obj->cleangeneration)
			goto need;
	}
	return (0);
need:
	return (1);
}

/*
 * VOP_RECLAIM: detach the tmpfs node from the vnode being reclaimed,
 * and destroy the node itself if it was already deleted.
 */
int
tmpfs_reclaim(struct vop_reclaim_args *v)
{
	struct vnode *vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	bool unlock;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	if (vp->v_type == VREG)
		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
	vp->v_object = NULL;

	TMPFS_LOCK(tmp);
	TMPFS_NODE_LOCK(node);
	tmpfs_free_vp(vp);

	/*
	 * If the node referenced by this vnode was deleted by the user,
	 * we must free its associated data structures (now that the
	 * vnode is being reclaimed).
	 */
	unlock = true;
	if (node->tn_links == 0 &&
	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		/* tmpfs_free_node_locked() reports whether it dropped the locks. */
		unlock = !tmpfs_free_node_locked(tmp, node, true);
	}

	if (unlock) {
		TMPFS_NODE_UNLOCK(node);
		TMPFS_UNLOCK(tmp);
	}

	MPASS(vp->v_data == NULL);
	return (0);
}

/*
 * VOP_PRINT: dump node state for debugging.
 */
int
tmpfs_print(struct vop_print_args *v)
{
	struct vnode *vp = v->a_vp;

	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
	    node, node->tn_flags, (uintmax_t)node->tn_links);
	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
	    node->tn_mode, node->tn_uid, node->tn_gid,
	    (intmax_t)node->tn_size, node->tn_status);

	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);

	printf("\n");

	return (0);
}

/*
 * VOP_PATHCONF: report filesystem limits and capabilities; names not
 * handled here fall back to vop_stdpathconf().
 */
int
tmpfs_pathconf(struct vop_pathconf_args *v)
{
	struct vnode *vp = v->a_vp;
	int name = v->a_name;
	long *retval = v->a_retval;

	int error;

	error = 0;

	switch (name) {
	case _PC_LINK_MAX:
		*retval = TMPFS_LINK_MAX;
		break;

	case _PC_SYMLINK_MAX:
		*retval = MAXPATHLEN;
		break;

	case _PC_NAME_MAX:
		*retval = NAME_MAX;
		break;

	case _PC_PIPE_BUF:
		if (vp->v_type == VDIR || vp->v_type == VFIFO)
			*retval = PIPE_BUF;
		else
			error = EINVAL;
		break;

	case _PC_CHOWN_RESTRICTED:
		*retval = 1;
		break;

	case _PC_NO_TRUNC:
		*retval = 1;
		break;

	case _PC_SYNC_IO:
		*retval = 1;
		break;

	case _PC_FILESIZEBITS:
		*retval = 64;
		break;

	case _PC_MIN_HOLE_SIZE:
		*retval = PAGE_SIZE;
		break;

	default:
		error = vop_stdpathconf(v);
	}

	return (error);
}

static int
tmpfs_vptofh(struct
    vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct tmpfs_fid_data tfd;
	struct tmpfs_node *node;
	struct fid *fhp;

	node = VP_TO_TMPFS_NODE(ap->a_vp);
	fhp = ap->a_fhp;
	fhp->fid_len = sizeof(tfd);

	/*
	 * Copy into fid_data from the stack to avoid unaligned pointer use.
	 * See the comment in sys/mount.h on struct fid for details.
	 */
	tfd.tfd_id = node->tn_id;
	tfd.tfd_gen = node->tn_gen;
	memcpy(fhp->fid_data, &tfd, fhp->fid_len);

	return (0);
}

/*
 * VOP_WHITEOUT: manage whiteout entries.  LOOKUP is a no-op, CREATE
 * adds a whiteout unless a real entry already exists, DELETE removes
 * one.
 */
static int
tmpfs_whiteout(struct vop_whiteout_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct tmpfs_dirent *de;

	switch (ap->a_flags) {
	case LOOKUP:
		return (0);
	case CREATE:
		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
		if (de != NULL)
			/* An existing whiteout (NULL node) is fine. */
			return (de->td_node == NULL ? 0 : EEXIST);
		return (tmpfs_dir_whiteout_add(dvp, cnp));
	case DELETE:
		tmpfs_dir_whiteout_remove(dvp, cnp);
		return (0);
	default:
		panic("tmpfs_whiteout: unknown op");
	}
}

/*
 * Scan directory tnp for the entry referencing node tn; return it in
 * *pde, or ENOENT if no entry points at tn.
 */
static int
tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
    struct tmpfs_dirent **pde)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de;

	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
	    de = tmpfs_dir_next(tnp, &dc)) {
		if (de->td_node == tn) {
			*pde = de;
			return (0);
		}
	}
	return (ENOENT);
}

/*
 * Helper for VOP_VPTOCNP: obtain a vnode for candidate parent tnp, look
 * up tn's entry in it and, on success, prepend the entry's name to buf
 * (which is filled from the end; *buflen tracks the start offset).
 */
static int
tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
    struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
{
	struct tmpfs_dirent *de;
	int error, i;

	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
	    dvp);
	if (error != 0)
		return (error);
	error = tmpfs_vptocnp_dir(tn, tnp, &de);
	if (error == 0) {
		/* Prepend the component name; buf is filled from the end. */
		i = *buflen;
		i -= de->td_namelen;
		if (i < 0) {
			error = ENOMEM;
		} else {
			bcopy(de->ud.td_name, buf + i, de->td_namelen);
			*buflen = i;
		}
	}
	if (error == 0) {
		/* Hand *dvp back unlocked but referenced. */
		if (vp != *dvp)
			VOP_UNLOCK(*dvp);
	} else {
		/* Undo the hold taken by vn_vget_ino_gen(). */
		if (vp != *dvp)
			vput(*dvp);
		else
			vrele(vp);
	}
	return (error);
}

/*
 * VOP_VPTOCNP: translate a vnode to its name in some parent directory.
 * For directories the parent link is recorded in the node itself; for
 * any other node type every directory on the mount has to be scanned.
 */
static int
tmpfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp, **dvp;
	struct tmpfs_node *tn, *tnp, *tnp1;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tm;
	char *buf;
	size_t *buflen;
	int error;

	vp = ap->a_vp;
	dvp = ap->a_vpp;
	buf = ap->a_buf;
	buflen = ap->a_buflen;

	tm = VFS_TO_TMPFS(vp->v_mount);
	tn = VP_TO_TMPFS_NODE(vp);
	if (tn->tn_type == VDIR) {
		tnp = tn->tn_dir.tn_parent;
		if (tnp == NULL)
			return (ENOENT);
		tmpfs_ref_node(tnp);
		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
		    buflen, dvp);
		tmpfs_free_node(tm, tnp);
		return (error);
	}
restart:
	TMPFS_LOCK(tm);
restart_locked:
	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
		if (tnp->tn_type != VDIR)
			continue;
		TMPFS_NODE_LOCK(tnp);
		tmpfs_ref_node(tnp);

		/*
		 * tn_vnode cannot be instantiated while we hold the
		 * node lock, so the directory cannot be changed while
		 * we iterate over it.  Do this to avoid instantiating
		 * vnode for directories which cannot point to our
		 * node.
		 */
		error = tnp->tn_vnode == NULL ?
		    tmpfs_vptocnp_dir(tn, tnp,
		    &de) : 0;

		if (error == 0) {
			TMPFS_NODE_UNLOCK(tnp);
			TMPFS_UNLOCK(tm);
			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
			    dvp);
			if (error == 0) {
				tmpfs_free_node(tm, tnp);
				return (0);
			}
			if (VN_IS_DOOMED(vp)) {
				tmpfs_free_node(tm, tnp);
				return (ENOENT);
			}
			/* Retake the locks dropped for the fill attempt. */
			TMPFS_LOCK(tm);
			TMPFS_NODE_LOCK(tnp);
		}
		/*
		 * Drop our reference on tnp.  If that freed the node, the
		 * iteration position is gone and we must restart from the
		 * list head; likewise if the node was detached from the
		 * per-mount list while the locks were dropped.
		 */
		if (tmpfs_free_node_locked(tm, tnp, false)) {
			goto restart;
		} else {
			KASSERT(tnp->tn_refcount > 0,
			    ("node %p refcount zero", tnp));
			if (tnp->tn_attached) {
				tnp1 = LIST_NEXT(tnp, tn_entries);
				TMPFS_NODE_UNLOCK(tnp);
			} else {
				TMPFS_NODE_UNLOCK(tnp);
				goto restart_locked;
			}
		}
	}
	TMPFS_UNLOCK(tm);
	return (ENOENT);
}

/*
 * Find the offset of the first data at or after noff: either a valid
 * resident page or a swap block.  Caller holds the object lock.
 */
static off_t
tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
{
	vm_page_t m;
	vm_pindex_t p, p_m, p_swp;

	p = OFF_TO_IDX(noff);
	m = vm_page_find_least(obj, p);

	/*
	 * Microoptimize the most common case for SEEK_DATA, where
	 * there is no hole and the page is resident.
	 */
	if (m != NULL && vm_page_any_valid(m) && m->pindex == p)
		return (noff);

	p_swp = swap_pager_find_least(obj, p);
	if (p_swp == p)
		return (noff);

	p_m = m == NULL ?
	    obj->size : m->pindex;
	return (IDX_TO_OFF(MIN(p_m, p_swp)));
}

/* Round noff up to the start of the next page. */
static off_t
tmpfs_seek_next(off_t noff)
{
	return (noff + PAGE_SIZE - (noff & PAGE_MASK));
}

/*
 * Clamp *noff against the file size: past-EOF SEEK_DATA is ENXIO,
 * past-EOF SEEK_HOLE is pinned to the size (EOF acts as a hole).
 */
static int
tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
{
	if (*noff < tn->tn_size)
		return (0);
	if (seekdata)
		return (ENXIO);
	*noff = tn->tn_size;
	return (0);
}

/*
 * Find the first hole at or after noff: skip runs of valid resident
 * pages, then confirm the swap pager has no block at the candidate
 * page either.  Caller holds the object lock.
 */
static off_t
tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
{
	vm_page_t m;
	vm_pindex_t p, p_swp;

	for (;; noff = tmpfs_seek_next(noff)) {
		/*
		 * Walk over the largest sequential run of the valid pages.
		 */
		for (m = vm_page_lookup(obj, OFF_TO_IDX(noff));
		    m != NULL && vm_page_any_valid(m);
		    m = vm_page_next(m), noff = tmpfs_seek_next(noff))
			;

		/*
		 * Found a hole in the object's page queue.  Check if
		 * there is a hole in the swap at the same place.
		 */
		p = OFF_TO_IDX(noff);
		p_swp = swap_pager_find_least(obj, p);
		if (p_swp != p) {
			noff = IDX_TO_OFF(p);
			break;
		}
	}
	return (noff);
}

/*
 * Common implementation for FIOSEEKDATA/FIOSEEKHOLE on a regular file:
 * scan the node's anonymous VM object under its read lock.
 */
static int
tmpfs_seek_datahole(struct vnode *vp, off_t *off, bool seekdata)
{
	struct tmpfs_node *tn;
	vm_object_t obj;
	off_t noff;
	int error;

	if (vp->v_type != VREG)
		return (ENOTTY);
	tn = VP_TO_TMPFS_NODE(vp);
	noff = *off;
	if (noff < 0)
		return (ENXIO);
	error = tmpfs_seek_clamp(tn, &noff, seekdata);
	if (error != 0)
		return (error);
	obj = tn->tn_reg.tn_aobj;

	VM_OBJECT_RLOCK(obj);
	noff = seekdata ?
	    tmpfs_seek_data_locked(obj, noff) :
	    tmpfs_seek_hole_locked(obj, noff);
	VM_OBJECT_RUNLOCK(obj);

	/* Re-clamp: the scan may have run past EOF. */
	error = tmpfs_seek_clamp(tn, &noff, seekdata);
	if (error == 0)
		*off = noff;
	return (error);
}

/*
 * VOP_IOCTL: only FIOSEEKDATA/FIOSEEKHOLE are supported; the vnode is
 * share-locked around the scan, and a failed vn_lock maps to EBADF.
 */
static int
tmpfs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0;

	switch (ap->a_command) {
	case FIOSEEKDATA:
	case FIOSEEKHOLE:
		error = vn_lock(vp, LK_SHARED);
		if (error != 0) {
			error = EBADF;
			break;
		}
		error = tmpfs_seek_datahole(vp, (off_t *)ap->a_data,
		    ap->a_command == FIOSEEKDATA);
		VOP_UNLOCK(vp);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

/*
 * Vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_vector tmpfs_vnodeop_entries = {
	.vop_default =		&default_vnodeops,
	.vop_lookup =		vfs_cache_lookup,
	.vop_cachedlookup =	tmpfs_cached_lookup,
	.vop_create =		tmpfs_create,
	.vop_mknod =		tmpfs_mknod,
	.vop_open =		tmpfs_open,
	.vop_close =		tmpfs_close,
	.vop_fplookup_vexec =	tmpfs_fplookup_vexec,
	.vop_fplookup_symlink =	tmpfs_fplookup_symlink,
	.vop_access =		tmpfs_access,
	.vop_stat =		tmpfs_stat,
	.vop_getattr =		tmpfs_getattr,
	.vop_setattr =		tmpfs_setattr,
	.vop_read =		tmpfs_read,
	.vop_read_pgcache =	tmpfs_read_pgcache,
	.vop_write =		tmpfs_write,
	.vop_deallocate =	tmpfs_deallocate,
	.vop_fsync =		tmpfs_fsync,
	.vop_remove =		tmpfs_remove,
	.vop_link =		tmpfs_link,
	.vop_rename =		tmpfs_rename,
	.vop_mkdir =		tmpfs_mkdir,
	.vop_rmdir =		tmpfs_rmdir,
	.vop_symlink =		tmpfs_symlink,
	.vop_readdir =		tmpfs_readdir,
	.vop_readlink =		tmpfs_readlink,
	.vop_inactive =		tmpfs_inactive,
	.vop_need_inactive =	tmpfs_need_inactive,
	.vop_reclaim =		tmpfs_reclaim,
	.vop_print =		tmpfs_print,
	.vop_pathconf =		tmpfs_pathconf,
	.vop_vptofh =		tmpfs_vptofh,
	.vop_whiteout =		tmpfs_whiteout,
	.vop_bmap =		VOP_EOPNOTSUPP,
	.vop_vptocnp =		tmpfs_vptocnp,
	.vop_lock1 =		vop_lock,
	.vop_unlock =		vop_unlock,
	.vop_islocked =		vop_islocked,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
	.vop_ioctl =		tmpfs_ioctl,
};
VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);

/*
 * Same vector for mounts which do not use namecache: lookups go
 * straight to tmpfs_lookup instead of vfs_cache_lookup.
 */
struct vop_vector tmpfs_vnodeop_nonc_entries = {
	.vop_default =		&tmpfs_vnodeop_entries,
	.vop_lookup =		tmpfs_lookup,
};
VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);