1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2005 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11 * 2005 program. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Efficient memory file system supporting functions. 37 */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/dirent.h> 44 #include <sys/fnv_hash.h> 45 #include <sys/lock.h> 46 #include <sys/limits.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/random.h> 52 #include <sys/refcount.h> 53 #include <sys/rwlock.h> 54 #include <sys/smr.h> 55 #include <sys/stat.h> 56 #include <sys/sysctl.h> 57 #include <sys/user.h> 58 #include <sys/vnode.h> 59 #include <sys/vmmeter.h> 60 61 #include <vm/vm.h> 62 #include <vm/vm_param.h> 63 #include <vm/vm_object.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/vm_pager.h> 67 #include <vm/vm_extern.h> 68 #include <vm/swap_pager.h> 69 70 #include <fs/tmpfs/tmpfs.h> 71 #include <fs/tmpfs/tmpfs_fifoops.h> 72 #include <fs/tmpfs/tmpfs_vnops.h> 73 74 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "tmpfs file system"); 76 77 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 78 79 MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure"); 80 static uma_zone_t tmpfs_node_pool; 81 VFS_SMR_DECLARE; 82 83 int tmpfs_pager_type = -1; 84 85 static vm_object_t 86 tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 87 vm_ooffset_t offset, struct ucred *cred) 88 { 89 vm_object_t object; 90 91 MPASS(handle == NULL); 92 MPASS(offset == 0); 93 object = vm_object_allocate_dyn(tmpfs_pager_type, size, 94 OBJ_COLORED | OBJ_SWAP); 95 if (!swap_pager_init_object(object, NULL, NULL, size, 0)) { 96 vm_object_deallocate(object); 97 object = NULL; 98 } 99 return (object); 100 } 101 102 /* 103 * Make sure tmpfs vnodes with writable mappings can be found on the lazy list. 104 * 105 * This allows for periodic mtime updates while only scanning vnodes which are 106 * plausibly dirty, see tmpfs_update_mtime_lazy. 107 */ 108 static void 109 tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, 110 vm_offset_t new) 111 { 112 struct vnode *vp; 113 114 VM_OBJECT_ASSERT_WLOCKED(object); 115 116 vp = object->un_pager.swp.swp_tmpfs; 117 118 /* 119 * Forced unmount? 120 */ 121 if (vp == NULL) { 122 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 123 ("object %p with OBJ_TMPFS_VREF but without vnode", object)); 124 VM_OBJECT_WUNLOCK(object); 125 return; 126 } 127 128 if (old == 0) { 129 VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp, 130 ("object without writable mappings has a reference")); 131 VNPASS(vp->v_usecount > 0, vp); 132 } else { 133 VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp, 134 ("object with writable mappings does not have a reference")); 135 } 136 137 if (old == new) { 138 VM_OBJECT_WUNLOCK(object); 139 return; 140 } 141 142 if (new == 0) { 143 vm_object_clear_flag(object, OBJ_TMPFS_VREF); 144 VM_OBJECT_WUNLOCK(object); 145 vrele(vp); 146 } else { 147 if ((object->flags & OBJ_TMPFS_VREF) == 0) { 148 vref(vp); 149 vlazy(vp); 150 vm_object_set_flag(object, OBJ_TMPFS_VREF); 151 } 152 VM_OBJECT_WUNLOCK(object); 153 } 154 } 155 156 static void 157 tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start, 158 vm_offset_t end) 159 { 160 vm_offset_t new, old; 161 162 VM_OBJECT_WLOCK(object); 163 KASSERT((object->flags & OBJ_ANON) == 0, 164 ("%s: object %p with OBJ_ANON", __func__, object)); 165 old = object->un_pager.swp.writemappings; 166 object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; 167 new = object->un_pager.swp.writemappings; 168 tmpfs_pager_writecount_recalc(object, old, new); 169 VM_OBJECT_ASSERT_UNLOCKED(object); 170 } 171 172 static void 173 tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start, 174 vm_offset_t end) 175 { 176 vm_offset_t new, old; 177 178 VM_OBJECT_WLOCK(object); 179 KASSERT((object->flags & OBJ_ANON) == 0, 180 ("%s: object %p with OBJ_ANON", __func__, object)); 181 old = object->un_pager.swp.writemappings; 182 object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; 183 new = object->un_pager.swp.writemappings; 184 tmpfs_pager_writecount_recalc(object, old, new); 185 VM_OBJECT_ASSERT_UNLOCKED(object); 186 } 187 188 static void 189 tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) 190 { 191 struct vnode *vp; 192 193 /* 194 * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type 195 * type, but not OBJ_TMPFS flag. In this case there is no 196 * v_writecount to adjust. 197 */ 198 if (vp_heldp != NULL) 199 VM_OBJECT_RLOCK(object); 200 else 201 VM_OBJECT_ASSERT_LOCKED(object); 202 if ((object->flags & OBJ_TMPFS) != 0) { 203 vp = object->un_pager.swp.swp_tmpfs; 204 if (vp != NULL) { 205 *vpp = vp; 206 if (vp_heldp != NULL) { 207 vhold(vp); 208 *vp_heldp = true; 209 } 210 } 211 } 212 if (vp_heldp != NULL) 213 VM_OBJECT_RUNLOCK(object); 214 } 215 216 struct pagerops tmpfs_pager_ops = { 217 .pgo_kvme_type = KVME_TYPE_VNODE, 218 .pgo_alloc = tmpfs_pager_alloc, 219 .pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, 220 .pgo_update_writecount = tmpfs_pager_update_writecount, 221 .pgo_release_writecount = tmpfs_pager_release_writecount, 222 .pgo_mightbedirty = vm_object_mightbedirty_, 223 .pgo_getvp = tmpfs_pager_getvp, 224 }; 225 226 static int 227 tmpfs_node_ctor(void *mem, int size, void *arg, int flags) 228 { 229 struct tmpfs_node *node; 230 231 node = mem; 232 node->tn_gen++; 233 node->tn_size = 0; 234 node->tn_status = 0; 235 node->tn_accessed = false; 236 node->tn_flags = 0; 237 node->tn_links = 0; 238 node->tn_vnode = NULL; 239 node->tn_vpstate = 0; 240 return (0); 241 } 242 243 static void 244 tmpfs_node_dtor(void *mem, int size, void *arg) 245 { 246 struct tmpfs_node *node; 247 248 node = mem; 249 node->tn_type = VNON; 250 } 251 252 static int 253 tmpfs_node_init(void *mem, int size, int flags) 254 { 255 struct tmpfs_node *node; 256 257 node = mem; 258 node->tn_id = 0; 259 mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF); 260 node->tn_gen = arc4random(); 261 return (0); 262 } 263 264 static void 265 tmpfs_node_fini(void *mem, int size) 266 { 267 struct tmpfs_node *node; 268 269 node = mem; 270 mtx_destroy(&node->tn_interlock); 271 } 272 273 int 274 tmpfs_subr_init(void) 275 { 276 tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops, 277 OBJT_SWAP); 278 if (tmpfs_pager_type == -1) 279 return (EINVAL); 280 tmpfs_node_pool = uma_zcreate("TMPFS node", 281 sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, 282 tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); 283 VFS_SMR_ZONE_SET(tmpfs_node_pool); 284 return (0); 285 } 286 287 void 288 tmpfs_subr_uninit(void) 289 { 290 if (tmpfs_pager_type != -1) 291 vm_pager_free_dyn_type(tmpfs_pager_type); 292 tmpfs_pager_type = -1; 293 uma_zdestroy(tmpfs_node_pool); 294 } 295 296 static int 297 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 298 { 299 int error; 300 long pages, bytes; 301 302 pages = *(long *)arg1; 303 bytes = pages * PAGE_SIZE; 304 305 error = sysctl_handle_long(oidp, &bytes, 0, req); 306 if (error || !req->newptr) 307 return (error); 308 309 pages = bytes / PAGE_SIZE; 310 if (pages < TMPFS_PAGES_MINRESERVED) 311 return (EINVAL); 312 313 *(long *)arg1 = pages; 314 return (0); 315 } 316 317 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, 318 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0, 319 sysctl_mem_reserved, "L", 320 "Amount of available memory and swap below which tmpfs growth stops"); 321 322 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 323 struct tmpfs_dirent *b); 324 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 325 326 size_t 327 tmpfs_mem_avail(void) 328 { 329 size_t avail; 330 long reserved; 331 332 avail = swap_pager_avail + vm_free_count(); 333 reserved = atomic_load_long(&tmpfs_pages_reserved); 334 if (__predict_false(avail < reserved)) 335 return (0); 336 return (avail - reserved); 337 } 338 339 size_t 340 tmpfs_pages_used(struct tmpfs_mount *tmp) 341 { 342 const size_t node_size = sizeof(struct tmpfs_node) + 343 sizeof(struct tmpfs_dirent); 344 size_t meta_pages; 345 346 meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 347 PAGE_SIZE); 348 return (meta_pages + tmp->tm_pages_used); 349 } 350 351 static size_t 352 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 353 { 354 if (tmpfs_mem_avail() < req_pages) 355 return (0); 356 357 if (tmp->tm_pages_max != ULONG_MAX && 358 tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 359 return (0); 360 361 return (1); 362 } 363 364 static int 365 tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base, 366 int end, boolean_t ignerr) 367 { 368 vm_page_t m; 369 int rv, error; 370 371 VM_OBJECT_ASSERT_WLOCKED(object); 372 KASSERT(base >= 0, ("%s: base %d", __func__, base)); 373 KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base, 374 end)); 375 error = 0; 376 377 retry: 378 m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); 379 if (m != NULL) { 380 MPASS(vm_page_all_valid(m)); 381 } else if (vm_pager_has_page(object, idx, NULL, NULL)) { 382 m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL | 383 VM_ALLOC_WAITFAIL); 384 if (m == NULL) 385 goto retry; 386 vm_object_pip_add(object, 1); 387 VM_OBJECT_WUNLOCK(object); 388 rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); 389 VM_OBJECT_WLOCK(object); 390 vm_object_pip_wakeup(object); 391 if (rv == VM_PAGER_OK) { 392 /* 393 * Since the page was not resident, and therefore not 394 * recently accessed, immediately enqueue it for 395 * asynchronous laundering. The current operation is 396 * not regarded as an access. 397 */ 398 vm_page_launder(m); 399 } else { 400 vm_page_free(m); 401 m = NULL; 402 if (!ignerr) 403 error = EIO; 404 } 405 } 406 if (m != NULL) { 407 pmap_zero_page_area(m, base, end - base); 408 vm_page_set_dirty(m); 409 vm_page_xunbusy(m); 410 } 411 412 return (error); 413 } 414 415 void 416 tmpfs_ref_node(struct tmpfs_node *node) 417 { 418 #ifdef INVARIANTS 419 u_int old; 420 421 old = 422 #endif 423 refcount_acquire(&node->tn_refcount); 424 #ifdef INVARIANTS 425 KASSERT(old > 0, ("node %p zero refcount", node)); 426 #endif 427 } 428 429 /* 430 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 431 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 432 * using the credentials of the process 'p'. 433 * 434 * If the node type is set to 'VDIR', then the parent parameter must point 435 * to the parent directory of the node being created. It may only be NULL 436 * while allocating the root node. 437 * 438 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 439 * specifies the device the node represents. 440 * 441 * If the node type is set to 'VLNK', then the parameter target specifies 442 * the file name of the target file for the symbolic link that is being 443 * created. 444 * 445 * Note that new nodes are retrieved from the available list if it has 446 * items or, if it is empty, from the node pool as long as there is enough 447 * space to create them. 448 * 449 * Returns zero on success or an appropriate error code on failure. 450 */ 451 int 452 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, 453 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 454 const char *target, dev_t rdev, struct tmpfs_node **node) 455 { 456 struct tmpfs_node *nnode; 457 vm_object_t obj; 458 char *symlink; 459 char symlink_smr; 460 461 /* If the root directory of the 'tmp' file system is not yet 462 * allocated, this must be the request to do it. */ 463 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 464 465 MPASS(IFF(type == VLNK, target != NULL)); 466 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 467 468 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 469 return (ENOSPC); 470 if (tmpfs_pages_check_avail(tmp, 1) == 0) 471 return (ENOSPC); 472 473 if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { 474 /* 475 * When a new tmpfs node is created for fully 476 * constructed mount point, there must be a parent 477 * node, which vnode is locked exclusively. As 478 * consequence, if the unmount is executing in 479 * parallel, vflush() cannot reclaim the parent vnode. 480 * Due to this, the check for MNTK_UNMOUNT flag is not 481 * racy: if we did not see MNTK_UNMOUNT flag, then tmp 482 * cannot be destroyed until node construction is 483 * finished and the parent vnode unlocked. 484 * 485 * Tmpfs does not need to instantiate new nodes during 486 * unmount. 487 */ 488 return (EBUSY); 489 } 490 if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) 491 return (EROFS); 492 493 nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK); 494 495 /* Generic initialization. */ 496 nnode->tn_type = type; 497 vfs_timestamp(&nnode->tn_atime); 498 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 499 nnode->tn_atime; 500 nnode->tn_uid = uid; 501 nnode->tn_gid = gid; 502 nnode->tn_mode = mode; 503 nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr); 504 nnode->tn_refcount = 1; 505 506 /* Type-specific initialization. */ 507 switch (nnode->tn_type) { 508 case VBLK: 509 case VCHR: 510 nnode->tn_rdev = rdev; 511 break; 512 513 case VDIR: 514 RB_INIT(&nnode->tn_dir.tn_dirhead); 515 LIST_INIT(&nnode->tn_dir.tn_dupindex); 516 MPASS(parent != nnode); 517 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 518 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 519 nnode->tn_dir.tn_readdir_lastn = 0; 520 nnode->tn_dir.tn_readdir_lastp = NULL; 521 nnode->tn_links++; 522 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 523 nnode->tn_dir.tn_parent->tn_links++; 524 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 525 break; 526 527 case VFIFO: 528 /* FALLTHROUGH */ 529 case VSOCK: 530 break; 531 532 case VLNK: 533 MPASS(strlen(target) < MAXPATHLEN); 534 nnode->tn_size = strlen(target); 535 536 symlink = NULL; 537 if (!tmp->tm_nonc) { 538 symlink = cache_symlink_alloc(nnode->tn_size + 1, M_WAITOK); 539 symlink_smr = true; 540 } 541 if (symlink == NULL) { 542 symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, M_WAITOK); 543 symlink_smr = false; 544 } 545 memcpy(symlink, target, nnode->tn_size + 1); 546 547 /* 548 * Allow safe symlink resolving for lockless lookup. 549 * tmpfs_fplookup_symlink references this comment. 550 * 551 * 1. nnode is not yet visible to the world 552 * 2. both tn_link_target and tn_link_smr get populated 553 * 3. release fence publishes their content 554 * 4. tn_link_target content is immutable until node destruction, 555 * where the pointer gets set to NULL 556 * 5. tn_link_smr is never changed once set 557 * 558 * As a result it is sufficient to issue load consume on the node 559 * pointer to also get the above content in a stable manner. 560 * Worst case tn_link_smr flag may be set to true despite being stale, 561 * while the target buffer is already cleared out. 562 */ 563 atomic_store_ptr(&nnode->tn_link_target, symlink); 564 atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); 565 atomic_thread_fence_rel(); 566 break; 567 568 case VREG: 569 obj = nnode->tn_reg.tn_aobj = 570 vm_pager_allocate(tmpfs_pager_type, NULL, 0, 571 VM_PROT_DEFAULT, 0, 572 NULL /* XXXKIB - tmpfs needs swap reservation */); 573 /* OBJ_TMPFS is set together with the setting of vp->v_object */ 574 nnode->tn_reg.tn_tmp = tmp; 575 break; 576 577 default: 578 panic("tmpfs_alloc_node: type %p %d", nnode, 579 (int)nnode->tn_type); 580 } 581 582 TMPFS_LOCK(tmp); 583 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 584 nnode->tn_attached = true; 585 tmp->tm_nodes_inuse++; 586 tmp->tm_refcount++; 587 TMPFS_UNLOCK(tmp); 588 589 *node = nnode; 590 return (0); 591 } 592 593 /* 594 * Destroys the node pointed to by node from the file system 'tmp'. 595 * If the node references a directory, no entries are allowed. 596 */ 597 void 598 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 599 { 600 if (refcount_release_if_not_last(&node->tn_refcount)) 601 return; 602 603 TMPFS_LOCK(tmp); 604 TMPFS_NODE_LOCK(node); 605 if (!tmpfs_free_node_locked(tmp, node, false)) { 606 TMPFS_NODE_UNLOCK(node); 607 TMPFS_UNLOCK(tmp); 608 } 609 } 610 611 bool 612 tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node, 613 bool detach) 614 { 615 vm_object_t uobj; 616 char *symlink; 617 bool last; 618 619 TMPFS_MP_ASSERT_LOCKED(tmp); 620 TMPFS_NODE_ASSERT_LOCKED(node); 621 622 last = refcount_release(&node->tn_refcount); 623 if (node->tn_attached && (detach || last)) { 624 MPASS(tmp->tm_nodes_inuse > 0); 625 tmp->tm_nodes_inuse--; 626 LIST_REMOVE(node, tn_entries); 627 node->tn_attached = false; 628 } 629 if (!last) 630 return (false); 631 632 TMPFS_NODE_UNLOCK(node); 633 634 #ifdef INVARIANTS 635 MPASS(node->tn_vnode == NULL); 636 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 637 638 /* 639 * Make sure this is a node type we can deal with. Everything is explicitly 640 * enumerated without the 'default' clause so the the compiler can throw an 641 * error in case a new type is added. 642 */ 643 switch (node->tn_type) { 644 case VBLK: 645 case VCHR: 646 case VDIR: 647 case VFIFO: 648 case VSOCK: 649 case VLNK: 650 case VREG: 651 break; 652 case VNON: 653 case VBAD: 654 case VMARKER: 655 panic("%s: bad type %d for node %p", __func__, (int)node->tn_type, node); 656 } 657 #endif 658 659 switch (node->tn_type) { 660 case VREG: 661 uobj = node->tn_reg.tn_aobj; 662 if (uobj != NULL) { 663 if (uobj->size != 0) 664 atomic_subtract_long(&tmp->tm_pages_used, uobj->size); 665 } 666 667 tmpfs_free_tmp(tmp); 668 669 if (uobj != NULL) { 670 KASSERT((uobj->flags & OBJ_TMPFS) == 0, 671 ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); 672 vm_object_deallocate(uobj); 673 } 674 break; 675 case VLNK: 676 tmpfs_free_tmp(tmp); 677 678 symlink = node->tn_link_target; 679 atomic_store_ptr(&node->tn_link_target, NULL); 680 if (atomic_load_char(&node->tn_link_smr)) { 681 cache_symlink_free(symlink, node->tn_size + 1); 682 } else { 683 free(symlink, M_TMPFSNAME); 684 } 685 break; 686 default: 687 tmpfs_free_tmp(tmp); 688 break; 689 } 690 691 uma_zfree_smr(tmpfs_node_pool, node); 692 return (true); 693 } 694 695 static __inline uint32_t 696 tmpfs_dirent_hash(const char *name, u_int len) 697 { 698 uint32_t hash; 699 700 hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; 701 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP 702 hash &= 0xf; 703 #endif 704 if (hash < TMPFS_DIRCOOKIE_MIN) 705 hash += TMPFS_DIRCOOKIE_MIN; 706 707 return (hash); 708 } 709 710 static __inline off_t 711 tmpfs_dirent_cookie(struct tmpfs_dirent *de) 712 { 713 if (de == NULL) 714 return (TMPFS_DIRCOOKIE_EOF); 715 716 MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); 717 718 return (de->td_cookie); 719 } 720 721 static __inline boolean_t 722 tmpfs_dirent_dup(struct tmpfs_dirent *de) 723 { 724 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); 725 } 726 727 static __inline boolean_t 728 tmpfs_dirent_duphead(struct tmpfs_dirent *de) 729 { 730 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); 731 } 732 733 void 734 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) 735 { 736 de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); 737 memcpy(de->ud.td_name, name, namelen); 738 de->td_namelen = namelen; 739 } 740 741 /* 742 * Allocates a new directory entry for the node node with a name of name. 743 * The new directory entry is returned in *de. 744 * 745 * The link count of node is increased by one to reflect the new object 746 * referencing it. 747 * 748 * Returns zero on success or an appropriate error code on failure. 749 */ 750 int 751 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 752 const char *name, u_int len, struct tmpfs_dirent **de) 753 { 754 struct tmpfs_dirent *nde; 755 756 nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK); 757 nde->td_node = node; 758 if (name != NULL) { 759 nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 760 tmpfs_dirent_init(nde, name, len); 761 } else 762 nde->td_namelen = 0; 763 if (node != NULL) 764 node->tn_links++; 765 766 *de = nde; 767 768 return (0); 769 } 770 771 /* 772 * Frees a directory entry. It is the caller's responsibility to destroy 773 * the node referenced by it if needed. 774 * 775 * The link count of node is decreased by one to reflect the removal of an 776 * object that referenced it. This only happens if 'node_exists' is true; 777 * otherwise the function will not access the node referred to by the 778 * directory entry, as it may already have been released from the outside. 779 */ 780 void 781 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 782 { 783 struct tmpfs_node *node; 784 785 node = de->td_node; 786 if (node != NULL) { 787 MPASS(node->tn_links > 0); 788 node->tn_links--; 789 } 790 if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) 791 free(de->ud.td_name, M_TMPFSNAME); 792 free(de, M_TMPFSDIR); 793 } 794 795 void 796 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) 797 { 798 bool want_vrele; 799 800 ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); 801 if (vp->v_type != VREG || obj == NULL) 802 return; 803 804 VM_OBJECT_WLOCK(obj); 805 VI_LOCK(vp); 806 /* 807 * May be going through forced unmount. 808 */ 809 want_vrele = false; 810 if ((obj->flags & OBJ_TMPFS_VREF) != 0) { 811 vm_object_clear_flag(obj, OBJ_TMPFS_VREF); 812 want_vrele = true; 813 } 814 815 vm_object_clear_flag(obj, OBJ_TMPFS); 816 obj->un_pager.swp.swp_tmpfs = NULL; 817 if (vp->v_writecount < 0) 818 vp->v_writecount = 0; 819 VI_UNLOCK(vp); 820 VM_OBJECT_WUNLOCK(obj); 821 if (want_vrele) { 822 vrele(vp); 823 } 824 } 825 826 /* 827 * Need to clear v_object for insmntque failure. 828 */ 829 static void 830 tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg) 831 { 832 833 tmpfs_destroy_vobject(vp, vp->v_object); 834 vp->v_object = NULL; 835 vp->v_data = NULL; 836 vp->v_op = &dead_vnodeops; 837 vgone(vp); 838 vput(vp); 839 } 840 841 /* 842 * Allocates a new vnode for the node node or returns a new reference to 843 * an existing one if the node had already a vnode referencing it. The 844 * resulting locked vnode is returned in *vpp. 845 * 846 * Returns zero on success or an appropriate error code on failure. 847 */ 848 int 849 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 850 struct vnode **vpp) 851 { 852 struct vnode *vp; 853 enum vgetstate vs; 854 struct tmpfs_mount *tm; 855 vm_object_t object; 856 int error; 857 858 error = 0; 859 tm = VFS_TO_TMPFS(mp); 860 TMPFS_NODE_LOCK(node); 861 tmpfs_ref_node(node); 862 loop: 863 TMPFS_NODE_ASSERT_LOCKED(node); 864 if ((vp = node->tn_vnode) != NULL) { 865 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 866 if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || 867 (VN_IS_DOOMED(vp) && 868 (lkflag & LK_NOWAIT) != 0)) { 869 TMPFS_NODE_UNLOCK(node); 870 error = ENOENT; 871 vp = NULL; 872 goto out; 873 } 874 if (VN_IS_DOOMED(vp)) { 875 node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; 876 while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { 877 msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 878 0, "tmpfsE", 0); 879 } 880 goto loop; 881 } 882 vs = vget_prep(vp); 883 TMPFS_NODE_UNLOCK(node); 884 error = vget_finish(vp, lkflag, vs); 885 if (error == ENOENT) { 886 TMPFS_NODE_LOCK(node); 887 goto loop; 888 } 889 if (error != 0) { 890 vp = NULL; 891 goto out; 892 } 893 894 /* 895 * Make sure the vnode is still there after 896 * getting the interlock to avoid racing a free. 897 */ 898 if (node->tn_vnode != vp) { 899 vput(vp); 900 TMPFS_NODE_LOCK(node); 901 goto loop; 902 } 903 904 goto out; 905 } 906 907 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 908 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 909 TMPFS_NODE_UNLOCK(node); 910 error = ENOENT; 911 vp = NULL; 912 goto out; 913 } 914 915 /* 916 * otherwise lock the vp list while we call getnewvnode 917 * since that can block. 918 */ 919 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 920 node->tn_vpstate |= TMPFS_VNODE_WANT; 921 error = msleep((caddr_t) &node->tn_vpstate, 922 TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0); 923 if (error != 0) 924 goto out; 925 goto loop; 926 } else 927 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 928 929 TMPFS_NODE_UNLOCK(node); 930 931 /* Get a new vnode and associate it with our node. */ 932 error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ? 933 &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp); 934 if (error != 0) 935 goto unlock; 936 MPASS(vp != NULL); 937 938 /* lkflag is ignored, the lock is exclusive */ 939 (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 940 941 vp->v_data = node; 942 vp->v_type = node->tn_type; 943 944 /* Type-specific initialization. */ 945 switch (node->tn_type) { 946 case VBLK: 947 /* FALLTHROUGH */ 948 case VCHR: 949 /* FALLTHROUGH */ 950 case VLNK: 951 /* FALLTHROUGH */ 952 case VSOCK: 953 break; 954 case VFIFO: 955 vp->v_op = &tmpfs_fifoop_entries; 956 break; 957 case VREG: 958 object = node->tn_reg.tn_aobj; 959 VM_OBJECT_WLOCK(object); 960 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 961 ("%s: object %p with OBJ_TMPFS_VREF but without vnode", 962 __func__, object)); 963 KASSERT(object->un_pager.swp.writemappings == 0, 964 ("%s: object %p has writemappings", 965 __func__, object)); 966 VI_LOCK(vp); 967 KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); 968 vp->v_object = object; 969 object->un_pager.swp.swp_tmpfs = vp; 970 vm_object_set_flag(object, OBJ_TMPFS); 971 vn_irflag_set_locked(vp, VIRF_PGREAD); 972 VI_UNLOCK(vp); 973 VM_OBJECT_WUNLOCK(object); 974 break; 975 case VDIR: 976 MPASS(node->tn_dir.tn_parent != NULL); 977 if (node->tn_dir.tn_parent == node) 978 vp->v_vflag |= VV_ROOT; 979 break; 980 981 default: 982 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 983 } 984 if (vp->v_type != VFIFO) 985 VN_LOCK_ASHARE(vp); 986 987 error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL); 988 if (error != 0) 989 vp = NULL; 990 991 unlock: 992 TMPFS_NODE_LOCK(node); 993 994 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 995 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 996 node->tn_vnode = vp; 997 998 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 999 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 1000 TMPFS_NODE_UNLOCK(node); 1001 wakeup((caddr_t) &node->tn_vpstate); 1002 } else 1003 TMPFS_NODE_UNLOCK(node); 1004 1005 out: 1006 if (error == 0) { 1007 *vpp = vp; 1008 1009 #ifdef INVARIANTS 1010 MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); 1011 TMPFS_NODE_LOCK(node); 1012 MPASS(*vpp == node->tn_vnode); 1013 TMPFS_NODE_UNLOCK(node); 1014 #endif 1015 } 1016 tmpfs_free_node(tm, node); 1017 1018 return (error); 1019 } 1020 1021 /* 1022 * Destroys the association between the vnode vp and the node it 1023 * references. 1024 */ 1025 void 1026 tmpfs_free_vp(struct vnode *vp) 1027 { 1028 struct tmpfs_node *node; 1029 1030 node = VP_TO_TMPFS_NODE(vp); 1031 1032 TMPFS_NODE_ASSERT_LOCKED(node); 1033 node->tn_vnode = NULL; 1034 if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) 1035 wakeup(&node->tn_vnode); 1036 node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; 1037 vp->v_data = NULL; 1038 } 1039 1040 /* 1041 * Allocates a new file of type 'type' and adds it to the parent directory 1042 * 'dvp'; this addition is done using the component name given in 'cnp'. 1043 * The ownership of the new file is automatically assigned based on the 1044 * credentials of the caller (through 'cnp'), the group is set based on 1045 * the parent directory and the mode is determined from the 'vap' argument. 1046 * If successful, *vpp holds a vnode to the newly created file and zero 1047 * is returned. Otherwise *vpp is NULL and the function returns an 1048 * appropriate error code. 1049 */ 1050 int 1051 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 1052 struct componentname *cnp, const char *target) 1053 { 1054 int error; 1055 struct tmpfs_dirent *de; 1056 struct tmpfs_mount *tmp; 1057 struct tmpfs_node *dnode; 1058 struct tmpfs_node *node; 1059 struct tmpfs_node *parent; 1060 1061 ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file"); 1062 MPASS(cnp->cn_flags & HASBUF); 1063 1064 tmp = VFS_TO_TMPFS(dvp->v_mount); 1065 dnode = VP_TO_TMPFS_DIR(dvp); 1066 *vpp = NULL; 1067 1068 /* If the entry we are creating is a directory, we cannot overflow 1069 * the number of links of its parent, because it will get a new 1070 * link. */ 1071 if (vap->va_type == VDIR) { 1072 /* Ensure that we do not overflow the maximum number of links 1073 * imposed by the system. */ 1074 MPASS(dnode->tn_links <= TMPFS_LINK_MAX); 1075 if (dnode->tn_links == TMPFS_LINK_MAX) { 1076 return (EMLINK); 1077 } 1078 1079 parent = dnode; 1080 MPASS(parent != NULL); 1081 } else 1082 parent = NULL; 1083 1084 /* Allocate a node that represents the new file. */ 1085 error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, 1086 cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent, 1087 target, vap->va_rdev, &node); 1088 if (error != 0) 1089 return (error); 1090 1091 /* Allocate a directory entry that points to the new file. */ 1092 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 1093 &de); 1094 if (error != 0) { 1095 tmpfs_free_node(tmp, node); 1096 return (error); 1097 } 1098 1099 /* Allocate a vnode for the new file. */ 1100 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 1101 if (error != 0) { 1102 tmpfs_free_dirent(tmp, de); 1103 tmpfs_free_node(tmp, node); 1104 return (error); 1105 } 1106 1107 /* Now that all required items are allocated, we can proceed to 1108 * insert the new node into the directory, an operation that 1109 * cannot fail. */ 1110 if (cnp->cn_flags & ISWHITEOUT) 1111 tmpfs_dir_whiteout_remove(dvp, cnp); 1112 tmpfs_dir_attach(dvp, de); 1113 return (0); 1114 } 1115 1116 struct tmpfs_dirent * 1117 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1118 { 1119 struct tmpfs_dirent *de; 1120 1121 de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); 1122 dc->tdc_tree = de; 1123 if (de != NULL && tmpfs_dirent_duphead(de)) 1124 de = LIST_FIRST(&de->ud.td_duphead); 1125 dc->tdc_current = de; 1126 1127 return (dc->tdc_current); 1128 } 1129 1130 struct tmpfs_dirent * 1131 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1132 { 1133 struct tmpfs_dirent *de; 1134 1135 MPASS(dc->tdc_tree != NULL); 1136 if (tmpfs_dirent_dup(dc->tdc_current)) { 1137 dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); 1138 if (dc->tdc_current != NULL) 1139 return (dc->tdc_current); 1140 } 1141 dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, 1142 &dnode->tn_dir.tn_dirhead, dc->tdc_tree); 1143 if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { 1144 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 1145 MPASS(dc->tdc_current != NULL); 1146 } 1147 1148 return (dc->tdc_current); 1149 } 1150 1151 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ 1152 static struct tmpfs_dirent * 1153 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) 1154 { 1155 struct tmpfs_dirent *de, dekey; 1156 1157 dekey.td_hash = hash; 1158 de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); 1159 return (de); 1160 } 1161 1162 /* Lookup directory entry by cookie, initialize directory cursor accordingly. */ 1163 static struct tmpfs_dirent * 1164 tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, 1165 struct tmpfs_dir_cursor *dc) 1166 { 1167 struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead; 1168 struct tmpfs_dirent *de, dekey; 1169 1170 MPASS(cookie >= TMPFS_DIRCOOKIE_MIN); 1171 1172 if (cookie == node->tn_dir.tn_readdir_lastn && 1173 (de = node->tn_dir.tn_readdir_lastp) != NULL) { 1174 /* Protect against possible race, tn_readdir_last[pn] 1175 * may be updated with only shared vnode lock held. */ 1176 if (cookie == tmpfs_dirent_cookie(de)) 1177 goto out; 1178 } 1179 1180 if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) { 1181 LIST_FOREACH(de, &node->tn_dir.tn_dupindex, 1182 uh.td_dup.index_entries) { 1183 MPASS(tmpfs_dirent_dup(de)); 1184 if (de->td_cookie == cookie) 1185 goto out; 1186 /* dupindex list is sorted. */ 1187 if (de->td_cookie < cookie) { 1188 de = NULL; 1189 goto out; 1190 } 1191 } 1192 MPASS(de == NULL); 1193 goto out; 1194 } 1195 1196 if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { 1197 de = NULL; 1198 } else { 1199 dekey.td_hash = cookie; 1200 /* Recover if direntry for cookie was removed */ 1201 de = RB_NFIND(tmpfs_dir, dirhead, &dekey); 1202 } 1203 dc->tdc_tree = de; 1204 dc->tdc_current = de; 1205 if (de != NULL && tmpfs_dirent_duphead(de)) { 1206 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 1207 MPASS(dc->tdc_current != NULL); 1208 } 1209 return (dc->tdc_current); 1210 1211 out: 1212 dc->tdc_tree = de; 1213 dc->tdc_current = de; 1214 if (de != NULL && tmpfs_dirent_dup(de)) 1215 dc->tdc_tree = tmpfs_dir_xlookup_hash(node, 1216 de->td_hash); 1217 return (dc->tdc_current); 1218 } 1219 1220 /* 1221 * Looks for a directory entry in the directory represented by node. 1222 * 'cnp' describes the name of the entry to look for. Note that the . 1223 * and .. components are not allowed as they do not physically exist 1224 * within directories. 1225 * 1226 * Returns a pointer to the entry when found, otherwise NULL. 1227 */ 1228 struct tmpfs_dirent * 1229 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 1230 struct componentname *cnp) 1231 { 1232 struct tmpfs_dir_duphead *duphead; 1233 struct tmpfs_dirent *de; 1234 uint32_t hash; 1235 1236 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 1237 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 1238 cnp->cn_nameptr[1] == '.'))); 1239 TMPFS_VALIDATE_DIR(node); 1240 1241 hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 1242 de = tmpfs_dir_xlookup_hash(node, hash); 1243 if (de != NULL && tmpfs_dirent_duphead(de)) { 1244 duphead = &de->ud.td_duphead; 1245 LIST_FOREACH(de, duphead, uh.td_dup.entries) { 1246 if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1247 cnp->cn_namelen)) 1248 break; 1249 } 1250 } else if (de != NULL) { 1251 if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1252 cnp->cn_namelen)) 1253 de = NULL; 1254 } 1255 if (de != NULL && f != NULL && de->td_node != f) 1256 de = NULL; 1257 1258 return (de); 1259 } 1260 1261 /* 1262 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 1263 * list, allocate new cookie value. 1264 */ 1265 static void 1266 tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 1267 struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 1268 { 1269 struct tmpfs_dir_duphead *dupindex; 1270 struct tmpfs_dirent *de, *pde; 1271 1272 dupindex = &dnode->tn_dir.tn_dupindex; 1273 de = LIST_FIRST(dupindex); 1274 if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 1275 if (de == NULL) 1276 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1277 else 1278 nde->td_cookie = de->td_cookie + 1; 1279 MPASS(tmpfs_dirent_dup(nde)); 1280 LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 1281 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1282 return; 1283 } 1284 1285 /* 1286 * Cookie numbers are near exhaustion. Scan dupindex list for unused 1287 * numbers. dupindex list is sorted in descending order. Keep it so 1288 * after inserting nde. 1289 */ 1290 while (1) { 1291 pde = de; 1292 de = LIST_NEXT(de, uh.td_dup.index_entries); 1293 if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 1294 /* 1295 * Last element of the index doesn't have minimal cookie 1296 * value, use it. 1297 */ 1298 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1299 LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 1300 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1301 return; 1302 } else if (de == NULL) { 1303 /* 1304 * We are so lucky have 2^30 hash duplicates in single 1305 * directory :) Return largest possible cookie value. 1306 * It should be fine except possible issues with 1307 * VOP_READDIR restart. 1308 */ 1309 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 1310 LIST_INSERT_HEAD(dupindex, nde, 1311 uh.td_dup.index_entries); 1312 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1313 return; 1314 } 1315 if (de->td_cookie + 1 == pde->td_cookie || 1316 de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 1317 continue; /* No hole or invalid cookie. */ 1318 nde->td_cookie = de->td_cookie + 1; 1319 MPASS(tmpfs_dirent_dup(nde)); 1320 MPASS(pde->td_cookie > nde->td_cookie); 1321 MPASS(nde->td_cookie > de->td_cookie); 1322 LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 1323 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1324 return; 1325 } 1326 } 1327 1328 /* 1329 * Attaches the directory entry de to the directory represented by vp. 1330 * Note that this does not change the link count of the node pointed by 1331 * the directory entry, as this is done by tmpfs_alloc_dirent. 1332 */ 1333 void 1334 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 1335 { 1336 struct tmpfs_node *dnode; 1337 struct tmpfs_dirent *xde, *nde; 1338 1339 ASSERT_VOP_ELOCKED(vp, __func__); 1340 MPASS(de->td_namelen > 0); 1341 MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 1342 MPASS(de->td_cookie == de->td_hash); 1343 1344 dnode = VP_TO_TMPFS_DIR(vp); 1345 dnode->tn_dir.tn_readdir_lastn = 0; 1346 dnode->tn_dir.tn_readdir_lastp = NULL; 1347 1348 MPASS(!tmpfs_dirent_dup(de)); 1349 xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1350 if (xde != NULL && tmpfs_dirent_duphead(xde)) 1351 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1352 else if (xde != NULL) { 1353 /* 1354 * Allocate new duphead. Swap xde with duphead to avoid 1355 * adding/removing elements with the same hash. 1356 */ 1357 MPASS(!tmpfs_dirent_dup(xde)); 1358 tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 1359 &nde); 1360 /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ 1361 memcpy(nde, xde, sizeof(*xde)); 1362 xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 1363 LIST_INIT(&xde->ud.td_duphead); 1364 xde->td_namelen = 0; 1365 xde->td_node = NULL; 1366 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 1367 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1368 } 1369 dnode->tn_size += sizeof(struct tmpfs_dirent); 1370 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1371 dnode->tn_accessed = true; 1372 tmpfs_update(vp); 1373 } 1374 1375 /* 1376 * Detaches the directory entry de from the directory represented by vp. 1377 * Note that this does not change the link count of the node pointed by 1378 * the directory entry, as this is done by tmpfs_free_dirent. 1379 */ 1380 void 1381 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1382 { 1383 struct tmpfs_mount *tmp; 1384 struct tmpfs_dir *head; 1385 struct tmpfs_node *dnode; 1386 struct tmpfs_dirent *xde; 1387 1388 ASSERT_VOP_ELOCKED(vp, __func__); 1389 1390 dnode = VP_TO_TMPFS_DIR(vp); 1391 head = &dnode->tn_dir.tn_dirhead; 1392 dnode->tn_dir.tn_readdir_lastn = 0; 1393 dnode->tn_dir.tn_readdir_lastp = NULL; 1394 1395 if (tmpfs_dirent_dup(de)) { 1396 /* Remove duphead if de was last entry. */ 1397 if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 1398 xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 1399 MPASS(tmpfs_dirent_duphead(xde)); 1400 } else 1401 xde = NULL; 1402 LIST_REMOVE(de, uh.td_dup.entries); 1403 LIST_REMOVE(de, uh.td_dup.index_entries); 1404 if (xde != NULL) { 1405 if (LIST_EMPTY(&xde->ud.td_duphead)) { 1406 RB_REMOVE(tmpfs_dir, head, xde); 1407 tmp = VFS_TO_TMPFS(vp->v_mount); 1408 MPASS(xde->td_node == NULL); 1409 tmpfs_free_dirent(tmp, xde); 1410 } 1411 } 1412 de->td_cookie = de->td_hash; 1413 } else 1414 RB_REMOVE(tmpfs_dir, head, de); 1415 1416 dnode->tn_size -= sizeof(struct tmpfs_dirent); 1417 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1418 dnode->tn_accessed = true; 1419 tmpfs_update(vp); 1420 } 1421 1422 void 1423 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1424 { 1425 struct tmpfs_dirent *de, *dde, *nde; 1426 1427 RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 1428 RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1429 /* Node may already be destroyed. */ 1430 de->td_node = NULL; 1431 if (tmpfs_dirent_duphead(de)) { 1432 while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 1433 LIST_REMOVE(dde, uh.td_dup.entries); 1434 dde->td_node = NULL; 1435 tmpfs_free_dirent(tmp, dde); 1436 } 1437 } 1438 tmpfs_free_dirent(tmp, de); 1439 } 1440 } 1441 1442 /* 1443 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 1444 * directory and returns it in the uio space. The function returns 0 1445 * on success, -1 if there was not enough space in the uio structure to 1446 * hold the directory entry or an appropriate error code if another 1447 * error happens. 1448 */ 1449 static int 1450 tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1451 struct uio *uio) 1452 { 1453 int error; 1454 struct dirent dent; 1455 1456 TMPFS_VALIDATE_DIR(node); 1457 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1458 1459 dent.d_fileno = node->tn_id; 1460 dent.d_off = TMPFS_DIRCOOKIE_DOTDOT; 1461 dent.d_type = DT_DIR; 1462 dent.d_namlen = 1; 1463 dent.d_name[0] = '.'; 1464 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1465 dirent_terminate(&dent); 1466 1467 if (dent.d_reclen > uio->uio_resid) 1468 error = EJUSTRETURN; 1469 else 1470 error = uiomove(&dent, dent.d_reclen, uio); 1471 1472 tmpfs_set_accessed(tm, node); 1473 1474 return (error); 1475 } 1476 1477 /* 1478 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1479 * directory and returns it in the uio space. The function returns 0 1480 * on success, -1 if there was not enough space in the uio structure to 1481 * hold the directory entry or an appropriate error code if another 1482 * error happens. 1483 */ 1484 static int 1485 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1486 struct uio *uio, off_t next) 1487 { 1488 struct tmpfs_node *parent; 1489 struct dirent dent; 1490 int error; 1491 1492 TMPFS_VALIDATE_DIR(node); 1493 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1494 1495 /* 1496 * Return ENOENT if the current node is already removed. 1497 */ 1498 TMPFS_ASSERT_LOCKED(node); 1499 parent = node->tn_dir.tn_parent; 1500 if (parent == NULL) 1501 return (ENOENT); 1502 1503 dent.d_fileno = parent->tn_id; 1504 dent.d_off = next; 1505 dent.d_type = DT_DIR; 1506 dent.d_namlen = 2; 1507 dent.d_name[0] = '.'; 1508 dent.d_name[1] = '.'; 1509 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1510 dirent_terminate(&dent); 1511 1512 if (dent.d_reclen > uio->uio_resid) 1513 error = EJUSTRETURN; 1514 else 1515 error = uiomove(&dent, dent.d_reclen, uio); 1516 1517 tmpfs_set_accessed(tm, node); 1518 1519 return (error); 1520 } 1521 1522 /* 1523 * Helper function for tmpfs_readdir. Returns as much directory entries 1524 * as can fit in the uio space. The read starts at uio->uio_offset. 1525 * The function returns 0 on success, -1 if there was not enough space 1526 * in the uio structure to hold the directory entry or an appropriate 1527 * error code if another error happens. 1528 */ 1529 int 1530 tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node, 1531 struct uio *uio, int maxcookies, u_long *cookies, int *ncookies) 1532 { 1533 struct tmpfs_dir_cursor dc; 1534 struct tmpfs_dirent *de, *nde; 1535 off_t off; 1536 int error; 1537 1538 TMPFS_VALIDATE_DIR(node); 1539 1540 off = 0; 1541 1542 /* 1543 * Lookup the node from the current offset. The starting offset of 1544 * 0 will lookup both '.' and '..', and then the first real entry, 1545 * or EOF if there are none. Then find all entries for the dir that 1546 * fit into the buffer. Once no more entries are found (de == NULL), 1547 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 1548 * call to return 0. 1549 */ 1550 switch (uio->uio_offset) { 1551 case TMPFS_DIRCOOKIE_DOT: 1552 error = tmpfs_dir_getdotdent(tm, node, uio); 1553 if (error != 0) 1554 return (error); 1555 uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT; 1556 if (cookies != NULL) 1557 cookies[(*ncookies)++] = off; 1558 /* FALLTHROUGH */ 1559 case TMPFS_DIRCOOKIE_DOTDOT: 1560 de = tmpfs_dir_first(node, &dc); 1561 off = tmpfs_dirent_cookie(de); 1562 error = tmpfs_dir_getdotdotdent(tm, node, uio, off); 1563 if (error != 0) 1564 return (error); 1565 uio->uio_offset = off; 1566 if (cookies != NULL) 1567 cookies[(*ncookies)++] = off; 1568 /* EOF. */ 1569 if (de == NULL) 1570 return (0); 1571 break; 1572 case TMPFS_DIRCOOKIE_EOF: 1573 return (0); 1574 default: 1575 de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 1576 if (de == NULL) 1577 return (EINVAL); 1578 if (cookies != NULL) 1579 off = tmpfs_dirent_cookie(de); 1580 } 1581 1582 /* 1583 * Read as much entries as possible; i.e., until we reach the end of the 1584 * directory or we exhaust uio space. 1585 */ 1586 do { 1587 struct dirent d; 1588 1589 /* 1590 * Create a dirent structure representing the current tmpfs_node 1591 * and fill it. 1592 */ 1593 if (de->td_node == NULL) { 1594 d.d_fileno = 1; 1595 d.d_type = DT_WHT; 1596 } else { 1597 d.d_fileno = de->td_node->tn_id; 1598 switch (de->td_node->tn_type) { 1599 case VBLK: 1600 d.d_type = DT_BLK; 1601 break; 1602 1603 case VCHR: 1604 d.d_type = DT_CHR; 1605 break; 1606 1607 case VDIR: 1608 d.d_type = DT_DIR; 1609 break; 1610 1611 case VFIFO: 1612 d.d_type = DT_FIFO; 1613 break; 1614 1615 case VLNK: 1616 d.d_type = DT_LNK; 1617 break; 1618 1619 case VREG: 1620 d.d_type = DT_REG; 1621 break; 1622 1623 case VSOCK: 1624 d.d_type = DT_SOCK; 1625 break; 1626 1627 default: 1628 panic("tmpfs_dir_getdents: type %p %d", 1629 de->td_node, (int)de->td_node->tn_type); 1630 } 1631 } 1632 d.d_namlen = de->td_namelen; 1633 MPASS(de->td_namelen < sizeof(d.d_name)); 1634 (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1635 d.d_reclen = GENERIC_DIRSIZ(&d); 1636 1637 /* 1638 * Stop reading if the directory entry we are treating is bigger 1639 * than the amount of data that can be returned. 1640 */ 1641 if (d.d_reclen > uio->uio_resid) { 1642 error = EJUSTRETURN; 1643 break; 1644 } 1645 1646 nde = tmpfs_dir_next(node, &dc); 1647 d.d_off = tmpfs_dirent_cookie(nde); 1648 dirent_terminate(&d); 1649 1650 /* 1651 * Copy the new dirent structure into the output buffer and 1652 * advance pointers. 1653 */ 1654 error = uiomove(&d, d.d_reclen, uio); 1655 if (error == 0) { 1656 de = nde; 1657 if (cookies != NULL) { 1658 off = tmpfs_dirent_cookie(de); 1659 MPASS(*ncookies < maxcookies); 1660 cookies[(*ncookies)++] = off; 1661 } 1662 } 1663 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1664 1665 /* Skip setting off when using cookies as it is already done above. */ 1666 if (cookies == NULL) 1667 off = tmpfs_dirent_cookie(de); 1668 1669 /* Update the offset and cache. */ 1670 uio->uio_offset = off; 1671 node->tn_dir.tn_readdir_lastn = off; 1672 node->tn_dir.tn_readdir_lastp = de; 1673 1674 tmpfs_set_accessed(tm, node); 1675 return (error); 1676 } 1677 1678 int 1679 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 1680 { 1681 struct tmpfs_dirent *de; 1682 int error; 1683 1684 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 1685 cnp->cn_nameptr, cnp->cn_namelen, &de); 1686 if (error != 0) 1687 return (error); 1688 tmpfs_dir_attach(dvp, de); 1689 return (0); 1690 } 1691 1692 void 1693 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 1694 { 1695 struct tmpfs_dirent *de; 1696 1697 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 1698 MPASS(de != NULL && de->td_node == NULL); 1699 tmpfs_dir_detach(dvp, de); 1700 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1701 } 1702 1703 /* 1704 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 1705 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 1706 * 'newsize' must be positive. 1707 * 1708 * Returns zero on success or an appropriate error code on failure. 1709 */ 1710 int 1711 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1712 { 1713 struct tmpfs_mount *tmp; 1714 struct tmpfs_node *node; 1715 vm_object_t uobj; 1716 vm_pindex_t idx, newpages, oldpages; 1717 off_t oldsize; 1718 int base, error; 1719 1720 MPASS(vp->v_type == VREG); 1721 MPASS(newsize >= 0); 1722 1723 node = VP_TO_TMPFS_NODE(vp); 1724 uobj = node->tn_reg.tn_aobj; 1725 tmp = VFS_TO_TMPFS(vp->v_mount); 1726 1727 /* 1728 * Convert the old and new sizes to the number of pages needed to 1729 * store them. It may happen that we do not need to do anything 1730 * because the last allocated page can accommodate the change on 1731 * its own. 1732 */ 1733 oldsize = node->tn_size; 1734 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1735 MPASS(oldpages == uobj->size); 1736 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1737 1738 if (__predict_true(newpages == oldpages && newsize >= oldsize)) { 1739 node->tn_size = newsize; 1740 return (0); 1741 } 1742 1743 if (newpages > oldpages && 1744 tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) 1745 return (ENOSPC); 1746 1747 VM_OBJECT_WLOCK(uobj); 1748 if (newsize < oldsize) { 1749 /* 1750 * Zero the truncated part of the last page. 1751 */ 1752 base = newsize & PAGE_MASK; 1753 if (base != 0) { 1754 idx = OFF_TO_IDX(newsize); 1755 error = tmpfs_partial_page_invalidate(uobj, idx, base, 1756 PAGE_SIZE, ignerr); 1757 if (error != 0) { 1758 VM_OBJECT_WUNLOCK(uobj); 1759 return (error); 1760 } 1761 } 1762 1763 /* 1764 * Release any swap space and free any whole pages. 1765 */ 1766 if (newpages < oldpages) 1767 vm_object_page_remove(uobj, newpages, 0, 0); 1768 } 1769 uobj->size = newpages; 1770 VM_OBJECT_WUNLOCK(uobj); 1771 1772 atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); 1773 1774 node->tn_size = newsize; 1775 return (0); 1776 } 1777 1778 /* 1779 * Punch hole in the aobj associated with the regular file pointed to by 'vp'. 1780 * Requests completely beyond the end-of-file are converted to no-op. 1781 * 1782 * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on 1783 * failure. 1784 */ 1785 int 1786 tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length) 1787 { 1788 struct tmpfs_mount *tmp; 1789 struct tmpfs_node *node; 1790 vm_object_t object; 1791 vm_pindex_t pistart, pi, piend; 1792 int startofs, endofs, end; 1793 off_t off, len; 1794 int error; 1795 1796 KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows", 1797 __func__)); 1798 node = VP_TO_TMPFS_NODE(vp); 1799 KASSERT(node->tn_type == VREG, ("%s: node is not regular file", 1800 __func__)); 1801 object = node->tn_reg.tn_aobj; 1802 tmp = VFS_TO_TMPFS(vp->v_mount); 1803 off = *offset; 1804 len = omin(node->tn_size - off, *length); 1805 startofs = off & PAGE_MASK; 1806 endofs = (off + len) & PAGE_MASK; 1807 pistart = OFF_TO_IDX(off); 1808 piend = OFF_TO_IDX(off + len); 1809 pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK); 1810 error = 0; 1811 1812 /* Handle the case when offset is on or beyond file size. */ 1813 if (len <= 0) { 1814 *length = 0; 1815 return (0); 1816 } 1817 1818 VM_OBJECT_WLOCK(object); 1819 1820 /* 1821 * If there is a partial page at the beginning of the hole-punching 1822 * request, fill the partial page with zeroes. 1823 */ 1824 if (startofs != 0) { 1825 end = pistart != piend ? PAGE_SIZE : endofs; 1826 error = tmpfs_partial_page_invalidate(object, pistart, startofs, 1827 end, FALSE); 1828 if (error != 0) 1829 goto out; 1830 off += end - startofs; 1831 len -= end - startofs; 1832 } 1833 1834 /* 1835 * Toss away the full pages in the affected area. 1836 */ 1837 if (pi < piend) { 1838 vm_object_page_remove(object, pi, piend, 0); 1839 off += IDX_TO_OFF(piend - pi); 1840 len -= IDX_TO_OFF(piend - pi); 1841 } 1842 1843 /* 1844 * If there is a partial page at the end of the hole-punching request, 1845 * fill the partial page with zeroes. 1846 */ 1847 if (endofs != 0 && pistart != piend) { 1848 error = tmpfs_partial_page_invalidate(object, piend, 0, endofs, 1849 FALSE); 1850 if (error != 0) 1851 goto out; 1852 off += endofs; 1853 len -= endofs; 1854 } 1855 1856 out: 1857 VM_OBJECT_WUNLOCK(object); 1858 *offset = off; 1859 *length = len; 1860 return (error); 1861 } 1862 1863 void 1864 tmpfs_check_mtime(struct vnode *vp) 1865 { 1866 struct tmpfs_node *node; 1867 struct vm_object *obj; 1868 1869 ASSERT_VOP_ELOCKED(vp, "check_mtime"); 1870 if (vp->v_type != VREG) 1871 return; 1872 obj = vp->v_object; 1873 KASSERT(obj->type == tmpfs_pager_type && 1874 (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) == 1875 (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj")); 1876 /* unlocked read */ 1877 if (obj->generation != obj->cleangeneration) { 1878 VM_OBJECT_WLOCK(obj); 1879 if (obj->generation != obj->cleangeneration) { 1880 obj->cleangeneration = obj->generation; 1881 node = VP_TO_TMPFS_NODE(vp); 1882 node->tn_status |= TMPFS_NODE_MODIFIED | 1883 TMPFS_NODE_CHANGED; 1884 } 1885 VM_OBJECT_WUNLOCK(obj); 1886 } 1887 } 1888 1889 /* 1890 * Change flags of the given vnode. 1891 * Caller should execute tmpfs_update on vp after a successful execution. 1892 * The vnode must be locked on entry and remain locked on exit. 1893 */ 1894 int 1895 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, 1896 struct thread *p) 1897 { 1898 int error; 1899 struct tmpfs_node *node; 1900 1901 ASSERT_VOP_ELOCKED(vp, "chflags"); 1902 1903 node = VP_TO_TMPFS_NODE(vp); 1904 1905 if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | 1906 UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | 1907 UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 1908 UF_SPARSE | UF_SYSTEM)) != 0) 1909 return (EOPNOTSUPP); 1910 1911 /* Disallow this operation if the file system is mounted read-only. */ 1912 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1913 return (EROFS); 1914 1915 /* 1916 * Callers may only modify the file flags on objects they 1917 * have VADMIN rights for. 1918 */ 1919 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1920 return (error); 1921 /* 1922 * Unprivileged processes are not permitted to unset system 1923 * flags, or modify flags if any system flags are set. 1924 */ 1925 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { 1926 if (node->tn_flags & 1927 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 1928 error = securelevel_gt(cred, 0); 1929 if (error) 1930 return (error); 1931 } 1932 } else { 1933 if (node->tn_flags & 1934 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1935 ((flags ^ node->tn_flags) & SF_SETTABLE)) 1936 return (EPERM); 1937 } 1938 node->tn_flags = flags; 1939 node->tn_status |= TMPFS_NODE_CHANGED; 1940 1941 ASSERT_VOP_ELOCKED(vp, "chflags2"); 1942 1943 return (0); 1944 } 1945 1946 /* 1947 * Change access mode on the given vnode. 1948 * Caller should execute tmpfs_update on vp after a successful execution. 1949 * The vnode must be locked on entry and remain locked on exit. 1950 */ 1951 int 1952 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1953 { 1954 int error; 1955 struct tmpfs_node *node; 1956 mode_t newmode; 1957 1958 ASSERT_VOP_ELOCKED(vp, "chmod"); 1959 ASSERT_VOP_IN_SEQC(vp); 1960 1961 node = VP_TO_TMPFS_NODE(vp); 1962 1963 /* Disallow this operation if the file system is mounted read-only. */ 1964 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1965 return (EROFS); 1966 1967 /* Immutable or append-only files cannot be modified, either. */ 1968 if (node->tn_flags & (IMMUTABLE | APPEND)) 1969 return (EPERM); 1970 1971 /* 1972 * To modify the permissions on a file, must possess VADMIN 1973 * for that file. 1974 */ 1975 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1976 return (error); 1977 1978 /* 1979 * Privileged processes may set the sticky bit on non-directories, 1980 * as well as set the setgid bit on a file with a group that the 1981 * process is not a member of. 1982 */ 1983 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1984 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 1985 return (EFTYPE); 1986 } 1987 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1988 error = priv_check_cred(cred, PRIV_VFS_SETGID); 1989 if (error) 1990 return (error); 1991 } 1992 1993 newmode = node->tn_mode & ~ALLPERMS; 1994 newmode |= mode & ALLPERMS; 1995 atomic_store_short(&node->tn_mode, newmode); 1996 1997 node->tn_status |= TMPFS_NODE_CHANGED; 1998 1999 ASSERT_VOP_ELOCKED(vp, "chmod2"); 2000 2001 return (0); 2002 } 2003 2004 /* 2005 * Change ownership of the given vnode. At least one of uid or gid must 2006 * be different than VNOVAL. If one is set to that value, the attribute 2007 * is unchanged. 2008 * Caller should execute tmpfs_update on vp after a successful execution. 2009 * The vnode must be locked on entry and remain locked on exit. 2010 */ 2011 int 2012 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 2013 struct thread *p) 2014 { 2015 int error; 2016 struct tmpfs_node *node; 2017 uid_t ouid; 2018 gid_t ogid; 2019 mode_t newmode; 2020 2021 ASSERT_VOP_ELOCKED(vp, "chown"); 2022 ASSERT_VOP_IN_SEQC(vp); 2023 2024 node = VP_TO_TMPFS_NODE(vp); 2025 2026 /* Assign default values if they are unknown. */ 2027 MPASS(uid != VNOVAL || gid != VNOVAL); 2028 if (uid == VNOVAL) 2029 uid = node->tn_uid; 2030 if (gid == VNOVAL) 2031 gid = node->tn_gid; 2032 MPASS(uid != VNOVAL && gid != VNOVAL); 2033 2034 /* Disallow this operation if the file system is mounted read-only. */ 2035 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2036 return (EROFS); 2037 2038 /* Immutable or append-only files cannot be modified, either. */ 2039 if (node->tn_flags & (IMMUTABLE | APPEND)) 2040 return (EPERM); 2041 2042 /* 2043 * To modify the ownership of a file, must possess VADMIN for that 2044 * file. 2045 */ 2046 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 2047 return (error); 2048 2049 /* 2050 * To change the owner of a file, or change the group of a file to a 2051 * group of which we are not a member, the caller must have 2052 * privilege. 2053 */ 2054 if ((uid != node->tn_uid || 2055 (gid != node->tn_gid && !groupmember(gid, cred))) && 2056 (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) 2057 return (error); 2058 2059 ogid = node->tn_gid; 2060 ouid = node->tn_uid; 2061 2062 node->tn_uid = uid; 2063 node->tn_gid = gid; 2064 2065 node->tn_status |= TMPFS_NODE_CHANGED; 2066 2067 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 2068 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { 2069 newmode = node->tn_mode & ~(S_ISUID | S_ISGID); 2070 atomic_store_short(&node->tn_mode, newmode); 2071 } 2072 } 2073 2074 ASSERT_VOP_ELOCKED(vp, "chown2"); 2075 2076 return (0); 2077 } 2078 2079 /* 2080 * Change size of the given vnode. 2081 * Caller should execute tmpfs_update on vp after a successful execution. 2082 * The vnode must be locked on entry and remain locked on exit. 2083 */ 2084 int 2085 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 2086 struct thread *p) 2087 { 2088 int error; 2089 struct tmpfs_node *node; 2090 2091 ASSERT_VOP_ELOCKED(vp, "chsize"); 2092 2093 node = VP_TO_TMPFS_NODE(vp); 2094 2095 /* Decide whether this is a valid operation based on the file type. */ 2096 error = 0; 2097 switch (vp->v_type) { 2098 case VDIR: 2099 return (EISDIR); 2100 2101 case VREG: 2102 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2103 return (EROFS); 2104 break; 2105 2106 case VBLK: 2107 /* FALLTHROUGH */ 2108 case VCHR: 2109 /* FALLTHROUGH */ 2110 case VFIFO: 2111 /* 2112 * Allow modifications of special files even if in the file 2113 * system is mounted read-only (we are not modifying the 2114 * files themselves, but the objects they represent). 2115 */ 2116 return (0); 2117 2118 default: 2119 /* Anything else is unsupported. */ 2120 return (EOPNOTSUPP); 2121 } 2122 2123 /* Immutable or append-only files cannot be modified, either. */ 2124 if (node->tn_flags & (IMMUTABLE | APPEND)) 2125 return (EPERM); 2126 2127 error = tmpfs_truncate(vp, size); 2128 /* 2129 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 2130 * for us, as will update tn_status; no need to do that here. 2131 */ 2132 2133 ASSERT_VOP_ELOCKED(vp, "chsize2"); 2134 2135 return (error); 2136 } 2137 2138 /* 2139 * Change access and modification times of the given vnode. 2140 * Caller should execute tmpfs_update on vp after a successful execution. 2141 * The vnode must be locked on entry and remain locked on exit. 2142 */ 2143 int 2144 tmpfs_chtimes(struct vnode *vp, struct vattr *vap, 2145 struct ucred *cred, struct thread *l) 2146 { 2147 int error; 2148 struct tmpfs_node *node; 2149 2150 ASSERT_VOP_ELOCKED(vp, "chtimes"); 2151 2152 node = VP_TO_TMPFS_NODE(vp); 2153 2154 /* Disallow this operation if the file system is mounted read-only. */ 2155 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2156 return (EROFS); 2157 2158 /* Immutable or append-only files cannot be modified, either. */ 2159 if (node->tn_flags & (IMMUTABLE | APPEND)) 2160 return (EPERM); 2161 2162 error = vn_utimes_perm(vp, vap, cred, l); 2163 if (error != 0) 2164 return (error); 2165 2166 if (vap->va_atime.tv_sec != VNOVAL) 2167 node->tn_accessed = true; 2168 2169 if (vap->va_mtime.tv_sec != VNOVAL) 2170 node->tn_status |= TMPFS_NODE_MODIFIED; 2171 2172 if (vap->va_birthtime.tv_sec != VNOVAL) 2173 node->tn_status |= TMPFS_NODE_MODIFIED; 2174 2175 tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); 2176 2177 if (vap->va_birthtime.tv_sec != VNOVAL) 2178 node->tn_birthtime = vap->va_birthtime; 2179 ASSERT_VOP_ELOCKED(vp, "chtimes2"); 2180 2181 return (0); 2182 } 2183 2184 void 2185 tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status) 2186 { 2187 2188 if ((node->tn_status & status) == status || tm->tm_ronly) 2189 return; 2190 TMPFS_NODE_LOCK(node); 2191 node->tn_status |= status; 2192 TMPFS_NODE_UNLOCK(node); 2193 } 2194 2195 void 2196 tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node) 2197 { 2198 if (node->tn_accessed || tm->tm_ronly) 2199 return; 2200 atomic_store_8(&node->tn_accessed, true); 2201 } 2202 2203 /* Sync timestamps */ 2204 void 2205 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 2206 const struct timespec *mod) 2207 { 2208 struct tmpfs_node *node; 2209 struct timespec now; 2210 2211 ASSERT_VOP_LOCKED(vp, "tmpfs_itimes"); 2212 node = VP_TO_TMPFS_NODE(vp); 2213 2214 if (!node->tn_accessed && 2215 (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0) 2216 return; 2217 2218 vfs_timestamp(&now); 2219 TMPFS_NODE_LOCK(node); 2220 if (node->tn_accessed) { 2221 if (acc == NULL) 2222 acc = &now; 2223 node->tn_atime = *acc; 2224 } 2225 if (node->tn_status & TMPFS_NODE_MODIFIED) { 2226 if (mod == NULL) 2227 mod = &now; 2228 node->tn_mtime = *mod; 2229 } 2230 if (node->tn_status & TMPFS_NODE_CHANGED) 2231 node->tn_ctime = now; 2232 node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 2233 node->tn_accessed = false; 2234 TMPFS_NODE_UNLOCK(node); 2235 2236 /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ 2237 random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME); 2238 } 2239 2240 int 2241 tmpfs_truncate(struct vnode *vp, off_t length) 2242 { 2243 int error; 2244 struct tmpfs_node *node; 2245 2246 node = VP_TO_TMPFS_NODE(vp); 2247 2248 if (length < 0) { 2249 error = EINVAL; 2250 goto out; 2251 } 2252 2253 if (node->tn_size == length) { 2254 error = 0; 2255 goto out; 2256 } 2257 2258 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 2259 return (EFBIG); 2260 2261 error = tmpfs_reg_resize(vp, length, FALSE); 2262 if (error == 0) 2263 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 2264 2265 out: 2266 tmpfs_update(vp); 2267 2268 return (error); 2269 } 2270 2271 static __inline int 2272 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 2273 { 2274 if (a->td_hash > b->td_hash) 2275 return (1); 2276 else if (a->td_hash < b->td_hash) 2277 return (-1); 2278 return (0); 2279 } 2280 2281 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 2282