1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2005 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11 * 2005 program. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Efficient memory file system supporting functions. 
37 */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/dirent.h> 44 #include <sys/fnv_hash.h> 45 #include <sys/lock.h> 46 #include <sys/limits.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/random.h> 52 #include <sys/refcount.h> 53 #include <sys/rwlock.h> 54 #include <sys/smr.h> 55 #include <sys/stat.h> 56 #include <sys/sysctl.h> 57 #include <sys/user.h> 58 #include <sys/vnode.h> 59 #include <sys/vmmeter.h> 60 61 #include <vm/vm.h> 62 #include <vm/vm_param.h> 63 #include <vm/vm_object.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/vm_pager.h> 67 #include <vm/vm_extern.h> 68 #include <vm/swap_pager.h> 69 70 #include <fs/tmpfs/tmpfs.h> 71 #include <fs/tmpfs/tmpfs_fifoops.h> 72 #include <fs/tmpfs/tmpfs_vnops.h> 73 74 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "tmpfs file system"); 76 77 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 78 79 MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure"); 80 static uma_zone_t tmpfs_node_pool; 81 VFS_SMR_DECLARE; 82 83 int tmpfs_pager_type = -1; 84 85 static vm_object_t 86 tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 87 vm_ooffset_t offset, struct ucred *cred) 88 { 89 vm_object_t object; 90 91 MPASS(handle == NULL); 92 MPASS(offset == 0); 93 object = vm_object_allocate_dyn(tmpfs_pager_type, size, 94 OBJ_COLORED | OBJ_SWAP); 95 if (!swap_pager_init_object(object, NULL, NULL, size, 0)) { 96 vm_object_deallocate(object); 97 object = NULL; 98 } 99 return (object); 100 } 101 102 /* 103 * Make sure tmpfs vnodes with writable mappings can be found on the lazy list. 104 * 105 * This allows for periodic mtime updates while only scanning vnodes which are 106 * plausibly dirty, see tmpfs_update_mtime_lazy. 
107 */ 108 static void 109 tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, 110 vm_offset_t new) 111 { 112 struct vnode *vp; 113 114 VM_OBJECT_ASSERT_WLOCKED(object); 115 116 vp = object->un_pager.swp.swp_tmpfs; 117 118 /* 119 * Forced unmount? 120 */ 121 if (vp == NULL) { 122 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 123 ("object %p with OBJ_TMPFS_VREF but without vnode", object)); 124 VM_OBJECT_WUNLOCK(object); 125 return; 126 } 127 128 if (old == 0) { 129 VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp, 130 ("object without writable mappings has a reference")); 131 VNPASS(vp->v_usecount > 0, vp); 132 } else { 133 VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp, 134 ("object with writable mappings does not have a reference")); 135 } 136 137 if (old == new) { 138 VM_OBJECT_WUNLOCK(object); 139 return; 140 } 141 142 if (new == 0) { 143 vm_object_clear_flag(object, OBJ_TMPFS_VREF); 144 VM_OBJECT_WUNLOCK(object); 145 vrele(vp); 146 } else { 147 if ((object->flags & OBJ_TMPFS_VREF) == 0) { 148 vref(vp); 149 vlazy(vp); 150 vm_object_set_flag(object, OBJ_TMPFS_VREF); 151 } 152 VM_OBJECT_WUNLOCK(object); 153 } 154 } 155 156 static void 157 tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start, 158 vm_offset_t end) 159 { 160 vm_offset_t new, old; 161 162 VM_OBJECT_WLOCK(object); 163 KASSERT((object->flags & OBJ_ANON) == 0, 164 ("%s: object %p with OBJ_ANON", __func__, object)); 165 old = object->un_pager.swp.writemappings; 166 object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; 167 new = object->un_pager.swp.writemappings; 168 tmpfs_pager_writecount_recalc(object, old, new); 169 VM_OBJECT_ASSERT_UNLOCKED(object); 170 } 171 172 static void 173 tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start, 174 vm_offset_t end) 175 { 176 vm_offset_t new, old; 177 178 VM_OBJECT_WLOCK(object); 179 KASSERT((object->flags & OBJ_ANON) == 0, 180 ("%s: object %p with OBJ_ANON", __func__, object)); 181 old = 
object->un_pager.swp.writemappings; 182 object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; 183 new = object->un_pager.swp.writemappings; 184 tmpfs_pager_writecount_recalc(object, old, new); 185 VM_OBJECT_ASSERT_UNLOCKED(object); 186 } 187 188 static void 189 tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) 190 { 191 struct vnode *vp; 192 193 /* 194 * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type 195 * type, but not OBJ_TMPFS flag. In this case there is no 196 * v_writecount to adjust. 197 */ 198 if (vp_heldp != NULL) 199 VM_OBJECT_RLOCK(object); 200 else 201 VM_OBJECT_ASSERT_LOCKED(object); 202 if ((object->flags & OBJ_TMPFS) != 0) { 203 vp = object->un_pager.swp.swp_tmpfs; 204 if (vp != NULL) { 205 *vpp = vp; 206 if (vp_heldp != NULL) { 207 vhold(vp); 208 *vp_heldp = true; 209 } 210 } 211 } 212 if (vp_heldp != NULL) 213 VM_OBJECT_RUNLOCK(object); 214 } 215 216 struct pagerops tmpfs_pager_ops = { 217 .pgo_kvme_type = KVME_TYPE_VNODE, 218 .pgo_alloc = tmpfs_pager_alloc, 219 .pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, 220 .pgo_update_writecount = tmpfs_pager_update_writecount, 221 .pgo_release_writecount = tmpfs_pager_release_writecount, 222 .pgo_mightbedirty = vm_object_mightbedirty_, 223 .pgo_getvp = tmpfs_pager_getvp, 224 }; 225 226 static int 227 tmpfs_node_ctor(void *mem, int size, void *arg, int flags) 228 { 229 struct tmpfs_node *node; 230 231 node = mem; 232 node->tn_gen++; 233 node->tn_size = 0; 234 node->tn_status = 0; 235 node->tn_accessed = false; 236 node->tn_flags = 0; 237 node->tn_links = 0; 238 node->tn_vnode = NULL; 239 node->tn_vpstate = 0; 240 return (0); 241 } 242 243 static void 244 tmpfs_node_dtor(void *mem, int size, void *arg) 245 { 246 struct tmpfs_node *node; 247 248 node = mem; 249 node->tn_type = VNON; 250 } 251 252 static int 253 tmpfs_node_init(void *mem, int size, int flags) 254 { 255 struct tmpfs_node *node; 256 257 node = mem; 258 node->tn_id = 0; 259 
mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF); 260 node->tn_gen = arc4random(); 261 return (0); 262 } 263 264 static void 265 tmpfs_node_fini(void *mem, int size) 266 { 267 struct tmpfs_node *node; 268 269 node = mem; 270 mtx_destroy(&node->tn_interlock); 271 } 272 273 int 274 tmpfs_subr_init(void) 275 { 276 tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops, 277 OBJT_SWAP); 278 if (tmpfs_pager_type == -1) 279 return (EINVAL); 280 tmpfs_node_pool = uma_zcreate("TMPFS node", 281 sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, 282 tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); 283 VFS_SMR_ZONE_SET(tmpfs_node_pool); 284 return (0); 285 } 286 287 void 288 tmpfs_subr_uninit(void) 289 { 290 if (tmpfs_pager_type != -1) 291 vm_pager_free_dyn_type(tmpfs_pager_type); 292 tmpfs_pager_type = -1; 293 uma_zdestroy(tmpfs_node_pool); 294 } 295 296 static int 297 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 298 { 299 int error; 300 long pages, bytes; 301 302 pages = *(long *)arg1; 303 bytes = pages * PAGE_SIZE; 304 305 error = sysctl_handle_long(oidp, &bytes, 0, req); 306 if (error || !req->newptr) 307 return (error); 308 309 pages = bytes / PAGE_SIZE; 310 if (pages < TMPFS_PAGES_MINRESERVED) 311 return (EINVAL); 312 313 *(long *)arg1 = pages; 314 return (0); 315 } 316 317 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, 318 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0, 319 sysctl_mem_reserved, "L", 320 "Amount of available memory and swap below which tmpfs growth stops"); 321 322 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 323 struct tmpfs_dirent *b); 324 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 325 326 size_t 327 tmpfs_mem_avail(void) 328 { 329 size_t avail; 330 long reserved; 331 332 avail = swap_pager_avail + vm_free_count(); 333 reserved = atomic_load_long(&tmpfs_pages_reserved); 334 if (__predict_false(avail < reserved)) 335 return (0); 336 return (avail - 
reserved); 337 } 338 339 size_t 340 tmpfs_pages_used(struct tmpfs_mount *tmp) 341 { 342 const size_t node_size = sizeof(struct tmpfs_node) + 343 sizeof(struct tmpfs_dirent); 344 size_t meta_pages; 345 346 meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 347 PAGE_SIZE); 348 return (meta_pages + tmp->tm_pages_used); 349 } 350 351 static size_t 352 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 353 { 354 if (tmpfs_mem_avail() < req_pages) 355 return (0); 356 357 if (tmp->tm_pages_max != ULONG_MAX && 358 tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 359 return (0); 360 361 return (1); 362 } 363 364 static int 365 tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base, 366 int end, boolean_t ignerr) 367 { 368 vm_page_t m; 369 int rv, error; 370 371 VM_OBJECT_ASSERT_WLOCKED(object); 372 KASSERT(base >= 0, ("%s: base %d", __func__, base)); 373 KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base, 374 end)); 375 error = 0; 376 377 retry: 378 m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); 379 if (m != NULL) { 380 MPASS(vm_page_all_valid(m)); 381 } else if (vm_pager_has_page(object, idx, NULL, NULL)) { 382 m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL | 383 VM_ALLOC_WAITFAIL); 384 if (m == NULL) 385 goto retry; 386 vm_object_pip_add(object, 1); 387 VM_OBJECT_WUNLOCK(object); 388 rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); 389 VM_OBJECT_WLOCK(object); 390 vm_object_pip_wakeup(object); 391 if (rv == VM_PAGER_OK) { 392 /* 393 * Since the page was not resident, and therefore not 394 * recently accessed, immediately enqueue it for 395 * asynchronous laundering. The current operation is 396 * not regarded as an access. 
397 */ 398 vm_page_launder(m); 399 } else { 400 vm_page_free(m); 401 m = NULL; 402 if (!ignerr) 403 error = EIO; 404 } 405 } 406 if (m != NULL) { 407 pmap_zero_page_area(m, base, end - base); 408 vm_page_set_dirty(m); 409 vm_page_xunbusy(m); 410 } 411 412 return (error); 413 } 414 415 void 416 tmpfs_ref_node(struct tmpfs_node *node) 417 { 418 #ifdef INVARIANTS 419 u_int old; 420 421 old = 422 #endif 423 refcount_acquire(&node->tn_refcount); 424 #ifdef INVARIANTS 425 KASSERT(old > 0, ("node %p zero refcount", node)); 426 #endif 427 } 428 429 /* 430 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 431 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 432 * using the credentials of the process 'p'. 433 * 434 * If the node type is set to 'VDIR', then the parent parameter must point 435 * to the parent directory of the node being created. It may only be NULL 436 * while allocating the root node. 437 * 438 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 439 * specifies the device the node represents. 440 * 441 * If the node type is set to 'VLNK', then the parameter target specifies 442 * the file name of the target file for the symbolic link that is being 443 * created. 444 * 445 * Note that new nodes are retrieved from the available list if it has 446 * items or, if it is empty, from the node pool as long as there is enough 447 * space to create them. 448 * 449 * Returns zero on success or an appropriate error code on failure. 450 */ 451 int 452 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, 453 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 454 const char *target, dev_t rdev, struct tmpfs_node **node) 455 { 456 struct tmpfs_node *nnode; 457 char *symlink; 458 char symlink_smr; 459 460 /* If the root directory of the 'tmp' file system is not yet 461 * allocated, this must be the request to do it. 
*/ 462 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 463 464 MPASS(IFF(type == VLNK, target != NULL)); 465 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 466 467 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 468 return (ENOSPC); 469 if (tmpfs_pages_check_avail(tmp, 1) == 0) 470 return (ENOSPC); 471 472 if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { 473 /* 474 * When a new tmpfs node is created for fully 475 * constructed mount point, there must be a parent 476 * node, which vnode is locked exclusively. As 477 * consequence, if the unmount is executing in 478 * parallel, vflush() cannot reclaim the parent vnode. 479 * Due to this, the check for MNTK_UNMOUNT flag is not 480 * racy: if we did not see MNTK_UNMOUNT flag, then tmp 481 * cannot be destroyed until node construction is 482 * finished and the parent vnode unlocked. 483 * 484 * Tmpfs does not need to instantiate new nodes during 485 * unmount. 486 */ 487 return (EBUSY); 488 } 489 if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) 490 return (EROFS); 491 492 nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK); 493 494 /* Generic initialization. */ 495 nnode->tn_type = type; 496 vfs_timestamp(&nnode->tn_atime); 497 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 498 nnode->tn_atime; 499 nnode->tn_uid = uid; 500 nnode->tn_gid = gid; 501 nnode->tn_mode = mode; 502 nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr); 503 nnode->tn_refcount = 1; 504 505 /* Type-specific initialization. */ 506 switch (nnode->tn_type) { 507 case VBLK: 508 case VCHR: 509 nnode->tn_rdev = rdev; 510 break; 511 512 case VDIR: 513 RB_INIT(&nnode->tn_dir.tn_dirhead); 514 LIST_INIT(&nnode->tn_dir.tn_dupindex); 515 MPASS(parent != nnode); 516 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 517 nnode->tn_dir.tn_parent = (parent == NULL) ? 
nnode : parent; 518 nnode->tn_dir.tn_readdir_lastn = 0; 519 nnode->tn_dir.tn_readdir_lastp = NULL; 520 nnode->tn_links++; 521 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 522 nnode->tn_dir.tn_parent->tn_links++; 523 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 524 break; 525 526 case VFIFO: 527 /* FALLTHROUGH */ 528 case VSOCK: 529 break; 530 531 case VLNK: 532 MPASS(strlen(target) < MAXPATHLEN); 533 nnode->tn_size = strlen(target); 534 535 symlink = NULL; 536 if (!tmp->tm_nonc) { 537 symlink = cache_symlink_alloc(nnode->tn_size + 1, M_WAITOK); 538 symlink_smr = true; 539 } 540 if (symlink == NULL) { 541 symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, M_WAITOK); 542 symlink_smr = false; 543 } 544 memcpy(symlink, target, nnode->tn_size + 1); 545 546 /* 547 * Allow safe symlink resolving for lockless lookup. 548 * tmpfs_fplookup_symlink references this comment. 549 * 550 * 1. nnode is not yet visible to the world 551 * 2. both tn_link_target and tn_link_smr get populated 552 * 3. release fence publishes their content 553 * 4. tn_link_target content is immutable until node destruction, 554 * where the pointer gets set to NULL 555 * 5. tn_link_smr is never changed once set 556 * 557 * As a result it is sufficient to issue load consume on the node 558 * pointer to also get the above content in a stable manner. 559 * Worst case tn_link_smr flag may be set to true despite being stale, 560 * while the target buffer is already cleared out. 
561 */ 562 atomic_store_ptr(&nnode->tn_link_target, symlink); 563 atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); 564 atomic_thread_fence_rel(); 565 break; 566 567 case VREG: 568 nnode->tn_reg.tn_aobj = 569 vm_pager_allocate(tmpfs_pager_type, NULL, 0, 570 VM_PROT_DEFAULT, 0, 571 NULL /* XXXKIB - tmpfs needs swap reservation */); 572 /* OBJ_TMPFS is set together with the setting of vp->v_object */ 573 nnode->tn_reg.tn_tmp = tmp; 574 break; 575 576 default: 577 panic("tmpfs_alloc_node: type %p %d", nnode, 578 (int)nnode->tn_type); 579 } 580 581 TMPFS_LOCK(tmp); 582 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 583 nnode->tn_attached = true; 584 tmp->tm_nodes_inuse++; 585 tmp->tm_refcount++; 586 TMPFS_UNLOCK(tmp); 587 588 *node = nnode; 589 return (0); 590 } 591 592 /* 593 * Destroys the node pointed to by node from the file system 'tmp'. 594 * If the node references a directory, no entries are allowed. 595 */ 596 void 597 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 598 { 599 if (refcount_release_if_not_last(&node->tn_refcount)) 600 return; 601 602 TMPFS_LOCK(tmp); 603 TMPFS_NODE_LOCK(node); 604 if (!tmpfs_free_node_locked(tmp, node, false)) { 605 TMPFS_NODE_UNLOCK(node); 606 TMPFS_UNLOCK(tmp); 607 } 608 } 609 610 bool 611 tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node, 612 bool detach) 613 { 614 vm_object_t uobj; 615 char *symlink; 616 bool last; 617 618 TMPFS_MP_ASSERT_LOCKED(tmp); 619 TMPFS_NODE_ASSERT_LOCKED(node); 620 621 last = refcount_release(&node->tn_refcount); 622 if (node->tn_attached && (detach || last)) { 623 MPASS(tmp->tm_nodes_inuse > 0); 624 tmp->tm_nodes_inuse--; 625 LIST_REMOVE(node, tn_entries); 626 node->tn_attached = false; 627 } 628 if (!last) 629 return (false); 630 631 TMPFS_NODE_UNLOCK(node); 632 633 #ifdef INVARIANTS 634 MPASS(node->tn_vnode == NULL); 635 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 636 637 /* 638 * Make sure this is a node type we can deal 
with. Everything is explicitly 639 * enumerated without the 'default' clause so the the compiler can throw an 640 * error in case a new type is added. 641 */ 642 switch (node->tn_type) { 643 case VBLK: 644 case VCHR: 645 case VDIR: 646 case VFIFO: 647 case VSOCK: 648 case VLNK: 649 case VREG: 650 break; 651 case VNON: 652 case VBAD: 653 case VMARKER: 654 panic("%s: bad type %d for node %p", __func__, (int)node->tn_type, node); 655 } 656 #endif 657 658 switch (node->tn_type) { 659 case VREG: 660 uobj = node->tn_reg.tn_aobj; 661 if (uobj != NULL) { 662 if (uobj->size != 0) 663 atomic_subtract_long(&tmp->tm_pages_used, uobj->size); 664 } 665 666 tmpfs_free_tmp(tmp); 667 668 if (uobj != NULL) { 669 KASSERT((uobj->flags & OBJ_TMPFS) == 0, 670 ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); 671 vm_object_deallocate(uobj); 672 } 673 break; 674 case VLNK: 675 tmpfs_free_tmp(tmp); 676 677 symlink = node->tn_link_target; 678 atomic_store_ptr(&node->tn_link_target, NULL); 679 if (atomic_load_char(&node->tn_link_smr)) { 680 cache_symlink_free(symlink, node->tn_size + 1); 681 } else { 682 free(symlink, M_TMPFSNAME); 683 } 684 break; 685 default: 686 tmpfs_free_tmp(tmp); 687 break; 688 } 689 690 uma_zfree_smr(tmpfs_node_pool, node); 691 return (true); 692 } 693 694 static __inline uint32_t 695 tmpfs_dirent_hash(const char *name, u_int len) 696 { 697 uint32_t hash; 698 699 hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; 700 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP 701 hash &= 0xf; 702 #endif 703 if (hash < TMPFS_DIRCOOKIE_MIN) 704 hash += TMPFS_DIRCOOKIE_MIN; 705 706 return (hash); 707 } 708 709 static __inline off_t 710 tmpfs_dirent_cookie(struct tmpfs_dirent *de) 711 { 712 if (de == NULL) 713 return (TMPFS_DIRCOOKIE_EOF); 714 715 MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); 716 717 return (de->td_cookie); 718 } 719 720 static __inline boolean_t 721 tmpfs_dirent_dup(struct tmpfs_dirent *de) 722 { 723 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); 724 
} 725 726 static __inline boolean_t 727 tmpfs_dirent_duphead(struct tmpfs_dirent *de) 728 { 729 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); 730 } 731 732 void 733 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) 734 { 735 de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); 736 memcpy(de->ud.td_name, name, namelen); 737 de->td_namelen = namelen; 738 } 739 740 /* 741 * Allocates a new directory entry for the node node with a name of name. 742 * The new directory entry is returned in *de. 743 * 744 * The link count of node is increased by one to reflect the new object 745 * referencing it. 746 * 747 * Returns zero on success or an appropriate error code on failure. 748 */ 749 int 750 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 751 const char *name, u_int len, struct tmpfs_dirent **de) 752 { 753 struct tmpfs_dirent *nde; 754 755 nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK); 756 nde->td_node = node; 757 if (name != NULL) { 758 nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 759 tmpfs_dirent_init(nde, name, len); 760 } else 761 nde->td_namelen = 0; 762 if (node != NULL) 763 node->tn_links++; 764 765 *de = nde; 766 767 return (0); 768 } 769 770 /* 771 * Frees a directory entry. It is the caller's responsibility to destroy 772 * the node referenced by it if needed. 773 * 774 * The link count of node is decreased by one to reflect the removal of an 775 * object that referenced it. This only happens if 'node_exists' is true; 776 * otherwise the function will not access the node referred to by the 777 * directory entry, as it may already have been released from the outside. 
778 */ 779 void 780 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 781 { 782 struct tmpfs_node *node; 783 784 node = de->td_node; 785 if (node != NULL) { 786 MPASS(node->tn_links > 0); 787 node->tn_links--; 788 } 789 if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) 790 free(de->ud.td_name, M_TMPFSNAME); 791 free(de, M_TMPFSDIR); 792 } 793 794 void 795 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) 796 { 797 bool want_vrele; 798 799 ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); 800 if (vp->v_type != VREG || obj == NULL) 801 return; 802 803 VM_OBJECT_WLOCK(obj); 804 VI_LOCK(vp); 805 /* 806 * May be going through forced unmount. 807 */ 808 want_vrele = false; 809 if ((obj->flags & OBJ_TMPFS_VREF) != 0) { 810 vm_object_clear_flag(obj, OBJ_TMPFS_VREF); 811 want_vrele = true; 812 } 813 814 vm_object_clear_flag(obj, OBJ_TMPFS); 815 obj->un_pager.swp.swp_tmpfs = NULL; 816 if (vp->v_writecount < 0) 817 vp->v_writecount = 0; 818 VI_UNLOCK(vp); 819 VM_OBJECT_WUNLOCK(obj); 820 if (want_vrele) { 821 vrele(vp); 822 } 823 } 824 825 /* 826 * Need to clear v_object for insmntque failure. 827 */ 828 static void 829 tmpfs_insmntque_dtr(struct vnode *vp) 830 { 831 832 tmpfs_destroy_vobject(vp, vp->v_object); 833 vp->v_object = NULL; 834 vp->v_data = NULL; 835 vp->v_op = &dead_vnodeops; 836 vgone(vp); 837 vput(vp); 838 } 839 840 /* 841 * Allocates a new vnode for the node node or returns a new reference to 842 * an existing one if the node had already a vnode referencing it. The 843 * resulting locked vnode is returned in *vpp. 844 * 845 * Returns zero on success or an appropriate error code on failure. 
846 */ 847 int 848 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 849 struct vnode **vpp) 850 { 851 struct vnode *vp; 852 enum vgetstate vs; 853 struct tmpfs_mount *tm; 854 vm_object_t object; 855 int error; 856 857 error = 0; 858 tm = VFS_TO_TMPFS(mp); 859 TMPFS_NODE_LOCK(node); 860 tmpfs_ref_node(node); 861 loop: 862 TMPFS_NODE_ASSERT_LOCKED(node); 863 if ((vp = node->tn_vnode) != NULL) { 864 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 865 if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || 866 (VN_IS_DOOMED(vp) && 867 (lkflag & LK_NOWAIT) != 0)) { 868 TMPFS_NODE_UNLOCK(node); 869 error = ENOENT; 870 vp = NULL; 871 goto out; 872 } 873 if (VN_IS_DOOMED(vp)) { 874 node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; 875 while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { 876 msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 877 0, "tmpfsE", 0); 878 } 879 goto loop; 880 } 881 vs = vget_prep(vp); 882 TMPFS_NODE_UNLOCK(node); 883 error = vget_finish(vp, lkflag, vs); 884 if (error == ENOENT) { 885 TMPFS_NODE_LOCK(node); 886 goto loop; 887 } 888 if (error != 0) { 889 vp = NULL; 890 goto out; 891 } 892 893 /* 894 * Make sure the vnode is still there after 895 * getting the interlock to avoid racing a free. 896 */ 897 if (node->tn_vnode != vp) { 898 vput(vp); 899 TMPFS_NODE_LOCK(node); 900 goto loop; 901 } 902 903 goto out; 904 } 905 906 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 907 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 908 TMPFS_NODE_UNLOCK(node); 909 error = ENOENT; 910 vp = NULL; 911 goto out; 912 } 913 914 /* 915 * otherwise lock the vp list while we call getnewvnode 916 * since that can block. 
917 */ 918 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 919 node->tn_vpstate |= TMPFS_VNODE_WANT; 920 error = msleep((caddr_t) &node->tn_vpstate, 921 TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0); 922 if (error != 0) 923 goto out; 924 goto loop; 925 } else 926 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 927 928 TMPFS_NODE_UNLOCK(node); 929 930 /* Get a new vnode and associate it with our node. */ 931 error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ? 932 &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp); 933 if (error != 0) 934 goto unlock; 935 MPASS(vp != NULL); 936 937 /* lkflag is ignored, the lock is exclusive */ 938 (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 939 940 vp->v_data = node; 941 vp->v_type = node->tn_type; 942 943 /* Type-specific initialization. */ 944 switch (node->tn_type) { 945 case VBLK: 946 /* FALLTHROUGH */ 947 case VCHR: 948 /* FALLTHROUGH */ 949 case VLNK: 950 /* FALLTHROUGH */ 951 case VSOCK: 952 break; 953 case VFIFO: 954 vp->v_op = &tmpfs_fifoop_entries; 955 break; 956 case VREG: 957 object = node->tn_reg.tn_aobj; 958 VM_OBJECT_WLOCK(object); 959 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 960 ("%s: object %p with OBJ_TMPFS_VREF but without vnode", 961 __func__, object)); 962 KASSERT(object->un_pager.swp.writemappings == 0, 963 ("%s: object %p has writemappings", 964 __func__, object)); 965 VI_LOCK(vp); 966 KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); 967 vp->v_object = object; 968 object->un_pager.swp.swp_tmpfs = vp; 969 vm_object_set_flag(object, OBJ_TMPFS); 970 vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF); 971 VI_UNLOCK(vp); 972 VM_OBJECT_WUNLOCK(object); 973 break; 974 case VDIR: 975 MPASS(node->tn_dir.tn_parent != NULL); 976 if (node->tn_dir.tn_parent == node) 977 vp->v_vflag |= VV_ROOT; 978 break; 979 980 default: 981 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 982 } 983 if (vp->v_type != VFIFO) 984 VN_LOCK_ASHARE(vp); 985 986 error = insmntque1(vp, mp, NULL, 
NULL); 987 if (error != 0) { 988 tmpfs_insmntque_dtr(vp); 989 vp = NULL; 990 } 991 992 unlock: 993 TMPFS_NODE_LOCK(node); 994 995 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 996 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 997 node->tn_vnode = vp; 998 999 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 1000 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 1001 TMPFS_NODE_UNLOCK(node); 1002 wakeup((caddr_t) &node->tn_vpstate); 1003 } else 1004 TMPFS_NODE_UNLOCK(node); 1005 1006 out: 1007 if (error == 0) { 1008 *vpp = vp; 1009 1010 #ifdef INVARIANTS 1011 MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); 1012 TMPFS_NODE_LOCK(node); 1013 MPASS(*vpp == node->tn_vnode); 1014 TMPFS_NODE_UNLOCK(node); 1015 #endif 1016 } 1017 tmpfs_free_node(tm, node); 1018 1019 return (error); 1020 } 1021 1022 /* 1023 * Destroys the association between the vnode vp and the node it 1024 * references. 1025 */ 1026 void 1027 tmpfs_free_vp(struct vnode *vp) 1028 { 1029 struct tmpfs_node *node; 1030 1031 node = VP_TO_TMPFS_NODE(vp); 1032 1033 TMPFS_NODE_ASSERT_LOCKED(node); 1034 node->tn_vnode = NULL; 1035 if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) 1036 wakeup(&node->tn_vnode); 1037 node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; 1038 vp->v_data = NULL; 1039 } 1040 1041 /* 1042 * Allocates a new file of type 'type' and adds it to the parent directory 1043 * 'dvp'; this addition is done using the component name given in 'cnp'. 1044 * The ownership of the new file is automatically assigned based on the 1045 * credentials of the caller (through 'cnp'), the group is set based on 1046 * the parent directory and the mode is determined from the 'vap' argument. 1047 * If successful, *vpp holds a vnode to the newly created file and zero 1048 * is returned. Otherwise *vpp is NULL and the function returns an 1049 * appropriate error code. 
1050 */ 1051 int 1052 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 1053 struct componentname *cnp, const char *target) 1054 { 1055 int error; 1056 struct tmpfs_dirent *de; 1057 struct tmpfs_mount *tmp; 1058 struct tmpfs_node *dnode; 1059 struct tmpfs_node *node; 1060 struct tmpfs_node *parent; 1061 1062 ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file"); 1063 MPASS(cnp->cn_flags & HASBUF); 1064 1065 tmp = VFS_TO_TMPFS(dvp->v_mount); 1066 dnode = VP_TO_TMPFS_DIR(dvp); 1067 *vpp = NULL; 1068 1069 /* If the entry we are creating is a directory, we cannot overflow 1070 * the number of links of its parent, because it will get a new 1071 * link. */ 1072 if (vap->va_type == VDIR) { 1073 /* Ensure that we do not overflow the maximum number of links 1074 * imposed by the system. */ 1075 MPASS(dnode->tn_links <= TMPFS_LINK_MAX); 1076 if (dnode->tn_links == TMPFS_LINK_MAX) { 1077 return (EMLINK); 1078 } 1079 1080 parent = dnode; 1081 MPASS(parent != NULL); 1082 } else 1083 parent = NULL; 1084 1085 /* Allocate a node that represents the new file. */ 1086 error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, 1087 cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent, 1088 target, vap->va_rdev, &node); 1089 if (error != 0) 1090 return (error); 1091 1092 /* Allocate a directory entry that points to the new file. */ 1093 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 1094 &de); 1095 if (error != 0) { 1096 tmpfs_free_node(tmp, node); 1097 return (error); 1098 } 1099 1100 /* Allocate a vnode for the new file. */ 1101 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 1102 if (error != 0) { 1103 tmpfs_free_dirent(tmp, de); 1104 tmpfs_free_node(tmp, node); 1105 return (error); 1106 } 1107 1108 /* Now that all required items are allocated, we can proceed to 1109 * insert the new node into the directory, an operation that 1110 * cannot fail. 
*/ 1111 if (cnp->cn_flags & ISWHITEOUT) 1112 tmpfs_dir_whiteout_remove(dvp, cnp); 1113 tmpfs_dir_attach(dvp, de); 1114 return (0); 1115 } 1116 1117 struct tmpfs_dirent * 1118 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1119 { 1120 struct tmpfs_dirent *de; 1121 1122 de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); 1123 dc->tdc_tree = de; 1124 if (de != NULL && tmpfs_dirent_duphead(de)) 1125 de = LIST_FIRST(&de->ud.td_duphead); 1126 dc->tdc_current = de; 1127 1128 return (dc->tdc_current); 1129 } 1130 1131 struct tmpfs_dirent * 1132 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1133 { 1134 struct tmpfs_dirent *de; 1135 1136 MPASS(dc->tdc_tree != NULL); 1137 if (tmpfs_dirent_dup(dc->tdc_current)) { 1138 dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); 1139 if (dc->tdc_current != NULL) 1140 return (dc->tdc_current); 1141 } 1142 dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, 1143 &dnode->tn_dir.tn_dirhead, dc->tdc_tree); 1144 if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { 1145 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 1146 MPASS(dc->tdc_current != NULL); 1147 } 1148 1149 return (dc->tdc_current); 1150 } 1151 1152 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ 1153 static struct tmpfs_dirent * 1154 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) 1155 { 1156 struct tmpfs_dirent *de, dekey; 1157 1158 dekey.td_hash = hash; 1159 de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); 1160 return (de); 1161 } 1162 1163 /* Lookup directory entry by cookie, initialize directory cursor accordingly. 
*/
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
    struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *hit, key;

	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);

	/*
	 * Try the entry remembered by the previous readdir first.  The
	 * cached pair tn_readdir_last[pn] may be updated with only a shared
	 * vnode lock held, so re-check the cookie against the cached entry
	 * before trusting it.
	 */
	if (cookie == node->tn_dir.tn_readdir_lastn) {
		hit = node->tn_dir.tn_readdir_lastp;
		if (hit != NULL && cookie == tmpfs_dirent_cookie(hit))
			goto exact;
	}

	/*
	 * Duplicate cookies live on the dupindex list, which is sorted, so
	 * the scan can stop as soon as a smaller cookie is seen.  A scan
	 * that runs off the end leaves hit == NULL.
	 */
	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
		LIST_FOREACH(hit, &node->tn_dir.tn_dupindex,
		    uh.td_dup.index_entries) {
			MPASS(tmpfs_dirent_dup(hit));
			if (hit->td_cookie == cookie)
				break;
			if (hit->td_cookie < cookie) {
				hit = NULL;
				break;
			}
		}
		goto exact;
	}

	/*
	 * Plain cookie: it doubles as the hash.  RB_NFIND is used so that
	 * the lookup recovers when the entry for the cookie was removed.
	 * Cookies with bits outside the mask match nothing.
	 */
	hit = NULL;
	if ((cookie & TMPFS_DIRCOOKIE_MASK) == cookie) {
		key.td_hash = cookie;
		hit = RB_NFIND(tmpfs_dir, &node->tn_dir.tn_dirhead, &key);
	}
	dc->tdc_tree = hit;
	dc->tdc_current = hit;
	if (hit != NULL && tmpfs_dirent_duphead(hit)) {
		dc->tdc_current = LIST_FIRST(&hit->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}
	return (dc->tdc_current);

exact:
	/*
	 * The cursor's current entry is 'hit' itself.  For a duplicate
	 * entry the tree position is looked up separately by hash.
	 */
	dc->tdc_tree = hit;
	dc->tdc_current = hit;
	if (hit != NULL && tmpfs_dirent_dup(hit))
		dc->tdc_tree = tmpfs_dir_xlookup_hash(node, hit->td_hash);
	return (dc->tdc_current);
}

/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
1228 */ 1229 struct tmpfs_dirent * 1230 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 1231 struct componentname *cnp) 1232 { 1233 struct tmpfs_dir_duphead *duphead; 1234 struct tmpfs_dirent *de; 1235 uint32_t hash; 1236 1237 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 1238 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 1239 cnp->cn_nameptr[1] == '.'))); 1240 TMPFS_VALIDATE_DIR(node); 1241 1242 hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 1243 de = tmpfs_dir_xlookup_hash(node, hash); 1244 if (de != NULL && tmpfs_dirent_duphead(de)) { 1245 duphead = &de->ud.td_duphead; 1246 LIST_FOREACH(de, duphead, uh.td_dup.entries) { 1247 if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1248 cnp->cn_namelen)) 1249 break; 1250 } 1251 } else if (de != NULL) { 1252 if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1253 cnp->cn_namelen)) 1254 de = NULL; 1255 } 1256 if (de != NULL && f != NULL && de->td_node != f) 1257 de = NULL; 1258 1259 return (de); 1260 } 1261 1262 /* 1263 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 1264 * list, allocate new cookie value. 1265 */ 1266 static void 1267 tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 1268 struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 1269 { 1270 struct tmpfs_dir_duphead *dupindex; 1271 struct tmpfs_dirent *de, *pde; 1272 1273 dupindex = &dnode->tn_dir.tn_dupindex; 1274 de = LIST_FIRST(dupindex); 1275 if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 1276 if (de == NULL) 1277 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1278 else 1279 nde->td_cookie = de->td_cookie + 1; 1280 MPASS(tmpfs_dirent_dup(nde)); 1281 LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 1282 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1283 return; 1284 } 1285 1286 /* 1287 * Cookie numbers are near exhaustion. Scan dupindex list for unused 1288 * numbers. dupindex list is sorted in descending order. 
Keep it so 1289 * after inserting nde. 1290 */ 1291 while (1) { 1292 pde = de; 1293 de = LIST_NEXT(de, uh.td_dup.index_entries); 1294 if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 1295 /* 1296 * Last element of the index doesn't have minimal cookie 1297 * value, use it. 1298 */ 1299 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1300 LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 1301 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1302 return; 1303 } else if (de == NULL) { 1304 /* 1305 * We are so lucky have 2^30 hash duplicates in single 1306 * directory :) Return largest possible cookie value. 1307 * It should be fine except possible issues with 1308 * VOP_READDIR restart. 1309 */ 1310 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 1311 LIST_INSERT_HEAD(dupindex, nde, 1312 uh.td_dup.index_entries); 1313 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1314 return; 1315 } 1316 if (de->td_cookie + 1 == pde->td_cookie || 1317 de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 1318 continue; /* No hole or invalid cookie. */ 1319 nde->td_cookie = de->td_cookie + 1; 1320 MPASS(tmpfs_dirent_dup(nde)); 1321 MPASS(pde->td_cookie > nde->td_cookie); 1322 MPASS(nde->td_cookie > de->td_cookie); 1323 LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 1324 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1325 return; 1326 } 1327 } 1328 1329 /* 1330 * Attaches the directory entry de to the directory represented by vp. 1331 * Note that this does not change the link count of the node pointed by 1332 * the directory entry, as this is done by tmpfs_alloc_dirent. 
1333 */ 1334 void 1335 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 1336 { 1337 struct tmpfs_node *dnode; 1338 struct tmpfs_dirent *xde, *nde; 1339 1340 ASSERT_VOP_ELOCKED(vp, __func__); 1341 MPASS(de->td_namelen > 0); 1342 MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 1343 MPASS(de->td_cookie == de->td_hash); 1344 1345 dnode = VP_TO_TMPFS_DIR(vp); 1346 dnode->tn_dir.tn_readdir_lastn = 0; 1347 dnode->tn_dir.tn_readdir_lastp = NULL; 1348 1349 MPASS(!tmpfs_dirent_dup(de)); 1350 xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1351 if (xde != NULL && tmpfs_dirent_duphead(xde)) 1352 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1353 else if (xde != NULL) { 1354 /* 1355 * Allocate new duphead. Swap xde with duphead to avoid 1356 * adding/removing elements with the same hash. 1357 */ 1358 MPASS(!tmpfs_dirent_dup(xde)); 1359 tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 1360 &nde); 1361 /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ 1362 memcpy(nde, xde, sizeof(*xde)); 1363 xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 1364 LIST_INIT(&xde->ud.td_duphead); 1365 xde->td_namelen = 0; 1366 xde->td_node = NULL; 1367 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 1368 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1369 } 1370 dnode->tn_size += sizeof(struct tmpfs_dirent); 1371 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1372 dnode->tn_accessed = true; 1373 tmpfs_update(vp); 1374 } 1375 1376 /* 1377 * Detaches the directory entry de from the directory represented by vp. 1378 * Note that this does not change the link count of the node pointed by 1379 * the directory entry, as this is done by tmpfs_free_dirent. 
1380 */ 1381 void 1382 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1383 { 1384 struct tmpfs_mount *tmp; 1385 struct tmpfs_dir *head; 1386 struct tmpfs_node *dnode; 1387 struct tmpfs_dirent *xde; 1388 1389 ASSERT_VOP_ELOCKED(vp, __func__); 1390 1391 dnode = VP_TO_TMPFS_DIR(vp); 1392 head = &dnode->tn_dir.tn_dirhead; 1393 dnode->tn_dir.tn_readdir_lastn = 0; 1394 dnode->tn_dir.tn_readdir_lastp = NULL; 1395 1396 if (tmpfs_dirent_dup(de)) { 1397 /* Remove duphead if de was last entry. */ 1398 if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 1399 xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 1400 MPASS(tmpfs_dirent_duphead(xde)); 1401 } else 1402 xde = NULL; 1403 LIST_REMOVE(de, uh.td_dup.entries); 1404 LIST_REMOVE(de, uh.td_dup.index_entries); 1405 if (xde != NULL) { 1406 if (LIST_EMPTY(&xde->ud.td_duphead)) { 1407 RB_REMOVE(tmpfs_dir, head, xde); 1408 tmp = VFS_TO_TMPFS(vp->v_mount); 1409 MPASS(xde->td_node == NULL); 1410 tmpfs_free_dirent(tmp, xde); 1411 } 1412 } 1413 de->td_cookie = de->td_hash; 1414 } else 1415 RB_REMOVE(tmpfs_dir, head, de); 1416 1417 dnode->tn_size -= sizeof(struct tmpfs_dirent); 1418 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1419 dnode->tn_accessed = true; 1420 tmpfs_update(vp); 1421 } 1422 1423 void 1424 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1425 { 1426 struct tmpfs_dirent *de, *dde, *nde; 1427 1428 RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 1429 RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1430 /* Node may already be destroyed. */ 1431 de->td_node = NULL; 1432 if (tmpfs_dirent_duphead(de)) { 1433 while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 1434 LIST_REMOVE(dde, uh.td_dup.entries); 1435 dde->td_node = NULL; 1436 tmpfs_free_dirent(tmp, dde); 1437 } 1438 } 1439 tmpfs_free_dirent(tmp, de); 1440 } 1441 } 1442 1443 /* 1444 * Helper function for tmpfs_readdir. Creates a '.' 
entry for the given 1445 * directory and returns it in the uio space. The function returns 0 1446 * on success, -1 if there was not enough space in the uio structure to 1447 * hold the directory entry or an appropriate error code if another 1448 * error happens. 1449 */ 1450 static int 1451 tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1452 struct uio *uio) 1453 { 1454 int error; 1455 struct dirent dent; 1456 1457 TMPFS_VALIDATE_DIR(node); 1458 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1459 1460 dent.d_fileno = node->tn_id; 1461 dent.d_off = TMPFS_DIRCOOKIE_DOTDOT; 1462 dent.d_type = DT_DIR; 1463 dent.d_namlen = 1; 1464 dent.d_name[0] = '.'; 1465 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1466 dirent_terminate(&dent); 1467 1468 if (dent.d_reclen > uio->uio_resid) 1469 error = EJUSTRETURN; 1470 else 1471 error = uiomove(&dent, dent.d_reclen, uio); 1472 1473 tmpfs_set_accessed(tm, node); 1474 1475 return (error); 1476 } 1477 1478 /* 1479 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1480 * directory and returns it in the uio space. The function returns 0 1481 * on success, -1 if there was not enough space in the uio structure to 1482 * hold the directory entry or an appropriate error code if another 1483 * error happens. 1484 */ 1485 static int 1486 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1487 struct uio *uio, off_t next) 1488 { 1489 struct tmpfs_node *parent; 1490 struct dirent dent; 1491 int error; 1492 1493 TMPFS_VALIDATE_DIR(node); 1494 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1495 1496 /* 1497 * Return ENOENT if the current node is already removed. 
1498 */ 1499 TMPFS_ASSERT_LOCKED(node); 1500 parent = node->tn_dir.tn_parent; 1501 if (parent == NULL) 1502 return (ENOENT); 1503 1504 dent.d_fileno = parent->tn_id; 1505 dent.d_off = next; 1506 dent.d_type = DT_DIR; 1507 dent.d_namlen = 2; 1508 dent.d_name[0] = '.'; 1509 dent.d_name[1] = '.'; 1510 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1511 dirent_terminate(&dent); 1512 1513 if (dent.d_reclen > uio->uio_resid) 1514 error = EJUSTRETURN; 1515 else 1516 error = uiomove(&dent, dent.d_reclen, uio); 1517 1518 tmpfs_set_accessed(tm, node); 1519 1520 return (error); 1521 } 1522 1523 /* 1524 * Helper function for tmpfs_readdir. Returns as much directory entries 1525 * as can fit in the uio space. The read starts at uio->uio_offset. 1526 * The function returns 0 on success, -1 if there was not enough space 1527 * in the uio structure to hold the directory entry or an appropriate 1528 * error code if another error happens. 1529 */ 1530 int 1531 tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node, 1532 struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies) 1533 { 1534 struct tmpfs_dir_cursor dc; 1535 struct tmpfs_dirent *de, *nde; 1536 off_t off; 1537 int error; 1538 1539 TMPFS_VALIDATE_DIR(node); 1540 1541 off = 0; 1542 1543 /* 1544 * Lookup the node from the current offset. The starting offset of 1545 * 0 will lookup both '.' and '..', and then the first real entry, 1546 * or EOF if there are none. Then find all entries for the dir that 1547 * fit into the buffer. Once no more entries are found (de == NULL), 1548 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 1549 * call to return 0. 
1550 */ 1551 switch (uio->uio_offset) { 1552 case TMPFS_DIRCOOKIE_DOT: 1553 error = tmpfs_dir_getdotdent(tm, node, uio); 1554 if (error != 0) 1555 return (error); 1556 uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT; 1557 if (cookies != NULL) 1558 cookies[(*ncookies)++] = off; 1559 /* FALLTHROUGH */ 1560 case TMPFS_DIRCOOKIE_DOTDOT: 1561 de = tmpfs_dir_first(node, &dc); 1562 off = tmpfs_dirent_cookie(de); 1563 error = tmpfs_dir_getdotdotdent(tm, node, uio, off); 1564 if (error != 0) 1565 return (error); 1566 uio->uio_offset = off; 1567 if (cookies != NULL) 1568 cookies[(*ncookies)++] = off; 1569 /* EOF. */ 1570 if (de == NULL) 1571 return (0); 1572 break; 1573 case TMPFS_DIRCOOKIE_EOF: 1574 return (0); 1575 default: 1576 de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 1577 if (de == NULL) 1578 return (EINVAL); 1579 if (cookies != NULL) 1580 off = tmpfs_dirent_cookie(de); 1581 } 1582 1583 /* 1584 * Read as much entries as possible; i.e., until we reach the end of the 1585 * directory or we exhaust uio space. 1586 */ 1587 do { 1588 struct dirent d; 1589 1590 /* 1591 * Create a dirent structure representing the current tmpfs_node 1592 * and fill it. 
1593 */ 1594 if (de->td_node == NULL) { 1595 d.d_fileno = 1; 1596 d.d_type = DT_WHT; 1597 } else { 1598 d.d_fileno = de->td_node->tn_id; 1599 switch (de->td_node->tn_type) { 1600 case VBLK: 1601 d.d_type = DT_BLK; 1602 break; 1603 1604 case VCHR: 1605 d.d_type = DT_CHR; 1606 break; 1607 1608 case VDIR: 1609 d.d_type = DT_DIR; 1610 break; 1611 1612 case VFIFO: 1613 d.d_type = DT_FIFO; 1614 break; 1615 1616 case VLNK: 1617 d.d_type = DT_LNK; 1618 break; 1619 1620 case VREG: 1621 d.d_type = DT_REG; 1622 break; 1623 1624 case VSOCK: 1625 d.d_type = DT_SOCK; 1626 break; 1627 1628 default: 1629 panic("tmpfs_dir_getdents: type %p %d", 1630 de->td_node, (int)de->td_node->tn_type); 1631 } 1632 } 1633 d.d_namlen = de->td_namelen; 1634 MPASS(de->td_namelen < sizeof(d.d_name)); 1635 (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1636 d.d_reclen = GENERIC_DIRSIZ(&d); 1637 1638 /* 1639 * Stop reading if the directory entry we are treating is bigger 1640 * than the amount of data that can be returned. 1641 */ 1642 if (d.d_reclen > uio->uio_resid) { 1643 error = EJUSTRETURN; 1644 break; 1645 } 1646 1647 nde = tmpfs_dir_next(node, &dc); 1648 d.d_off = tmpfs_dirent_cookie(nde); 1649 dirent_terminate(&d); 1650 1651 /* 1652 * Copy the new dirent structure into the output buffer and 1653 * advance pointers. 1654 */ 1655 error = uiomove(&d, d.d_reclen, uio); 1656 if (error == 0) { 1657 de = nde; 1658 if (cookies != NULL) { 1659 off = tmpfs_dirent_cookie(de); 1660 MPASS(*ncookies < maxcookies); 1661 cookies[(*ncookies)++] = off; 1662 } 1663 } 1664 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1665 1666 /* Skip setting off when using cookies as it is already done above. */ 1667 if (cookies == NULL) 1668 off = tmpfs_dirent_cookie(de); 1669 1670 /* Update the offset and cache. 
*/ 1671 uio->uio_offset = off; 1672 node->tn_dir.tn_readdir_lastn = off; 1673 node->tn_dir.tn_readdir_lastp = de; 1674 1675 tmpfs_set_accessed(tm, node); 1676 return (error); 1677 } 1678 1679 int 1680 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 1681 { 1682 struct tmpfs_dirent *de; 1683 int error; 1684 1685 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 1686 cnp->cn_nameptr, cnp->cn_namelen, &de); 1687 if (error != 0) 1688 return (error); 1689 tmpfs_dir_attach(dvp, de); 1690 return (0); 1691 } 1692 1693 void 1694 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 1695 { 1696 struct tmpfs_dirent *de; 1697 1698 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 1699 MPASS(de != NULL && de->td_node == NULL); 1700 tmpfs_dir_detach(dvp, de); 1701 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1702 } 1703 1704 /* 1705 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 1706 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 1707 * 'newsize' must be positive. 1708 * 1709 * Returns zero on success or an appropriate error code on failure. 1710 */ 1711 int 1712 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1713 { 1714 struct tmpfs_mount *tmp; 1715 struct tmpfs_node *node; 1716 vm_object_t uobj; 1717 vm_pindex_t idx, newpages, oldpages; 1718 off_t oldsize; 1719 int base, error; 1720 1721 MPASS(vp->v_type == VREG); 1722 MPASS(newsize >= 0); 1723 1724 node = VP_TO_TMPFS_NODE(vp); 1725 uobj = node->tn_reg.tn_aobj; 1726 tmp = VFS_TO_TMPFS(vp->v_mount); 1727 1728 /* 1729 * Convert the old and new sizes to the number of pages needed to 1730 * store them. It may happen that we do not need to do anything 1731 * because the last allocated page can accommodate the change on 1732 * its own. 
1733 */ 1734 oldsize = node->tn_size; 1735 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1736 MPASS(oldpages == uobj->size); 1737 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1738 1739 if (__predict_true(newpages == oldpages && newsize >= oldsize)) { 1740 node->tn_size = newsize; 1741 return (0); 1742 } 1743 1744 if (newpages > oldpages && 1745 tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) 1746 return (ENOSPC); 1747 1748 VM_OBJECT_WLOCK(uobj); 1749 if (newsize < oldsize) { 1750 /* 1751 * Zero the truncated part of the last page. 1752 */ 1753 base = newsize & PAGE_MASK; 1754 if (base != 0) { 1755 idx = OFF_TO_IDX(newsize); 1756 error = tmpfs_partial_page_invalidate(uobj, idx, base, 1757 PAGE_SIZE, ignerr); 1758 if (error != 0) { 1759 VM_OBJECT_WUNLOCK(uobj); 1760 return (error); 1761 } 1762 } 1763 1764 /* 1765 * Release any swap space and free any whole pages. 1766 */ 1767 if (newpages < oldpages) 1768 vm_object_page_remove(uobj, newpages, 0, 0); 1769 } 1770 uobj->size = newpages; 1771 VM_OBJECT_WUNLOCK(uobj); 1772 1773 atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); 1774 1775 node->tn_size = newsize; 1776 return (0); 1777 } 1778 1779 /* 1780 * Punch hole in the aobj associated with the regular file pointed to by 'vp'. 1781 * Requests completely beyond the end-of-file are converted to no-op. 1782 * 1783 * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on 1784 * failure. 
1785 */ 1786 int 1787 tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length) 1788 { 1789 struct tmpfs_node *node; 1790 vm_object_t object; 1791 vm_pindex_t pistart, pi, piend; 1792 int startofs, endofs, end; 1793 off_t off, len; 1794 int error; 1795 1796 KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows", 1797 __func__)); 1798 node = VP_TO_TMPFS_NODE(vp); 1799 KASSERT(node->tn_type == VREG, ("%s: node is not regular file", 1800 __func__)); 1801 object = node->tn_reg.tn_aobj; 1802 off = *offset; 1803 len = omin(node->tn_size - off, *length); 1804 startofs = off & PAGE_MASK; 1805 endofs = (off + len) & PAGE_MASK; 1806 pistart = OFF_TO_IDX(off); 1807 piend = OFF_TO_IDX(off + len); 1808 pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK); 1809 error = 0; 1810 1811 /* Handle the case when offset is on or beyond file size. */ 1812 if (len <= 0) { 1813 *length = 0; 1814 return (0); 1815 } 1816 1817 VM_OBJECT_WLOCK(object); 1818 1819 /* 1820 * If there is a partial page at the beginning of the hole-punching 1821 * request, fill the partial page with zeroes. 1822 */ 1823 if (startofs != 0) { 1824 end = pistart != piend ? PAGE_SIZE : endofs; 1825 error = tmpfs_partial_page_invalidate(object, pistart, startofs, 1826 end, FALSE); 1827 if (error != 0) 1828 goto out; 1829 off += end - startofs; 1830 len -= end - startofs; 1831 } 1832 1833 /* 1834 * Toss away the full pages in the affected area. 1835 */ 1836 if (pi < piend) { 1837 vm_object_page_remove(object, pi, piend, 0); 1838 off += IDX_TO_OFF(piend - pi); 1839 len -= IDX_TO_OFF(piend - pi); 1840 } 1841 1842 /* 1843 * If there is a partial page at the end of the hole-punching request, 1844 * fill the partial page with zeroes. 
1845 */ 1846 if (endofs != 0 && pistart != piend) { 1847 error = tmpfs_partial_page_invalidate(object, piend, 0, endofs, 1848 FALSE); 1849 if (error != 0) 1850 goto out; 1851 off += endofs; 1852 len -= endofs; 1853 } 1854 1855 out: 1856 VM_OBJECT_WUNLOCK(object); 1857 *offset = off; 1858 *length = len; 1859 return (error); 1860 } 1861 1862 void 1863 tmpfs_check_mtime(struct vnode *vp) 1864 { 1865 struct tmpfs_node *node; 1866 struct vm_object *obj; 1867 1868 ASSERT_VOP_ELOCKED(vp, "check_mtime"); 1869 if (vp->v_type != VREG) 1870 return; 1871 obj = vp->v_object; 1872 KASSERT(obj->type == tmpfs_pager_type && 1873 (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) == 1874 (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj")); 1875 /* unlocked read */ 1876 if (obj->generation != obj->cleangeneration) { 1877 VM_OBJECT_WLOCK(obj); 1878 if (obj->generation != obj->cleangeneration) { 1879 obj->cleangeneration = obj->generation; 1880 node = VP_TO_TMPFS_NODE(vp); 1881 node->tn_status |= TMPFS_NODE_MODIFIED | 1882 TMPFS_NODE_CHANGED; 1883 } 1884 VM_OBJECT_WUNLOCK(obj); 1885 } 1886 } 1887 1888 /* 1889 * Change flags of the given vnode. 1890 * Caller should execute tmpfs_update on vp after a successful execution. 1891 * The vnode must be locked on entry and remain locked on exit. 1892 */ 1893 int 1894 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, 1895 struct thread *p) 1896 { 1897 int error; 1898 struct tmpfs_node *node; 1899 1900 ASSERT_VOP_ELOCKED(vp, "chflags"); 1901 1902 node = VP_TO_TMPFS_NODE(vp); 1903 1904 if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | 1905 UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | 1906 UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 1907 UF_SPARSE | UF_SYSTEM)) != 0) 1908 return (EOPNOTSUPP); 1909 1910 /* Disallow this operation if the file system is mounted read-only. 
*/ 1911 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1912 return (EROFS); 1913 1914 /* 1915 * Callers may only modify the file flags on objects they 1916 * have VADMIN rights for. 1917 */ 1918 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1919 return (error); 1920 /* 1921 * Unprivileged processes are not permitted to unset system 1922 * flags, or modify flags if any system flags are set. 1923 */ 1924 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { 1925 if (node->tn_flags & 1926 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 1927 error = securelevel_gt(cred, 0); 1928 if (error) 1929 return (error); 1930 } 1931 } else { 1932 if (node->tn_flags & 1933 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1934 ((flags ^ node->tn_flags) & SF_SETTABLE)) 1935 return (EPERM); 1936 } 1937 node->tn_flags = flags; 1938 node->tn_status |= TMPFS_NODE_CHANGED; 1939 1940 ASSERT_VOP_ELOCKED(vp, "chflags2"); 1941 1942 return (0); 1943 } 1944 1945 /* 1946 * Change access mode on the given vnode. 1947 * Caller should execute tmpfs_update on vp after a successful execution. 1948 * The vnode must be locked on entry and remain locked on exit. 1949 */ 1950 int 1951 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1952 { 1953 int error; 1954 struct tmpfs_node *node; 1955 mode_t newmode; 1956 1957 ASSERT_VOP_ELOCKED(vp, "chmod"); 1958 ASSERT_VOP_IN_SEQC(vp); 1959 1960 node = VP_TO_TMPFS_NODE(vp); 1961 1962 /* Disallow this operation if the file system is mounted read-only. */ 1963 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1964 return (EROFS); 1965 1966 /* Immutable or append-only files cannot be modified, either. */ 1967 if (node->tn_flags & (IMMUTABLE | APPEND)) 1968 return (EPERM); 1969 1970 /* 1971 * To modify the permissions on a file, must possess VADMIN 1972 * for that file. 
1973 */ 1974 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1975 return (error); 1976 1977 /* 1978 * Privileged processes may set the sticky bit on non-directories, 1979 * as well as set the setgid bit on a file with a group that the 1980 * process is not a member of. 1981 */ 1982 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1983 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 1984 return (EFTYPE); 1985 } 1986 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1987 error = priv_check_cred(cred, PRIV_VFS_SETGID); 1988 if (error) 1989 return (error); 1990 } 1991 1992 newmode = node->tn_mode & ~ALLPERMS; 1993 newmode |= mode & ALLPERMS; 1994 atomic_store_short(&node->tn_mode, newmode); 1995 1996 node->tn_status |= TMPFS_NODE_CHANGED; 1997 1998 ASSERT_VOP_ELOCKED(vp, "chmod2"); 1999 2000 return (0); 2001 } 2002 2003 /* 2004 * Change ownership of the given vnode. At least one of uid or gid must 2005 * be different than VNOVAL. If one is set to that value, the attribute 2006 * is unchanged. 2007 * Caller should execute tmpfs_update on vp after a successful execution. 2008 * The vnode must be locked on entry and remain locked on exit. 2009 */ 2010 int 2011 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 2012 struct thread *p) 2013 { 2014 int error; 2015 struct tmpfs_node *node; 2016 uid_t ouid; 2017 gid_t ogid; 2018 mode_t newmode; 2019 2020 ASSERT_VOP_ELOCKED(vp, "chown"); 2021 ASSERT_VOP_IN_SEQC(vp); 2022 2023 node = VP_TO_TMPFS_NODE(vp); 2024 2025 /* Assign default values if they are unknown. */ 2026 MPASS(uid != VNOVAL || gid != VNOVAL); 2027 if (uid == VNOVAL) 2028 uid = node->tn_uid; 2029 if (gid == VNOVAL) 2030 gid = node->tn_gid; 2031 MPASS(uid != VNOVAL && gid != VNOVAL); 2032 2033 /* Disallow this operation if the file system is mounted read-only. */ 2034 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2035 return (EROFS); 2036 2037 /* Immutable or append-only files cannot be modified, either. 
*/ 2038 if (node->tn_flags & (IMMUTABLE | APPEND)) 2039 return (EPERM); 2040 2041 /* 2042 * To modify the ownership of a file, must possess VADMIN for that 2043 * file. 2044 */ 2045 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 2046 return (error); 2047 2048 /* 2049 * To change the owner of a file, or change the group of a file to a 2050 * group of which we are not a member, the caller must have 2051 * privilege. 2052 */ 2053 if ((uid != node->tn_uid || 2054 (gid != node->tn_gid && !groupmember(gid, cred))) && 2055 (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) 2056 return (error); 2057 2058 ogid = node->tn_gid; 2059 ouid = node->tn_uid; 2060 2061 node->tn_uid = uid; 2062 node->tn_gid = gid; 2063 2064 node->tn_status |= TMPFS_NODE_CHANGED; 2065 2066 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 2067 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { 2068 newmode = node->tn_mode & ~(S_ISUID | S_ISGID); 2069 atomic_store_short(&node->tn_mode, newmode); 2070 } 2071 } 2072 2073 ASSERT_VOP_ELOCKED(vp, "chown2"); 2074 2075 return (0); 2076 } 2077 2078 /* 2079 * Change size of the given vnode. 2080 * Caller should execute tmpfs_update on vp after a successful execution. 2081 * The vnode must be locked on entry and remain locked on exit. 2082 */ 2083 int 2084 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 2085 struct thread *p) 2086 { 2087 int error; 2088 struct tmpfs_node *node; 2089 2090 ASSERT_VOP_ELOCKED(vp, "chsize"); 2091 2092 node = VP_TO_TMPFS_NODE(vp); 2093 2094 /* Decide whether this is a valid operation based on the file type. 
*/ 2095 error = 0; 2096 switch (vp->v_type) { 2097 case VDIR: 2098 return (EISDIR); 2099 2100 case VREG: 2101 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2102 return (EROFS); 2103 break; 2104 2105 case VBLK: 2106 /* FALLTHROUGH */ 2107 case VCHR: 2108 /* FALLTHROUGH */ 2109 case VFIFO: 2110 /* 2111 * Allow modifications of special files even if in the file 2112 * system is mounted read-only (we are not modifying the 2113 * files themselves, but the objects they represent). 2114 */ 2115 return (0); 2116 2117 default: 2118 /* Anything else is unsupported. */ 2119 return (EOPNOTSUPP); 2120 } 2121 2122 /* Immutable or append-only files cannot be modified, either. */ 2123 if (node->tn_flags & (IMMUTABLE | APPEND)) 2124 return (EPERM); 2125 2126 error = tmpfs_truncate(vp, size); 2127 /* 2128 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 2129 * for us, as will update tn_status; no need to do that here. 2130 */ 2131 2132 ASSERT_VOP_ELOCKED(vp, "chsize2"); 2133 2134 return (error); 2135 } 2136 2137 /* 2138 * Change access and modification times of the given vnode. 2139 * Caller should execute tmpfs_update on vp after a successful execution. 2140 * The vnode must be locked on entry and remain locked on exit. 2141 */ 2142 int 2143 tmpfs_chtimes(struct vnode *vp, struct vattr *vap, 2144 struct ucred *cred, struct thread *l) 2145 { 2146 int error; 2147 struct tmpfs_node *node; 2148 2149 ASSERT_VOP_ELOCKED(vp, "chtimes"); 2150 2151 node = VP_TO_TMPFS_NODE(vp); 2152 2153 /* Disallow this operation if the file system is mounted read-only. */ 2154 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2155 return (EROFS); 2156 2157 /* Immutable or append-only files cannot be modified, either. 
*/ 2158 if (node->tn_flags & (IMMUTABLE | APPEND)) 2159 return (EPERM); 2160 2161 error = vn_utimes_perm(vp, vap, cred, l); 2162 if (error != 0) 2163 return (error); 2164 2165 if (vap->va_atime.tv_sec != VNOVAL) 2166 node->tn_accessed = true; 2167 2168 if (vap->va_mtime.tv_sec != VNOVAL) 2169 node->tn_status |= TMPFS_NODE_MODIFIED; 2170 2171 if (vap->va_birthtime.tv_sec != VNOVAL) 2172 node->tn_status |= TMPFS_NODE_MODIFIED; 2173 2174 tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); 2175 2176 if (vap->va_birthtime.tv_sec != VNOVAL) 2177 node->tn_birthtime = vap->va_birthtime; 2178 ASSERT_VOP_ELOCKED(vp, "chtimes2"); 2179 2180 return (0); 2181 } 2182 2183 void 2184 tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status) 2185 { 2186 2187 if ((node->tn_status & status) == status || tm->tm_ronly) 2188 return; 2189 TMPFS_NODE_LOCK(node); 2190 node->tn_status |= status; 2191 TMPFS_NODE_UNLOCK(node); 2192 } 2193 2194 void 2195 tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node) 2196 { 2197 if (node->tn_accessed || tm->tm_ronly) 2198 return; 2199 atomic_store_8(&node->tn_accessed, true); 2200 } 2201 2202 /* Sync timestamps */ 2203 void 2204 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 2205 const struct timespec *mod) 2206 { 2207 struct tmpfs_node *node; 2208 struct timespec now; 2209 2210 ASSERT_VOP_LOCKED(vp, "tmpfs_itimes"); 2211 node = VP_TO_TMPFS_NODE(vp); 2212 2213 if (!node->tn_accessed && 2214 (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0) 2215 return; 2216 2217 vfs_timestamp(&now); 2218 TMPFS_NODE_LOCK(node); 2219 if (node->tn_accessed) { 2220 if (acc == NULL) 2221 acc = &now; 2222 node->tn_atime = *acc; 2223 } 2224 if (node->tn_status & TMPFS_NODE_MODIFIED) { 2225 if (mod == NULL) 2226 mod = &now; 2227 node->tn_mtime = *mod; 2228 } 2229 if (node->tn_status & TMPFS_NODE_CHANGED) 2230 node->tn_ctime = now; 2231 node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 2232 
node->tn_accessed = false; 2233 TMPFS_NODE_UNLOCK(node); 2234 2235 /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ 2236 random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME); 2237 } 2238 2239 int 2240 tmpfs_truncate(struct vnode *vp, off_t length) 2241 { 2242 int error; 2243 struct tmpfs_node *node; 2244 2245 node = VP_TO_TMPFS_NODE(vp); 2246 2247 if (length < 0) { 2248 error = EINVAL; 2249 goto out; 2250 } 2251 2252 if (node->tn_size == length) { 2253 error = 0; 2254 goto out; 2255 } 2256 2257 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 2258 return (EFBIG); 2259 2260 error = tmpfs_reg_resize(vp, length, FALSE); 2261 if (error == 0) 2262 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 2263 2264 out: 2265 tmpfs_update(vp); 2266 2267 return (error); 2268 } 2269 2270 static __inline int 2271 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 2272 { 2273 if (a->td_hash > b->td_hash) 2274 return (1); 2275 else if (a->td_hash < b->td_hash) 2276 return (-1); 2277 return (0); 2278 } 2279 2280 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 2281