/* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */

/*-
 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
 *
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/smr.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "tmpfs file system");

static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;

MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure");
static uma_zone_t tmpfs_node_pool;
VFS_SMR_DECLARE;

int tmpfs_pager_type = -1;

static vm_object_t
tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t offset, struct ucred *cred)
{
	vm_object_t object;

	MPASS(handle == NULL);
	MPASS(offset == 0);
	object = vm_object_allocate_dyn(tmpfs_pager_type, size,
	    OBJ_COLORED | OBJ_SWAP);
	if (!swap_pager_init_object(object, NULL, NULL, size, 0)) {
		vm_object_deallocate(object);
		object = NULL;
	}
	return (object);
}
/*
 * Make sure tmpfs vnodes with writable mappings can be found on the lazy list.
 *
 * This allows for periodic mtime updates while only scanning vnodes which are
 * plausibly dirty, see tmpfs_update_mtime_lazy.
 */
static void
tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
    vm_offset_t new)
{
	struct vnode *vp;

	VM_OBJECT_ASSERT_WLOCKED(object);

	vp = object->un_pager.swp.swp_tmpfs;

	/*
	 * Forced unmount?
	 */
	if (vp == NULL) {
		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
		    ("object %p with OBJ_TMPFS_VREF but without vnode",
		    object));
		VM_OBJECT_WUNLOCK(object);
		return;
	}

	if (old == 0) {
		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
		    ("object without writable mappings has a reference"));
		VNPASS(vp->v_usecount > 0, vp);
	} else {
		VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp,
		    ("object with writable mappings does not "
		    "have a reference"));
	}

	if (old == new) {
		VM_OBJECT_WUNLOCK(object);
		return;
	}

	if (new == 0) {
		vm_object_clear_flag(object, OBJ_TMPFS_VREF);
		VM_OBJECT_WUNLOCK(object);
		vrele(vp);
	} else {
		if ((object->flags & OBJ_TMPFS_VREF) == 0) {
			vref(vp);
			vlazy(vp);
			vm_object_set_flag(object, OBJ_TMPFS_VREF);
		}
		VM_OBJECT_WUNLOCK(object);
	}
}

static void
tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	vm_offset_t new, old;

	VM_OBJECT_WLOCK(object);
	KASSERT((object->flags & OBJ_ANON) == 0,
	    ("%s: object %p with OBJ_ANON", __func__, object));
	old = object->un_pager.swp.writemappings;
	object->un_pager.swp.writemappings += (vm_ooffset_t)end - start;
	new = object->un_pager.swp.writemappings;
	tmpfs_pager_writecount_recalc(object, old, new);
	VM_OBJECT_ASSERT_UNLOCKED(object);
}

static void
tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	vm_offset_t new, old;

	VM_OBJECT_WLOCK(object);
	KASSERT((object->flags & OBJ_ANON) == 0,
	    ("%s: object %p with OBJ_ANON", __func__, object));
	old = object->un_pager.swp.writemappings;
	object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
	new = object->un_pager.swp.writemappings;
	tmpfs_pager_writecount_recalc(object, old, new);
	VM_OBJECT_ASSERT_UNLOCKED(object);
}
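/*
 * Illustrative lifecycle of the accounting above (a sketch, not executed
 * code): the first writable mapping of a file takes a use reference on
 * the vnode and puts it on the lazy list; the last unmapping drops it:
 *
 *	writemappings 0 -> N: vref(vp), vlazy(vp), set OBJ_TMPFS_VREF
 *	writemappings N -> 0: clear OBJ_TMPFS_VREF, vrele(vp)
 *
 * Intermediate transitions (N -> M, both non-zero) change no state.
 */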
static void
tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp)
{
	struct vnode *vp;

	/*
	 * A tmpfs VREG node which was reclaimed still has an object of
	 * tmpfs_pager_type, but the OBJ_TMPFS flag is no longer set.  In
	 * this case there is no v_writecount to adjust.
	 */
	if (vp_heldp != NULL)
		VM_OBJECT_RLOCK(object);
	else
		VM_OBJECT_ASSERT_LOCKED(object);
	if ((object->flags & OBJ_TMPFS) != 0) {
		vp = object->un_pager.swp.swp_tmpfs;
		if (vp != NULL) {
			*vpp = vp;
			if (vp_heldp != NULL) {
				vhold(vp);
				*vp_heldp = true;
			}
		}
	}
	if (vp_heldp != NULL)
		VM_OBJECT_RUNLOCK(object);
}

struct pagerops tmpfs_pager_ops = {
	.pgo_kvme_type = KVME_TYPE_VNODE,
	.pgo_alloc = tmpfs_pager_alloc,
	.pgo_set_writeable_dirty = vm_object_set_writeable_dirty_,
	.pgo_update_writecount = tmpfs_pager_update_writecount,
	.pgo_release_writecount = tmpfs_pager_release_writecount,
	.pgo_mightbedirty = vm_object_mightbedirty_,
	.pgo_getvp = tmpfs_pager_getvp,
};

static int
tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
{
	struct tmpfs_node *node;

	node = mem;
	node->tn_gen++;
	node->tn_size = 0;
	node->tn_status = 0;
	node->tn_accessed = false;
	node->tn_flags = 0;
	node->tn_links = 0;
	node->tn_vnode = NULL;
	node->tn_vpstate = 0;
	return (0);
}

static void
tmpfs_node_dtor(void *mem, int size, void *arg)
{
	struct tmpfs_node *node;

	node = mem;
	node->tn_type = VNON;
}

static int
tmpfs_node_init(void *mem, int size, int flags)
{
	struct tmpfs_node *node;

	node = mem;
	node->tn_id = 0;
	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF);
	node->tn_gen = arc4random();
	return (0);
}

static void
tmpfs_node_fini(void *mem, int size)
{
	struct tmpfs_node *node;

	node = mem;
	mtx_destroy(&node->tn_interlock);
}

int
tmpfs_subr_init(void)
{
	tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops,
	    OBJT_SWAP);
	if (tmpfs_pager_type == -1)
		return (EINVAL);
	tmpfs_node_pool = uma_zcreate("TMPFS node",
	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
	VFS_SMR_ZONE_SET(tmpfs_node_pool);
	return (0);
}

void
tmpfs_subr_uninit(void)
{
	if (tmpfs_pager_type != -1)
		vm_pager_free_dyn_type(tmpfs_pager_type);
	tmpfs_pager_type = -1;
	uma_zdestroy(tmpfs_node_pool);
}

static int
sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
{
	int error;
	long pages, bytes;

	pages = *(long *)arg1;
	bytes = pages * PAGE_SIZE;

	error = sysctl_handle_long(oidp, &bytes, 0, req);
	if (error || !req->newptr)
		return (error);

	pages = bytes / PAGE_SIZE;
	if (pages < TMPFS_PAGES_MINRESERVED)
		return (EINVAL);

	*(long *)arg1 = pages;
	return (0);
}

SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved,
    CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0,
    sysctl_mem_reserved, "L",
    "Amount of available memory and swap below which tmpfs growth stops");
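/*
 * For example, to keep tmpfs from growing once less than 256 MB of memory
 * plus swap remains available (the value is given in bytes and stored
 * internally in pages; the exact number is an illustration, not a
 * recommendation):
 *
 *	# sysctl vfs.tmpfs.memory_reserved=268435456
 */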
static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
    struct tmpfs_dirent *b);
RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);

size_t
tmpfs_mem_avail(void)
{
	size_t avail;
	long reserved;

	avail = swap_pager_avail + vm_free_count();
	reserved = atomic_load_long(&tmpfs_pages_reserved);
	if (__predict_false(avail < reserved))
		return (0);
	return (avail - reserved);
}

size_t
tmpfs_pages_used(struct tmpfs_mount *tmp)
{
	const size_t node_size = sizeof(struct tmpfs_node) +
	    sizeof(struct tmpfs_dirent);
	size_t meta_pages;

	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
	    PAGE_SIZE);
	return (meta_pages + tmp->tm_pages_used);
}
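/*
 * Worked example of the metadata accounting above (illustrative figures,
 * assuming PAGE_SIZE is 4096 and node_size comes to 320 bytes): a mount
 * with 10000 nodes in use charges howmany(10000 * 320, 4096) == 782
 * pages of metadata on top of tm_pages_used.
 */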
static bool
tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
{
	if (tmpfs_mem_avail() < req_pages)
		return (false);

	if (tmp->tm_pages_max != ULONG_MAX &&
	    tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
		return (false);

	return (true);
}

static int
tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
    int end, boolean_t ignerr)
{
	vm_page_t m;
	int rv, error;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(base >= 0, ("%s: base %d", __func__, base));
	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
	    end));
	error = 0;

retry:
	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		MPASS(vm_page_all_valid(m));
	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
		m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL |
		    VM_ALLOC_WAITFAIL);
		if (m == NULL)
			goto retry;
		vm_object_pip_add(object, 1);
		VM_OBJECT_WUNLOCK(object);
		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
		VM_OBJECT_WLOCK(object);
		vm_object_pip_wakeup(object);
		if (rv == VM_PAGER_OK) {
			/*
			 * Since the page was not resident, and therefore not
			 * recently accessed, immediately enqueue it for
			 * asynchronous laundering.  The current operation is
			 * not regarded as an access.
			 */
			vm_page_launder(m);
		} else {
			vm_page_free(m);
			m = NULL;
			if (!ignerr)
				error = EIO;
		}
	}
	if (m != NULL) {
		pmap_zero_page_area(m, base, end - base);
		vm_page_set_dirty(m);
		vm_page_xunbusy(m);
	}

	return (error);
}

void
tmpfs_ref_node(struct tmpfs_node *node)
{
#ifdef INVARIANTS
	u_int old;

	old =
#endif
	refcount_acquire(&node->tn_refcount);
#ifdef INVARIANTS
	KASSERT(old > 0, ("node %p zero refcount", node));
#endif
}

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
 * using the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are allocated from the tmpfs node pool, as long as
 * there is enough space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    const char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;
	char *symlink;
	bool symlink_smr;

	/*
	 * If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it.
	 */
	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	MPASS(IFF(type == VLNK, target != NULL));
	MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
		return (ENOSPC);
	if (!tmpfs_pages_check_avail(tmp, 1))
		return (ENOSPC);

	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
		/*
		 * When a new tmpfs node is created for a fully
		 * constructed mount point, there must be a parent
		 * node, whose vnode is locked exclusively.  As a
		 * consequence, if the unmount is executing in
		 * parallel, vflush() cannot reclaim the parent vnode.
		 * Due to this, the check for the MNTK_UNMOUNT flag is
		 * not racy: if we did not see MNTK_UNMOUNT, then tmp
		 * cannot be destroyed until node construction is
		 * finished and the parent vnode unlocked.
		 *
		 * Tmpfs does not need to instantiate new nodes during
		 * unmount.
		 */
		return (EBUSY);
	}
	if ((mp->mnt_flag & MNT_RDONLY) != 0)
		return (EROFS);

	nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK);

	/* Generic initialization. */
	nnode->tn_type = type;
	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
	nnode->tn_refcount = 1;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		RB_INIT(&nnode->tn_dir.tn_dirhead);
		LIST_INIT(&nnode->tn_dir.tn_dupindex);
		MPASS(parent != nnode);
		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
		nnode->tn_dir.tn_readdir_lastn = 0;
		nnode->tn_dir.tn_readdir_lastp = NULL;
		nnode->tn_links++;
		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
		nnode->tn_dir.tn_parent->tn_links++;
		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		MPASS(strlen(target) < MAXPATHLEN);
		nnode->tn_size = strlen(target);

		symlink = NULL;
		if (!tmp->tm_nonc) {
			symlink = cache_symlink_alloc(nnode->tn_size + 1,
			    M_WAITOK);
			symlink_smr = true;
		}
		if (symlink == NULL) {
			symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME,
			    M_WAITOK);
			symlink_smr = false;
		}
		memcpy(symlink, target, nnode->tn_size + 1);

		/*
		 * Allow safe symlink resolving for lockless lookup.
		 * tmpfs_fplookup_symlink references this comment.
		 *
		 * 1. nnode is not yet visible to the world
		 * 2. both tn_link_target and tn_link_smr get populated
		 * 3. release fence publishes their content
		 * 4. tn_link_target content is immutable until node
		 *    destruction, where the pointer gets set to NULL
		 * 5. tn_link_smr is never changed once set
		 *
		 * As a result it is sufficient to issue load consume
		 * on the node pointer to also get the above content
		 * in a stable manner.  Worst case the tn_link_smr flag
		 * may be set to true despite being stale, while the
		 * target buffer is already cleared out.
		 */
		atomic_store_ptr(&nnode->tn_link_target, symlink);
		atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr);
		atomic_thread_fence_rel();
		break;

	case VREG:
		nnode->tn_reg.tn_aobj =
		    vm_pager_allocate(tmpfs_pager_type, NULL, 0,
			VM_PROT_DEFAULT, 0,
			NULL /* XXXKIB - tmpfs needs swap reservation */);
		/* OBJ_TMPFS is set together with the setting of vp->v_object. */
		nnode->tn_reg.tn_tmp = tmp;
		break;

	default:
		panic("tmpfs_alloc_node: type %p %d", nnode,
		    (int)nnode->tn_type);
	}

	TMPFS_LOCK(tmp);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
	nnode->tn_attached = true;
	tmp->tm_nodes_inuse++;
	tmp->tm_refcount++;
	TMPFS_UNLOCK(tmp);

	*node = nnode;
	return (0);
}
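/*
 * A minimal call sketch (mirroring how tmpfs_alloc_file drives this
 * function; the argument values are illustrative): allocating a node for
 * a symbolic link pointing at "target" would look like
 *
 *	error = tmpfs_alloc_node(mp, tmp, VLNK, uid, gid, 0755, NULL,
 *	    "target", VNOVAL, &node);
 *
 * with parent NULL and rdev VNOVAL, as the assertions above require for
 * this node type.
 */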
/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node references a directory, it must have no entries.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	if (refcount_release_if_not_last(&node->tn_refcount))
		return;

	TMPFS_LOCK(tmp);
	TMPFS_NODE_LOCK(node);
	if (!tmpfs_free_node_locked(tmp, node, false)) {
		TMPFS_NODE_UNLOCK(node);
		TMPFS_UNLOCK(tmp);
	}
}

bool
tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    bool detach)
{
	vm_object_t uobj;
	char *symlink;
	bool last;

	TMPFS_MP_ASSERT_LOCKED(tmp);
	TMPFS_NODE_ASSERT_LOCKED(node);

	last = refcount_release(&node->tn_refcount);
	if (node->tn_attached && (detach || last)) {
		MPASS(tmp->tm_nodes_inuse > 0);
		tmp->tm_nodes_inuse--;
		LIST_REMOVE(node, tn_entries);
		node->tn_attached = false;
	}
	if (!last)
		return (false);

	TMPFS_NODE_UNLOCK(node);

#ifdef INVARIANTS
	MPASS(node->tn_vnode == NULL);
	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);

	/*
	 * Make sure this is a node type we can deal with.  Everything
	 * is explicitly enumerated without the 'default' clause so
	 * the compiler can throw an error in case a new type is
	 * added.
	 */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
	case VDIR:
	case VFIFO:
	case VSOCK:
	case VLNK:
	case VREG:
		break;
	case VNON:
	case VBAD:
	case VMARKER:
		panic("%s: bad type %d for node %p", __func__,
		    (int)node->tn_type, node);
	}
#endif

	switch (node->tn_type) {
	case VREG:
		uobj = node->tn_reg.tn_aobj;
		if (uobj != NULL && uobj->size != 0)
			atomic_subtract_long(&tmp->tm_pages_used, uobj->size);

		tmpfs_free_tmp(tmp);

		if (uobj != NULL) {
			KASSERT((uobj->flags & OBJ_TMPFS) == 0,
			    ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj));
			vm_object_deallocate(uobj);
		}
		break;
	case VLNK:
		tmpfs_free_tmp(tmp);

		symlink = node->tn_link_target;
		atomic_store_ptr(&node->tn_link_target, NULL);
		if (atomic_load_char(&node->tn_link_smr)) {
			cache_symlink_free(symlink, node->tn_size + 1);
		} else {
			free(symlink, M_TMPFSNAME);
		}
		break;
	default:
		tmpfs_free_tmp(tmp);
		break;
	}

	uma_zfree_smr(tmpfs_node_pool, node);
	return (true);
}
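/*
 * Usage note (a sketch of the reference protocol as used elsewhere in
 * this file, e.g. by tmpfs_alloc_vp): every tmpfs_ref_node() must
 * eventually be paired with a tmpfs_free_node() call:
 *
 *	tmpfs_ref_node(node);
 *	... use node without holding the mount or node lock ...
 *	tmpfs_free_node(tmp, node);
 *
 * The node is only destroyed when the last reference goes away.
 */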
static __inline uint32_t
tmpfs_dirent_hash(const char *name, u_int len)
{
	uint32_t hash;

	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
	hash &= 0xf;
#endif
	if (hash < TMPFS_DIRCOOKIE_MIN)
		hash += TMPFS_DIRCOOKIE_MIN;

	return (hash);
}

static __inline off_t
tmpfs_dirent_cookie(struct tmpfs_dirent *de)
{
	if (de == NULL)
		return (TMPFS_DIRCOOKIE_EOF);

	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);

	return (de->td_cookie);
}

static __inline boolean_t
tmpfs_dirent_dup(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
}

static __inline boolean_t
tmpfs_dirent_duphead(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
}
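/*
 * Sketch of the cookie scheme implied by the helpers above: a directory
 * offset (cookie) is normally the FNV-1 hash of the entry name, truncated
 * by TMPFS_DIRCOOKIE_MASK and bumped above TMPFS_DIRCOOKIE_MIN so that it
 * never collides with the reserved '.', '..' and EOF cookies.  When two
 * names hash identically, the tree-resident entry becomes a duphead
 * (cookie has TMPFS_DIRCOOKIE_DUPHEAD set) whose list members carry
 * sequentially allocated cookies with TMPFS_DIRCOOKIE_DUP set; see
 * tmpfs_dir_attach_dup() below.
 */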
void
tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
{
	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
	memcpy(de->ud.td_name, name, namelen);
	de->td_namelen = namelen;
}

/*
 * Allocates a new directory entry for the node 'node' with a name of
 * 'name'.  The new directory entry is returned in *de.
 *
 * The link count of 'node' is increased by one to reflect the new object
 * referencing it.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, u_int len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK);
	nde->td_node = node;
	if (name != NULL) {
		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
		tmpfs_dirent_init(nde, name, len);
	} else
		nde->td_namelen = 0;
	if (node != NULL)
		node->tn_links++;

	*de = nde;

	return (0);
}

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of the node is decreased by one to reflect the removal
 * of an object that referenced it.  This only happens if the directory
 * entry still points at a node; otherwise the function will not access
 * the node referred to by the directory entry, as it may already have
 * been released from the outside.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *node;

	node = de->td_node;
	if (node != NULL) {
		MPASS(node->tn_links > 0);
		node->tn_links--;
	}
	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
		free(de->ud.td_name, M_TMPFSNAME);
	free(de, M_TMPFSDIR);
}

void
tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
{
	bool want_vrele;

	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
	if (vp->v_type != VREG || obj == NULL)
		return;

	VM_OBJECT_WLOCK(obj);
	VI_LOCK(vp);
	/*
	 * May be going through forced unmount.
	 */
	want_vrele = false;
	if ((obj->flags & OBJ_TMPFS_VREF) != 0) {
		vm_object_clear_flag(obj, OBJ_TMPFS_VREF);
		want_vrele = true;
	}

	vm_object_clear_flag(obj, OBJ_TMPFS);
	obj->un_pager.swp.swp_tmpfs = NULL;
	if (vp->v_writecount < 0)
		vp->v_writecount = 0;
	VI_UNLOCK(vp);
	VM_OBJECT_WUNLOCK(obj);
	if (want_vrele) {
		vrele(vp);
	}
}

/*
 * Allocates a new vnode for the node 'node' or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
    struct vnode **vpp)
{
	struct vnode *vp;
	enum vgetstate vs;
	struct tmpfs_mount *tm;
	vm_object_t object;
	int error;

	error = 0;
	tm = VFS_TO_TMPFS(mp);
	TMPFS_NODE_LOCK(node);
	tmpfs_ref_node(node);
loop:
	TMPFS_NODE_ASSERT_LOCKED(node);
	if ((vp = node->tn_vnode) != NULL) {
		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
		    (VN_IS_DOOMED(vp) &&
		    (lkflag & LK_NOWAIT) != 0)) {
			TMPFS_NODE_UNLOCK(node);
			error = ENOENT;
			vp = NULL;
			goto out;
		}
		if (VN_IS_DOOMED(vp)) {
			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
				    0, "tmpfsE", 0);
			}
			goto loop;
		}
		vs = vget_prep(vp);
		TMPFS_NODE_UNLOCK(node);
		error = vget_finish(vp, lkflag, vs);
		if (error == ENOENT) {
			TMPFS_NODE_LOCK(node);
			goto loop;
		}
		if (error != 0) {
			vp = NULL;
			goto out;
		}

		/*
		 * Make sure the vnode is still there after
		 * getting the interlock to avoid racing a free.
		 */
		if (node->tn_vnode != vp) {
			vput(vp);
			TMPFS_NODE_LOCK(node);
			goto loop;
		}

		goto out;
	}

	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		vp = NULL;
		goto out;
	}

	/*
	 * Otherwise lock the vp list while we call getnewvnode(),
	 * since that can block.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = msleep((caddr_t)&node->tn_vpstate,
		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
		if (error != 0)
			goto out;
		goto loop;
	} else
		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;

	TMPFS_NODE_UNLOCK(node);

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
	if (error != 0)
		goto unlock;
	MPASS(vp != NULL);

	/* lkflag is ignored, the lock is exclusive. */
	(void)vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VFIFO:
		vp->v_op = &tmpfs_fifoop_entries;
		break;
	case VREG:
		object = node->tn_reg.tn_aobj;
		VM_OBJECT_WLOCK(object);
		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
		    __func__, object));
		KASSERT(object->un_pager.swp.writemappings == 0,
		    ("%s: object %p has writemappings",
		    __func__, object));
		VI_LOCK(vp);
		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
		vp->v_object = object;
		object->un_pager.swp.swp_tmpfs = vp;
		vm_object_set_flag(object, OBJ_TMPFS);
		vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF);
		VI_UNLOCK(vp);
		VM_OBJECT_WUNLOCK(object);
		break;
	case VDIR:
		MPASS(node->tn_dir.tn_parent != NULL);
		if (node->tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	error = insmntque1(vp, mp);
	if (error != 0) {
		/* Need to clear v_object for insmntque failure. */
		tmpfs_destroy_vobject(vp, vp->v_object);
		vp->v_object = NULL;
		vp->v_data = NULL;
		vp->v_op = &dead_vnodeops;
		vgone(vp);
		vput(vp);
		vp = NULL;
	}

unlock:
	TMPFS_NODE_LOCK(node);

	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup((caddr_t)&node->tn_vpstate);
	} else
		TMPFS_NODE_UNLOCK(node);

out:
	if (error == 0) {
		*vpp = vp;

#ifdef INVARIANTS
		MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp));
		TMPFS_NODE_LOCK(node);
		MPASS(*vpp == node->tn_vnode);
		TMPFS_NODE_UNLOCK(node);
#endif
	}
	tmpfs_free_node(tm, node);

	return (error);
}
/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_ASSERT_LOCKED(node);
	node->tn_vnode = NULL;
	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
		wakeup(&node->tn_vnode);
	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
	vp->v_data = NULL;
}

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, const char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/*
	 * If the entry we are creating is a directory, the parent gains a
	 * new link, so make sure its link count cannot overflow.
	 */
	if (vap->va_type == VDIR) {
		/*
		 * Ensure that we do not overflow the maximum number of
		 * links imposed by the system.
		 */
		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
		if (dnode->tn_links == TMPFS_LINK_MAX) {
			return (EMLINK);
		}

		parent = dnode;
		MPASS(parent != NULL);
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
	    target, vap->va_rdev, &node);
	if (error != 0)
		return (error);

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/*
	 * Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail.
	 */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);
	return (0);
}
struct tmpfs_dirent *
tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
	dc->tdc_tree = de;
	if (de != NULL && tmpfs_dirent_duphead(de))
		de = LIST_FIRST(&de->ud.td_duphead);
	dc->tdc_current = de;

	return (dc->tdc_current);
}

struct tmpfs_dirent *
tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	MPASS(dc->tdc_tree != NULL);
	if (tmpfs_dirent_dup(dc->tdc_current)) {
		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
		if (dc->tdc_current != NULL)
			return (dc->tdc_current);
	}
	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}

	return (dc->tdc_current);
}

/* Lookup directory entry in RB-Tree.  Function may return duphead entry. */
static struct tmpfs_dirent *
tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
{
	struct tmpfs_dirent *de, dekey;

	dekey.td_hash = hash;
	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
	return (de);
}

/*
 * Lookup directory entry by cookie, initialize directory cursor
 * accordingly.
 */
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
    struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
	struct tmpfs_dirent *de, dekey;

	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);

	if (cookie == node->tn_dir.tn_readdir_lastn &&
	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
		/*
		 * Protect against a possible race: tn_readdir_last[pn]
		 * may be updated with only the shared vnode lock held.
		 */
		if (cookie == tmpfs_dirent_cookie(de))
			goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
		    uh.td_dup.index_entries) {
			MPASS(tmpfs_dirent_dup(de));
			if (de->td_cookie == cookie)
				goto out;
			/* dupindex list is sorted. */
			if (de->td_cookie < cookie) {
				de = NULL;
				goto out;
			}
		}
		MPASS(de == NULL);
		goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
		de = NULL;
	} else {
		dekey.td_hash = cookie;
		/* Recover if the direntry for the cookie was removed. */
		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
	}
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}
	return (dc->tdc_current);

out:
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_dup(de))
		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
		    de->td_hash);
	return (dc->tdc_current);
}
/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
    struct componentname *cnp)
{
	struct tmpfs_dir_duphead *duphead;
	struct tmpfs_dirent *de;
	uint32_t hash;

	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
	de = tmpfs_dir_xlookup_hash(node, hash);
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		duphead = &de->ud.td_duphead;
		LIST_FOREACH(de, duphead, uh.td_dup.entries) {
			if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
			    cnp->cn_namelen))
				break;
		}
	} else if (de != NULL) {
		if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
		    cnp->cn_namelen))
			de = NULL;
	}
	if (de != NULL && f != NULL && de->td_node != f)
		de = NULL;

	return (de);
}

/*
 * Attach duplicate-cookie directory entry nde to dnode, insert it into the
 * dupindex list and allocate a new cookie value.
 */
static void
tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
    struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
{
	struct tmpfs_dir_duphead *dupindex;
	struct tmpfs_dirent *de, *pde;

	dupindex = &dnode->tn_dir.tn_dupindex;
	de = LIST_FIRST(dupindex);
	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
		if (de == NULL)
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
		else
			nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}

	/*
	 * Cookie numbers are near exhaustion.  Scan the dupindex list for
	 * unused numbers.  The dupindex list is sorted in descending order.
	 * Keep it so after inserting nde.
	 */
	while (1) {
		pde = de;
		de = LIST_NEXT(de, uh.td_dup.index_entries);
		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
			/*
			 * The last element of the index doesn't have the
			 * minimal cookie value, use it.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		} else if (de == NULL) {
			/*
			 * We are so lucky as to have 2^30 hash duplicates
			 * in a single directory :)  Return the largest
			 * possible cookie value.  It should be fine except
			 * for possible issues with VOP_READDIR restart.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
			LIST_INSERT_HEAD(dupindex, nde,
			    uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		}
		if (de->td_cookie + 1 == pde->td_cookie ||
		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
			continue;	/* No hole or invalid cookie. */
		nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		MPASS(pde->td_cookie > nde->td_cookie);
		MPASS(nde->td_cookie > de->td_cookie);
		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}
}
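/*
 * Worked example of the hole-filling pass above (cookies shown as offsets
 * from TMPFS_DIRCOOKIE_DUP_MIN, illustrative only): once the list head has
 * reached TMPFS_DIRCOOKIE_DUP_MAX, a descending dupindex of
 * {DUP_MAX, +3, +0} is scanned pairwise; the pair (DUP_MAX, +3) exposes a
 * hole, so the new entry gets cookie +4 and is inserted before +3, keeping
 * the list sorted in descending order.
 */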
/*
 * Attaches the directory entry de to the directory represented by vp.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde, *nde;

	ASSERT_VOP_ELOCKED(vp, __func__);
	MPASS(de->td_namelen > 0);
	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
	MPASS(de->td_cookie == de->td_hash);

	dnode = VP_TO_TMPFS_DIR(vp);
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	MPASS(!tmpfs_dirent_dup(de));
	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
	if (xde != NULL && tmpfs_dirent_duphead(xde))
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	else if (xde != NULL) {
		/*
		 * Allocate a new duphead.  Swap xde with the duphead to
		 * avoid adding/removing elements with the same hash.
		 */
		MPASS(!tmpfs_dirent_dup(xde));
		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
		    &nde);
		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
		memcpy(nde, xde, sizeof(*xde));
		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
		LIST_INIT(&xde->ud.td_duphead);
		xde->td_namelen = 0;
		xde->td_node = NULL;
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	}
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	dnode->tn_accessed = true;
	tmpfs_update(vp);
}
/*
 * Detaches the directory entry de from the directory represented by vp.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_free_dirent.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_dir *head;
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde;

	ASSERT_VOP_ELOCKED(vp, __func__);

	dnode = VP_TO_TMPFS_DIR(vp);
	head = &dnode->tn_dir.tn_dirhead;
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	if (tmpfs_dirent_dup(de)) {
		/* Remove the duphead if de was its last entry. */
		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
			MPASS(tmpfs_dirent_duphead(xde));
		} else
			xde = NULL;
		LIST_REMOVE(de, uh.td_dup.entries);
		LIST_REMOVE(de, uh.td_dup.index_entries);
		if (xde != NULL) {
			if (LIST_EMPTY(&xde->ud.td_duphead)) {
				RB_REMOVE(tmpfs_dir, head, xde);
				tmp = VFS_TO_TMPFS(vp->v_mount);
				MPASS(xde->td_node == NULL);
				tmpfs_free_dirent(tmp, xde);
			}
		}
		de->td_cookie = de->td_hash;
	} else
		RB_REMOVE(tmpfs_dir, head, de);

	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	dnode->tn_accessed = true;
	tmpfs_update(vp);
}

void
tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
{
	struct tmpfs_dirent *de, *dde, *nde;

	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
		/* Node may already be destroyed. */
		de->td_node = NULL;
		if (tmpfs_dirent_duphead(de)) {
			while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
				LIST_REMOVE(dde, uh.td_dup.entries);
				dde->td_node = NULL;
				tmpfs_free_dirent(tmp, dde);
			}
		}
		tmpfs_free_dirent(tmp, de);
	}
}

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0 on
 * success, EJUSTRETURN if there was not enough space in the uio structure
 * to hold the directory entry, or an appropriate error code if another
 * error happens.
 */
static int
tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
    struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dent.d_fileno = node->tn_id;
	dent.d_off = TMPFS_DIRCOOKIE_DOTDOT;
	dent.d_type = DT_DIR;
	dent.d_namlen = 1;
	dent.d_name[0] = '.';
	dent.d_reclen = GENERIC_DIRSIZ(&dent);
	dirent_terminate(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = EJUSTRETURN;
	else
		error = uiomove(&dent, dent.d_reclen, uio);

	tmpfs_set_accessed(tm, node);

	return (error);
}
/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0 on
 * success, EJUSTRETURN if there was not enough space in the uio structure
 * to hold the directory entry, or an appropriate error code if another
 * error happens.
 */
static int
tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
    struct uio *uio, off_t next)
{
	struct tmpfs_node *parent;
	struct dirent dent;
	int error;

	TMPFS_VALIDATE_DIR(node);
	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	/*
	 * Return ENOENT if the current node is already removed.
	 */
	TMPFS_ASSERT_LOCKED(node);
	parent = node->tn_dir.tn_parent;
	if (parent == NULL)
		return (ENOENT);

	dent.d_fileno = parent->tn_id;
	dent.d_off = next;
	dent.d_type = DT_DIR;
	dent.d_namlen = 2;
	dent.d_name[0] = '.';
	dent.d_name[1] = '.';
	dent.d_reclen = GENERIC_DIRSIZ(&dent);
	dirent_terminate(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = EJUSTRETURN;
	else
		error = uiomove(&dent, dent.d_reclen, uio);

	tmpfs_set_accessed(tm, node);

	return (error);
}
/*
 * Helper function for tmpfs_readdir.  Returns as many directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, EJUSTRETURN if there was not enough
 * space in the uio structure to hold the directory entry, or an
 * appropriate error code if another error happens.
 */
int
tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node,
    struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de, *nde;
	off_t off;
	int error;

	TMPFS_VALIDATE_DIR(node);

	off = 0;

	/*
	 * Lookup the node from the current offset.  The starting offset of
	 * 0 will lookup both '.' and '..', and then the first real entry,
	 * or EOF if there are none.  Then find all entries for the dir that
	 * fit into the buffer.  Once no more entries are found (de == NULL),
	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
	 * call to return 0.
	 */
	switch (uio->uio_offset) {
	case TMPFS_DIRCOOKIE_DOT:
		error = tmpfs_dir_getdotdent(tm, node, uio);
		if (error != 0)
			return (error);
		uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT;
		if (cookies != NULL)
			cookies[(*ncookies)++] = off;
		/* FALLTHROUGH */
	case TMPFS_DIRCOOKIE_DOTDOT:
		de = tmpfs_dir_first(node, &dc);
		off = tmpfs_dirent_cookie(de);
		error = tmpfs_dir_getdotdotdent(tm, node, uio, off);
		if (error != 0)
			return (error);
		uio->uio_offset = off;
		if (cookies != NULL)
			cookies[(*ncookies)++] = off;
		/* EOF. */
		if (de == NULL)
			return (0);
		break;
	case TMPFS_DIRCOOKIE_EOF:
		return (0);
	default:
		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
		if (de == NULL)
			return (EINVAL);
		if (cookies != NULL)
			off = tmpfs_dirent_cookie(de);
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space.
	 */
	do {
		struct dirent d;

		/*
		 * Create a dirent structure representing the current
		 * tmpfs_node and fill it.
		 */
		if (de->td_node == NULL) {
			d.d_fileno = 1;
			d.d_type = DT_WHT;
		} else {
			d.d_fileno = de->td_node->tn_id;
			switch (de->td_node->tn_type) {
			case VBLK:
				d.d_type = DT_BLK;
				break;

			case VCHR:
				d.d_type = DT_CHR;
				break;

			case VDIR:
				d.d_type = DT_DIR;
				break;

			case VFIFO:
				d.d_type = DT_FIFO;
				break;

			case VLNK:
				d.d_type = DT_LNK;
				break;

			case VREG:
				d.d_type = DT_REG;
				break;

			case VSOCK:
				d.d_type = DT_SOCK;
				break;

			default:
				panic("tmpfs_dir_getdents: type %p %d",
				    de->td_node, (int)de->td_node->tn_type);
			}
		}
		d.d_namlen = de->td_namelen;
		MPASS(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
		d.d_reclen = GENERIC_DIRSIZ(&d);

		/*
		 * Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned.
		 */
		if (d.d_reclen > uio->uio_resid) {
			error = EJUSTRETURN;
			break;
		}

		nde = tmpfs_dir_next(node, &dc);
		d.d_off = tmpfs_dirent_cookie(nde);
		dirent_terminate(&d);

		/*
		 * Copy the new dirent structure into the output buffer and
		 * advance pointers.
		 */
		error = uiomove(&d, d.d_reclen, uio);
		if (error == 0) {
			de = nde;
			if (cookies != NULL) {
				off = tmpfs_dirent_cookie(de);
				MPASS(*ncookies < maxcookies);
				cookies[(*ncookies)++] = off;
			}
		}
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Skip setting off when using cookies as it is already done above. */
	if (cookies == NULL)
		off = tmpfs_dirent_cookie(de);

	/* Update the offset and cache. */
	uio->uio_offset = off;
	node->tn_dir.tn_readdir_lastn = off;
	node->tn_dir.tn_readdir_lastp = de;

	tmpfs_set_accessed(tm, node);
	return (error);
}
int
tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	int error;

	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
	    cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error != 0)
		return (error);
	tmpfs_dir_attach(dvp, de);
	return (0);
}

void
tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;

	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
	MPASS(de != NULL && de->td_node == NULL);
	tmpfs_dir_detach(dvp, de);
	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
}

/*
 * Resizes the aobj associated with the regular file pointed to by 'vp' to the
 * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
 * 'newsize' must be positive.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_pindex_t idx, newpages, oldpages;
	off_t oldsize;
	int base, error;

	MPASS(vp->v_type == VREG);
	MPASS(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	uobj = node->tn_reg.tn_aobj;
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/*
	 * Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own.
	 */
	oldsize = node->tn_size;
	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
	MPASS(oldpages == uobj->size);
	newpages = OFF_TO_IDX(newsize + PAGE_MASK);

	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
		node->tn_size = newsize;
		return (0);
	}

	if (newpages > oldpages &&
	    !tmpfs_pages_check_avail(tmp, newpages - oldpages))
		return (ENOSPC);

	VM_OBJECT_WLOCK(uobj);
	if (newsize < oldsize) {
		/*
		 * Zero the truncated part of the last page.
		 */
		base = newsize & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(newsize);
			error = tmpfs_partial_page_invalidate(uobj, idx, base,
			    PAGE_SIZE, ignerr);
			if (error != 0) {
				VM_OBJECT_WUNLOCK(uobj);
				return (error);
			}
		}

		/*
		 * Release any swap space and free any whole pages.
		 */
		if (newpages < oldpages)
			vm_object_page_remove(uobj, newpages, 0, 0);
	}
	uobj->size = newpages;
	VM_OBJECT_WUNLOCK(uobj);

	atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);

	node->tn_size = newsize;
	return (0);
}
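/*
 * Worked example of the truncation path above (assuming PAGE_SIZE is
 * 4096, figures illustrative): shrinking a file from 5000 to 3000 bytes
 * gives oldpages == 2 and newpages == 1; bytes 3000..4095 of page 0 are
 * zeroed via tmpfs_partial_page_invalidate() and page 1 onward is freed
 * by vm_object_page_remove(), after which tm_pages_used drops by one.
 */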
/*
 * Punch hole in the aobj associated with the regular file pointed to by 'vp'.
 * Requests completely beyond the end-of-file are converted to no-op.
 *
 * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on
 * failure.
 */
int
tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length)
{
	struct tmpfs_node *node;
	vm_object_t object;
	vm_pindex_t pistart, pi, piend;
	int startofs, endofs, end;
	off_t off, len;
	int error;

	KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows",
	    __func__));
	node = VP_TO_TMPFS_NODE(vp);
	KASSERT(node->tn_type == VREG, ("%s: node is not regular file",
	    __func__));
	object = node->tn_reg.tn_aobj;
	off = *offset;
	len = omin(node->tn_size - off, *length);
	startofs = off & PAGE_MASK;
	endofs = (off + len) & PAGE_MASK;
	pistart = OFF_TO_IDX(off);
	piend = OFF_TO_IDX(off + len);
	pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK);
	error = 0;

	/* Handle the case when offset is on or beyond file size. */
	if (len <= 0) {
		*length = 0;
		return (0);
	}

	VM_OBJECT_WLOCK(object);

	/*
	 * If there is a partial page at the beginning of the hole-punching
	 * request, fill the partial page with zeroes.
	 */
	if (startofs != 0) {
		end = pistart != piend ? PAGE_SIZE : endofs;
		error = tmpfs_partial_page_invalidate(object, pistart, startofs,
		    end, FALSE);
		if (error != 0)
			goto out;
		off += end - startofs;
		len -= end - startofs;
	}

	/*
	 * Toss away the full pages in the affected area.
	 */
	if (pi < piend) {
		vm_object_page_remove(object, pi, piend, 0);
		off += IDX_TO_OFF(piend - pi);
		len -= IDX_TO_OFF(piend - pi);
	}

	/*
	 * If there is a partial page at the end of the hole-punching request,
	 * fill the partial page with zeroes.
	 */
	if (endofs != 0 && pistart != piend) {
		error = tmpfs_partial_page_invalidate(object, piend, 0, endofs,
		    FALSE);
		if (error != 0)
			goto out;
		off += endofs;
		len -= endofs;
	}

out:
	VM_OBJECT_WUNLOCK(object);
	*offset = off;
	*length = len;
	return (error);
}
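/*
 * Worked example (assuming PAGE_SIZE is 4096, figures illustrative):
 * punching a hole at offset 1000 of length 10000 in a large file gives
 * startofs == 1000, endofs == 2808, pistart == 0, pi == 1 and piend == 2.
 * Bytes 1000..4095 of page 0 are zeroed, page 1 is removed entirely, and
 * bytes 0..2807 of page 2 are zeroed.
 */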
void
tmpfs_check_mtime(struct vnode *vp)
{
	struct tmpfs_node *node;
	struct vm_object *obj;

	ASSERT_VOP_ELOCKED(vp, "check_mtime");
	if (vp->v_type != VREG)
		return;
	obj = vp->v_object;
	KASSERT(obj->type == tmpfs_pager_type &&
	    (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) ==
	    (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj"));
	/* unlocked read */
	if (obj->generation != obj->cleangeneration) {
		VM_OBJECT_WLOCK(obj);
		if (obj->generation != obj->cleangeneration) {
			obj->cleangeneration = obj->generation;
			node = VP_TO_TMPFS_NODE(vp);
			node->tn_status |= TMPFS_NODE_MODIFIED |
			    TMPFS_NODE_CHANGED;
		}
		VM_OBJECT_WUNLOCK(obj);
	}
}

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chflags");

	node = VP_TO_TMPFS_NODE(vp);

	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
	    UF_SPARSE | UF_SYSTEM)) != 0)
		return (EOPNOTSUPP);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/*
	 * Callers may only modify the file flags on objects they
	 * have VADMIN rights for.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);
	/*
	 * Unprivileged processes are not permitted to unset system
	 * flags, or modify flags if any system flags are set.
	 */
	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
			error = securelevel_gt(cred, 0);
			if (error)
				return (error);
		}
	} else {
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
		    ((flags ^ node->tn_flags) & SF_SETTABLE))
			return (EPERM);
	}
	node->tn_flags = flags;
	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chflags2");

	return (0);
}

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;
	mode_t newmode;

	ASSERT_VOP_ELOCKED(vp, "chmod");
	ASSERT_VOP_IN_SEQC(vp);

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	/*
	 * To modify the permissions on a file, must possess VADMIN
	 * for that file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of.
	 */
	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
			return (EFTYPE);
	}
	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID);
		if (error)
			return (error);
	}

	newmode = node->tn_mode & ~ALLPERMS;
	newmode |= mode & ALLPERMS;
	atomic_store_short(&node->tn_mode, newmode);

	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chmod2");

	return (0);
}
/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different from VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;
	uid_t ouid;
	gid_t ogid;
	mode_t newmode;

	ASSERT_VOP_ELOCKED(vp, "chown");
	ASSERT_VOP_IN_SEQC(vp);

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	MPASS(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	MPASS(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
		return (error);

	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if ((uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
		return (error);

	ogid = node->tn_gid;
	ouid = node->tn_uid;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;

	if ((node->tn_mode & (S_ISUID | S_ISGID)) != 0 &&
	    (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
			atomic_store_short(&node->tn_mode, newmode);
		}
	}

	ASSERT_VOP_ELOCKED(vp, "chown2");

	return (0);
}

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chsize");

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return (EISDIR);

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return (0);

	default:
		/* Anything else is unsupported. */
		return (EOPNOTSUPP);
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	error = vn_rlimit_trunc(size, td);
	if (error != 0)
		return (error);

	error = tmpfs_truncate(vp, size);
	/*
	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as it will update tn_status; no need to do that here.
	 */

	ASSERT_VOP_ELOCKED(vp, "chsize2");

	return (error);
}
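
/*
 * Example (illustrative only, never compiled): an unprivileged owner may
 * change a file's group to another group it belongs to without
 * PRIV_VFS_CHOWN, but the PRIV_VFS_RETAINSUGID check above then clears
 * the setuid/setgid bits.  A minimal sketch; the path and group id are
 * hypothetical.
 */
#if 0
#include <sys/stat.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	struct stat sb;
	gid_t newgid = 1001;	/* hypothetical supplementary group */

	if (chmod("/tmp/tool", S_IRWXU | S_ISGID) == -1)
		err(1, "chmod");
	/* Keep the owner ((uid_t)-1), change only the group. */
	if (chown("/tmp/tool", (uid_t)-1, newgid) == -1)
		err(1, "chown");
	if (stat("/tmp/tool", &sb) == -1)
		err(1, "stat");
	/* For a non-root caller, S_ISGID is expected to be clear now. */
	return (0);
}
#endif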
/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct thread *td)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chtimes");

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	error = vn_utimes_perm(vp, vap, cred, td);
	if (error != 0)
		return (error);

	if (vap->va_atime.tv_sec != VNOVAL)
		node->tn_accessed = true;
	if (vap->va_mtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;
	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;
	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_birthtime = vap->va_birthtime;
	ASSERT_VOP_ELOCKED(vp, "chtimes2");

	return (0);
}

void
tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
{

	if ((node->tn_status & status) == status || tm->tm_ronly)
		return;
	TMPFS_NODE_LOCK(node);
	node->tn_status |= status;
	TMPFS_NODE_UNLOCK(node);
}

void
tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node)
{
	if (node->tn_accessed || tm->tm_ronly)
		return;
	atomic_store_8(&node->tn_accessed, true);
}

/* Sync timestamps */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod)
{
	struct tmpfs_node *node;
	struct timespec now;

	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
	node = VP_TO_TMPFS_NODE(vp);

	if (!node->tn_accessed &&
	    (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0)
		return;

	vfs_timestamp(&now);
	TMPFS_NODE_LOCK(node);
	if (node->tn_accessed) {
		if (acc == NULL)
			acc = &now;
		node->tn_atime = *acc;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		if (mod == NULL)
			mod = &now;
		node->tn_mtime = *mod;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED)
		node->tn_ctime = now;
	node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
	node->tn_accessed = false;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * XXX: FIX?  The entropy here is desirable,
	 * but the harvesting may be expensive.
	 */
	random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
}
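
/*
 * Example (illustrative only, never compiled): futimens(2)/utimensat(2)
 * reach tmpfs_chtimes() through VOP_SETATTR(); UTIME_OMIT leaves the
 * corresponding vattr field at VNOVAL, so that timestamp is untouched.
 * A minimal sketch; the path and times are hypothetical.
 */
#if 0
#include <sys/stat.h>
#include <fcntl.h>
#include <err.h>

int
main(void)
{
	struct timespec ts[2];

	/* Set mtime to an explicit value; leave atime alone. */
	ts[0].tv_sec = 0;
	ts[0].tv_nsec = UTIME_OMIT;	/* atime: do not change */
	ts[1].tv_sec = 1700000000;	/* mtime: explicit time */
	ts[1].tv_nsec = 0;
	if (utimensat(AT_FDCWD, "/tmp/notes.txt", ts, 0) == -1)
		err(1, "utimensat");
	return (0);
}
#endif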
int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	if (length < 0) {
		error = EINVAL;
		goto out;
	}

	if (node->tn_size == length) {
		error = 0;
		goto out;
	}

	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
		return (EFBIG);

	error = tmpfs_reg_resize(vp, length, FALSE);
	if (error == 0)
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;

out:
	tmpfs_update(vp);

	return (error);
}

static __inline int
tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
	if (a->td_hash > b->td_hash)
		return (1);
	else if (a->td_hash < b->td_hash)
		return (-1);
	return (0);
}

RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
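
/*
 * Example (illustrative only, never compiled): the tree generated above is
 * keyed by the hash of the entry name, so a directory lookup builds a key
 * dirent with the precomputed hash and walks the tree with RB_FIND();
 * names that collide on the same hash must then still be compared byte by
 * byte.  A minimal sketch, assuming a hash helper with the shape of this
 * file's tmpfs_dirent_hash().
 */
#if 0
static struct tmpfs_dirent *
example_dir_lookup(struct tmpfs_node *dnode, const char *name, u_int len)
{
	struct tmpfs_dirent find, *de;

	find.td_hash = tmpfs_dirent_hash(name, len);
	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &find);
	/* On a hash collision the caller must still compare names. */
	return (de);
}
#endif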