1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2005 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11 * 2005 program. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Efficient memory file system supporting functions. 37 */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/dirent.h> 44 #include <sys/fnv_hash.h> 45 #include <sys/lock.h> 46 #include <sys/limits.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/random.h> 52 #include <sys/refcount.h> 53 #include <sys/rwlock.h> 54 #include <sys/smr.h> 55 #include <sys/stat.h> 56 #include <sys/sysctl.h> 57 #include <sys/user.h> 58 #include <sys/vnode.h> 59 #include <sys/vmmeter.h> 60 61 #include <vm/vm.h> 62 #include <vm/vm_param.h> 63 #include <vm/vm_object.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/vm_pager.h> 67 #include <vm/vm_extern.h> 68 #include <vm/swap_pager.h> 69 70 #include <fs/tmpfs/tmpfs.h> 71 #include <fs/tmpfs/tmpfs_fifoops.h> 72 #include <fs/tmpfs/tmpfs_vnops.h> 73 74 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "tmpfs file system"); 76 77 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 78 79 MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure"); 80 static uma_zone_t tmpfs_node_pool; 81 VFS_SMR_DECLARE; 82 83 int tmpfs_pager_type = -1; 84 85 static vm_object_t 86 tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 87 vm_ooffset_t offset, struct ucred *cred) 88 { 89 vm_object_t object; 90 91 MPASS(handle == NULL); 92 MPASS(offset == 0); 93 object = vm_object_allocate_dyn(tmpfs_pager_type, size, 94 OBJ_COLORED | OBJ_SWAP); 95 if (!swap_pager_init_object(object, NULL, NULL, size, 0)) { 96 vm_object_deallocate(object); 97 object = NULL; 98 } 99 return (object); 100 } 101 102 /* 103 * Make sure tmpfs vnodes with writable mappings can be found on the lazy list. 104 * 105 * This allows for periodic mtime updates while only scanning vnodes which are 106 * plausibly dirty, see tmpfs_update_mtime_lazy. 107 */ 108 static void 109 tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old, 110 vm_offset_t new) 111 { 112 struct vnode *vp; 113 114 VM_OBJECT_ASSERT_WLOCKED(object); 115 116 vp = object->un_pager.swp.swp_tmpfs; 117 118 /* 119 * Forced unmount? 120 */ 121 if (vp == NULL) { 122 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 123 ("object %p with OBJ_TMPFS_VREF but without vnode", object)); 124 VM_OBJECT_WUNLOCK(object); 125 return; 126 } 127 128 if (old == 0) { 129 VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp, 130 ("object without writable mappings has a reference")); 131 VNPASS(vp->v_usecount > 0, vp); 132 } else { 133 VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp, 134 ("object with writable mappings does not have a reference")); 135 } 136 137 if (old == new) { 138 VM_OBJECT_WUNLOCK(object); 139 return; 140 } 141 142 if (new == 0) { 143 vm_object_clear_flag(object, OBJ_TMPFS_VREF); 144 VM_OBJECT_WUNLOCK(object); 145 vrele(vp); 146 } else { 147 if ((object->flags & OBJ_TMPFS_VREF) == 0) { 148 vref(vp); 149 vlazy(vp); 150 vm_object_set_flag(object, OBJ_TMPFS_VREF); 151 } 152 VM_OBJECT_WUNLOCK(object); 153 } 154 } 155 156 static void 157 tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start, 158 vm_offset_t end) 159 { 160 vm_offset_t new, old; 161 162 VM_OBJECT_WLOCK(object); 163 KASSERT((object->flags & OBJ_ANON) == 0, 164 ("%s: object %p with OBJ_ANON", __func__, object)); 165 old = object->un_pager.swp.writemappings; 166 object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; 167 new = object->un_pager.swp.writemappings; 168 tmpfs_pager_writecount_recalc(object, old, new); 169 VM_OBJECT_ASSERT_UNLOCKED(object); 170 } 171 172 static void 173 tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start, 174 vm_offset_t end) 175 { 176 vm_offset_t new, old; 177 178 VM_OBJECT_WLOCK(object); 179 KASSERT((object->flags & OBJ_ANON) == 0, 180 ("%s: object %p with OBJ_ANON", __func__, object)); 181 old = object->un_pager.swp.writemappings; 182 object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; 183 new = object->un_pager.swp.writemappings; 184 tmpfs_pager_writecount_recalc(object, old, new); 185 VM_OBJECT_ASSERT_UNLOCKED(object); 186 } 187 188 static void 189 tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) 190 { 191 struct vnode *vp; 192 193 /* 194 * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type 195 * type, but not OBJ_TMPFS flag. In this case there is no 196 * v_writecount to adjust. 197 */ 198 if (vp_heldp != NULL) 199 VM_OBJECT_RLOCK(object); 200 else 201 VM_OBJECT_ASSERT_LOCKED(object); 202 if ((object->flags & OBJ_TMPFS) != 0) { 203 vp = object->un_pager.swp.swp_tmpfs; 204 if (vp != NULL) { 205 *vpp = vp; 206 if (vp_heldp != NULL) { 207 vhold(vp); 208 *vp_heldp = true; 209 } 210 } 211 } 212 if (vp_heldp != NULL) 213 VM_OBJECT_RUNLOCK(object); 214 } 215 216 struct pagerops tmpfs_pager_ops = { 217 .pgo_kvme_type = KVME_TYPE_VNODE, 218 .pgo_alloc = tmpfs_pager_alloc, 219 .pgo_set_writeable_dirty = vm_object_set_writeable_dirty_, 220 .pgo_update_writecount = tmpfs_pager_update_writecount, 221 .pgo_release_writecount = tmpfs_pager_release_writecount, 222 .pgo_mightbedirty = vm_object_mightbedirty_, 223 .pgo_getvp = tmpfs_pager_getvp, 224 }; 225 226 static int 227 tmpfs_node_ctor(void *mem, int size, void *arg, int flags) 228 { 229 struct tmpfs_node *node; 230 231 node = mem; 232 node->tn_gen++; 233 node->tn_size = 0; 234 node->tn_status = 0; 235 node->tn_accessed = false; 236 node->tn_flags = 0; 237 node->tn_links = 0; 238 node->tn_vnode = NULL; 239 node->tn_vpstate = 0; 240 return (0); 241 } 242 243 static void 244 tmpfs_node_dtor(void *mem, int size, void *arg) 245 { 246 struct tmpfs_node *node; 247 248 node = mem; 249 node->tn_type = VNON; 250 } 251 252 static int 253 tmpfs_node_init(void *mem, int size, int flags) 254 { 255 struct tmpfs_node *node; 256 257 node = mem; 258 node->tn_id = 0; 259 mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF); 260 node->tn_gen = arc4random(); 261 return (0); 262 } 263 264 static void 265 tmpfs_node_fini(void *mem, int size) 266 { 267 struct tmpfs_node *node; 268 269 node = mem; 270 mtx_destroy(&node->tn_interlock); 271 } 272 273 int 274 tmpfs_subr_init(void) 275 { 276 tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops, 277 OBJT_SWAP); 278 if (tmpfs_pager_type == -1) 279 return (EINVAL); 280 tmpfs_node_pool = uma_zcreate("TMPFS node", 281 sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, 282 tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); 283 VFS_SMR_ZONE_SET(tmpfs_node_pool); 284 return (0); 285 } 286 287 void 288 tmpfs_subr_uninit(void) 289 { 290 if (tmpfs_pager_type != -1) 291 vm_pager_free_dyn_type(tmpfs_pager_type); 292 tmpfs_pager_type = -1; 293 uma_zdestroy(tmpfs_node_pool); 294 } 295 296 static int 297 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 298 { 299 int error; 300 long pages, bytes; 301 302 pages = *(long *)arg1; 303 bytes = pages * PAGE_SIZE; 304 305 error = sysctl_handle_long(oidp, &bytes, 0, req); 306 if (error || !req->newptr) 307 return (error); 308 309 pages = bytes / PAGE_SIZE; 310 if (pages < TMPFS_PAGES_MINRESERVED) 311 return (EINVAL); 312 313 *(long *)arg1 = pages; 314 return (0); 315 } 316 317 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, 318 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0, 319 sysctl_mem_reserved, "L", 320 "Amount of available memory and swap below which tmpfs growth stops"); 321 322 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 323 struct tmpfs_dirent *b); 324 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 325 326 size_t 327 tmpfs_mem_avail(void) 328 { 329 size_t avail; 330 long reserved; 331 332 avail = swap_pager_avail + vm_free_count(); 333 reserved = atomic_load_long(&tmpfs_pages_reserved); 334 if (__predict_false(avail < reserved)) 335 return (0); 336 return (avail - reserved); 337 } 338 339 size_t 340 tmpfs_pages_used(struct tmpfs_mount *tmp) 341 { 342 const size_t node_size = sizeof(struct tmpfs_node) + 343 sizeof(struct tmpfs_dirent); 344 size_t meta_pages; 345 346 meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 347 PAGE_SIZE); 348 return (meta_pages + tmp->tm_pages_used); 349 } 350 351 static size_t 352 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 353 { 354 if (tmpfs_mem_avail() < req_pages) 355 return (0); 356 357 if (tmp->tm_pages_max != ULONG_MAX && 358 tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 359 return (0); 360 361 return (1); 362 } 363 364 void 365 tmpfs_ref_node(struct tmpfs_node *node) 366 { 367 #ifdef INVARIANTS 368 u_int old; 369 370 old = 371 #endif 372 refcount_acquire(&node->tn_refcount); 373 #ifdef INVARIANTS 374 KASSERT(old > 0, ("node %p zero refcount", node)); 375 #endif 376 } 377 378 /* 379 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 380 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 381 * using the credentials of the process 'p'. 382 * 383 * If the node type is set to 'VDIR', then the parent parameter must point 384 * to the parent directory of the node being created. It may only be NULL 385 * while allocating the root node. 386 * 387 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 388 * specifies the device the node represents. 389 * 390 * If the node type is set to 'VLNK', then the parameter target specifies 391 * the file name of the target file for the symbolic link that is being 392 * created. 393 * 394 * Note that new nodes are retrieved from the available list if it has 395 * items or, if it is empty, from the node pool as long as there is enough 396 * space to create them. 397 * 398 * Returns zero on success or an appropriate error code on failure. 399 */ 400 int 401 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, 402 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 403 const char *target, dev_t rdev, struct tmpfs_node **node) 404 { 405 struct tmpfs_node *nnode; 406 vm_object_t obj; 407 char *symlink; 408 char symlink_smr; 409 410 /* If the root directory of the 'tmp' file system is not yet 411 * allocated, this must be the request to do it. */ 412 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 413 414 MPASS(IFF(type == VLNK, target != NULL)); 415 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 416 417 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 418 return (ENOSPC); 419 if (tmpfs_pages_check_avail(tmp, 1) == 0) 420 return (ENOSPC); 421 422 if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { 423 /* 424 * When a new tmpfs node is created for fully 425 * constructed mount point, there must be a parent 426 * node, which vnode is locked exclusively. As 427 * consequence, if the unmount is executing in 428 * parallel, vflush() cannot reclaim the parent vnode. 429 * Due to this, the check for MNTK_UNMOUNT flag is not 430 * racy: if we did not see MNTK_UNMOUNT flag, then tmp 431 * cannot be destroyed until node construction is 432 * finished and the parent vnode unlocked. 433 * 434 * Tmpfs does not need to instantiate new nodes during 435 * unmount. 436 */ 437 return (EBUSY); 438 } 439 if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) 440 return (EROFS); 441 442 nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK); 443 444 /* Generic initialization. */ 445 nnode->tn_type = type; 446 vfs_timestamp(&nnode->tn_atime); 447 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 448 nnode->tn_atime; 449 nnode->tn_uid = uid; 450 nnode->tn_gid = gid; 451 nnode->tn_mode = mode; 452 nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr); 453 nnode->tn_refcount = 1; 454 455 /* Type-specific initialization. */ 456 switch (nnode->tn_type) { 457 case VBLK: 458 case VCHR: 459 nnode->tn_rdev = rdev; 460 break; 461 462 case VDIR: 463 RB_INIT(&nnode->tn_dir.tn_dirhead); 464 LIST_INIT(&nnode->tn_dir.tn_dupindex); 465 MPASS(parent != nnode); 466 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 467 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 468 nnode->tn_dir.tn_readdir_lastn = 0; 469 nnode->tn_dir.tn_readdir_lastp = NULL; 470 nnode->tn_links++; 471 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 472 nnode->tn_dir.tn_parent->tn_links++; 473 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 474 break; 475 476 case VFIFO: 477 /* FALLTHROUGH */ 478 case VSOCK: 479 break; 480 481 case VLNK: 482 MPASS(strlen(target) < MAXPATHLEN); 483 nnode->tn_size = strlen(target); 484 485 symlink = NULL; 486 if (!tmp->tm_nonc) { 487 symlink = cache_symlink_alloc(nnode->tn_size + 1, M_WAITOK); 488 symlink_smr = true; 489 } 490 if (symlink == NULL) { 491 symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, M_WAITOK); 492 symlink_smr = false; 493 } 494 memcpy(symlink, target, nnode->tn_size + 1); 495 496 /* 497 * Allow safe symlink resolving for lockless lookup. 498 * tmpfs_fplookup_symlink references this comment. 499 * 500 * 1. nnode is not yet visible to the world 501 * 2. both tn_link_target and tn_link_smr get populated 502 * 3. release fence publishes their content 503 * 4. tn_link_target content is immutable until node destruction, 504 * where the pointer gets set to NULL 505 * 5. tn_link_smr is never changed once set 506 * 507 * As a result it is sufficient to issue load consume on the node 508 * pointer to also get the above content in a stable manner. 509 * Worst case tn_link_smr flag may be set to true despite being stale, 510 * while the target buffer is already cleared out. 511 */ 512 atomic_store_ptr(&nnode->tn_link_target, symlink); 513 atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); 514 atomic_thread_fence_rel(); 515 break; 516 517 case VREG: 518 obj = nnode->tn_reg.tn_aobj = 519 vm_pager_allocate(tmpfs_pager_type, NULL, 0, 520 VM_PROT_DEFAULT, 0, 521 NULL /* XXXKIB - tmpfs needs swap reservation */); 522 /* OBJ_TMPFS is set together with the setting of vp->v_object */ 523 nnode->tn_reg.tn_tmp = tmp; 524 break; 525 526 default: 527 panic("tmpfs_alloc_node: type %p %d", nnode, 528 (int)nnode->tn_type); 529 } 530 531 TMPFS_LOCK(tmp); 532 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 533 nnode->tn_attached = true; 534 tmp->tm_nodes_inuse++; 535 tmp->tm_refcount++; 536 TMPFS_UNLOCK(tmp); 537 538 *node = nnode; 539 return (0); 540 } 541 542 /* 543 * Destroys the node pointed to by node from the file system 'tmp'. 544 * If the node references a directory, no entries are allowed. 545 */ 546 void 547 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 548 { 549 if (refcount_release_if_not_last(&node->tn_refcount)) 550 return; 551 552 TMPFS_LOCK(tmp); 553 TMPFS_NODE_LOCK(node); 554 if (!tmpfs_free_node_locked(tmp, node, false)) { 555 TMPFS_NODE_UNLOCK(node); 556 TMPFS_UNLOCK(tmp); 557 } 558 } 559 560 bool 561 tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node, 562 bool detach) 563 { 564 vm_object_t uobj; 565 char *symlink; 566 bool last; 567 568 TMPFS_MP_ASSERT_LOCKED(tmp); 569 TMPFS_NODE_ASSERT_LOCKED(node); 570 571 last = refcount_release(&node->tn_refcount); 572 if (node->tn_attached && (detach || last)) { 573 MPASS(tmp->tm_nodes_inuse > 0); 574 tmp->tm_nodes_inuse--; 575 LIST_REMOVE(node, tn_entries); 576 node->tn_attached = false; 577 } 578 if (!last) 579 return (false); 580 581 TMPFS_NODE_UNLOCK(node); 582 583 #ifdef INVARIANTS 584 MPASS(node->tn_vnode == NULL); 585 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 586 587 /* 588 * Make sure this is a node type we can deal with. Everything is explicitly 589 * enumerated without the 'default' clause so the the compiler can throw an 590 * error in case a new type is added. 591 */ 592 switch (node->tn_type) { 593 case VBLK: 594 case VCHR: 595 case VDIR: 596 case VFIFO: 597 case VSOCK: 598 case VLNK: 599 case VREG: 600 break; 601 case VNON: 602 case VBAD: 603 case VMARKER: 604 panic("%s: bad type %d for node %p", __func__, (int)node->tn_type, node); 605 } 606 #endif 607 608 switch (node->tn_type) { 609 case VREG: 610 uobj = node->tn_reg.tn_aobj; 611 if (uobj != NULL) { 612 if (uobj->size != 0) 613 atomic_subtract_long(&tmp->tm_pages_used, uobj->size); 614 } 615 616 tmpfs_free_tmp(tmp); 617 618 if (uobj != NULL) { 619 KASSERT((uobj->flags & OBJ_TMPFS) == 0, 620 ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); 621 vm_object_deallocate(uobj); 622 } 623 break; 624 case VLNK: 625 tmpfs_free_tmp(tmp); 626 627 symlink = node->tn_link_target; 628 atomic_store_ptr(&node->tn_link_target, NULL); 629 if (atomic_load_char(&node->tn_link_smr)) { 630 cache_symlink_free(symlink, node->tn_size + 1); 631 } else { 632 free(symlink, M_TMPFSNAME); 633 } 634 break; 635 default: 636 tmpfs_free_tmp(tmp); 637 break; 638 } 639 640 uma_zfree_smr(tmpfs_node_pool, node); 641 return (true); 642 } 643 644 static __inline uint32_t 645 tmpfs_dirent_hash(const char *name, u_int len) 646 { 647 uint32_t hash; 648 649 hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; 650 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP 651 hash &= 0xf; 652 #endif 653 if (hash < TMPFS_DIRCOOKIE_MIN) 654 hash += TMPFS_DIRCOOKIE_MIN; 655 656 return (hash); 657 } 658 659 static __inline off_t 660 tmpfs_dirent_cookie(struct tmpfs_dirent *de) 661 { 662 if (de == NULL) 663 return (TMPFS_DIRCOOKIE_EOF); 664 665 MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); 666 667 return (de->td_cookie); 668 } 669 670 static __inline boolean_t 671 tmpfs_dirent_dup(struct tmpfs_dirent *de) 672 { 673 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); 674 } 675 676 static __inline boolean_t 677 tmpfs_dirent_duphead(struct tmpfs_dirent *de) 678 { 679 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); 680 } 681 682 void 683 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) 684 { 685 de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); 686 memcpy(de->ud.td_name, name, namelen); 687 de->td_namelen = namelen; 688 } 689 690 /* 691 * Allocates a new directory entry for the node node with a name of name. 692 * The new directory entry is returned in *de. 693 * 694 * The link count of node is increased by one to reflect the new object 695 * referencing it. 696 * 697 * Returns zero on success or an appropriate error code on failure. 698 */ 699 int 700 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 701 const char *name, u_int len, struct tmpfs_dirent **de) 702 { 703 struct tmpfs_dirent *nde; 704 705 nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK); 706 nde->td_node = node; 707 if (name != NULL) { 708 nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 709 tmpfs_dirent_init(nde, name, len); 710 } else 711 nde->td_namelen = 0; 712 if (node != NULL) 713 node->tn_links++; 714 715 *de = nde; 716 717 return 0; 718 } 719 720 /* 721 * Frees a directory entry. It is the caller's responsibility to destroy 722 * the node referenced by it if needed. 723 * 724 * The link count of node is decreased by one to reflect the removal of an 725 * object that referenced it. This only happens if 'node_exists' is true; 726 * otherwise the function will not access the node referred to by the 727 * directory entry, as it may already have been released from the outside. 728 */ 729 void 730 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 731 { 732 struct tmpfs_node *node; 733 734 node = de->td_node; 735 if (node != NULL) { 736 MPASS(node->tn_links > 0); 737 node->tn_links--; 738 } 739 if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) 740 free(de->ud.td_name, M_TMPFSNAME); 741 free(de, M_TMPFSDIR); 742 } 743 744 void 745 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) 746 { 747 bool want_vrele; 748 749 ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); 750 if (vp->v_type != VREG || obj == NULL) 751 return; 752 753 VM_OBJECT_WLOCK(obj); 754 VI_LOCK(vp); 755 /* 756 * May be going through forced unmount. 757 */ 758 want_vrele = false; 759 if ((obj->flags & OBJ_TMPFS_VREF) != 0) { 760 vm_object_clear_flag(obj, OBJ_TMPFS_VREF); 761 want_vrele = true; 762 } 763 764 vm_object_clear_flag(obj, OBJ_TMPFS); 765 obj->un_pager.swp.swp_tmpfs = NULL; 766 if (vp->v_writecount < 0) 767 vp->v_writecount = 0; 768 VI_UNLOCK(vp); 769 VM_OBJECT_WUNLOCK(obj); 770 if (want_vrele) { 771 vrele(vp); 772 } 773 } 774 775 /* 776 * Need to clear v_object for insmntque failure. 777 */ 778 static void 779 tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg) 780 { 781 782 tmpfs_destroy_vobject(vp, vp->v_object); 783 vp->v_object = NULL; 784 vp->v_data = NULL; 785 vp->v_op = &dead_vnodeops; 786 vgone(vp); 787 vput(vp); 788 } 789 790 /* 791 * Allocates a new vnode for the node node or returns a new reference to 792 * an existing one if the node had already a vnode referencing it. The 793 * resulting locked vnode is returned in *vpp. 794 * 795 * Returns zero on success or an appropriate error code on failure. 796 */ 797 int 798 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 799 struct vnode **vpp) 800 { 801 struct vnode *vp; 802 enum vgetstate vs; 803 struct tmpfs_mount *tm; 804 vm_object_t object; 805 int error; 806 807 error = 0; 808 tm = VFS_TO_TMPFS(mp); 809 TMPFS_NODE_LOCK(node); 810 tmpfs_ref_node(node); 811 loop: 812 TMPFS_NODE_ASSERT_LOCKED(node); 813 if ((vp = node->tn_vnode) != NULL) { 814 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 815 if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || 816 (VN_IS_DOOMED(vp) && 817 (lkflag & LK_NOWAIT) != 0)) { 818 TMPFS_NODE_UNLOCK(node); 819 error = ENOENT; 820 vp = NULL; 821 goto out; 822 } 823 if (VN_IS_DOOMED(vp)) { 824 node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; 825 while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { 826 msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 827 0, "tmpfsE", 0); 828 } 829 goto loop; 830 } 831 vs = vget_prep(vp); 832 TMPFS_NODE_UNLOCK(node); 833 error = vget_finish(vp, lkflag, vs); 834 if (error == ENOENT) { 835 TMPFS_NODE_LOCK(node); 836 goto loop; 837 } 838 if (error != 0) { 839 vp = NULL; 840 goto out; 841 } 842 843 /* 844 * Make sure the vnode is still there after 845 * getting the interlock to avoid racing a free. 846 */ 847 if (node->tn_vnode != vp) { 848 vput(vp); 849 TMPFS_NODE_LOCK(node); 850 goto loop; 851 } 852 853 goto out; 854 } 855 856 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 857 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 858 TMPFS_NODE_UNLOCK(node); 859 error = ENOENT; 860 vp = NULL; 861 goto out; 862 } 863 864 /* 865 * otherwise lock the vp list while we call getnewvnode 866 * since that can block. 867 */ 868 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 869 node->tn_vpstate |= TMPFS_VNODE_WANT; 870 error = msleep((caddr_t) &node->tn_vpstate, 871 TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0); 872 if (error != 0) 873 goto out; 874 goto loop; 875 } else 876 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 877 878 TMPFS_NODE_UNLOCK(node); 879 880 /* Get a new vnode and associate it with our node. */ 881 error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ? 882 &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp); 883 if (error != 0) 884 goto unlock; 885 MPASS(vp != NULL); 886 887 /* lkflag is ignored, the lock is exclusive */ 888 (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 889 890 vp->v_data = node; 891 vp->v_type = node->tn_type; 892 893 /* Type-specific initialization. */ 894 switch (node->tn_type) { 895 case VBLK: 896 /* FALLTHROUGH */ 897 case VCHR: 898 /* FALLTHROUGH */ 899 case VLNK: 900 /* FALLTHROUGH */ 901 case VSOCK: 902 break; 903 case VFIFO: 904 vp->v_op = &tmpfs_fifoop_entries; 905 break; 906 case VREG: 907 object = node->tn_reg.tn_aobj; 908 VM_OBJECT_WLOCK(object); 909 KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, 910 ("%s: object %p with OBJ_TMPFS_VREF but without vnode", 911 __func__, object)); 912 KASSERT(object->un_pager.swp.writemappings == 0, 913 ("%s: object %p has writemappings", 914 __func__, object)); 915 VI_LOCK(vp); 916 KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); 917 vp->v_object = object; 918 object->un_pager.swp.swp_tmpfs = vp; 919 vm_object_set_flag(object, OBJ_TMPFS); 920 vn_irflag_set_locked(vp, VIRF_PGREAD); 921 VI_UNLOCK(vp); 922 VM_OBJECT_WUNLOCK(object); 923 break; 924 case VDIR: 925 MPASS(node->tn_dir.tn_parent != NULL); 926 if (node->tn_dir.tn_parent == node) 927 vp->v_vflag |= VV_ROOT; 928 break; 929 930 default: 931 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 932 } 933 if (vp->v_type != VFIFO) 934 VN_LOCK_ASHARE(vp); 935 936 error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL); 937 if (error != 0) 938 vp = NULL; 939 940 unlock: 941 TMPFS_NODE_LOCK(node); 942 943 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 944 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 945 node->tn_vnode = vp; 946 947 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 948 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 949 TMPFS_NODE_UNLOCK(node); 950 wakeup((caddr_t) &node->tn_vpstate); 951 } else 952 TMPFS_NODE_UNLOCK(node); 953 954 out: 955 if (error == 0) { 956 *vpp = vp; 957 958 #ifdef INVARIANTS 959 MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); 960 TMPFS_NODE_LOCK(node); 961 MPASS(*vpp == node->tn_vnode); 962 TMPFS_NODE_UNLOCK(node); 963 #endif 964 } 965 tmpfs_free_node(tm, node); 966 967 return (error); 968 } 969 970 /* 971 * Destroys the association between the vnode vp and the node it 972 * references. 973 */ 974 void 975 tmpfs_free_vp(struct vnode *vp) 976 { 977 struct tmpfs_node *node; 978 979 node = VP_TO_TMPFS_NODE(vp); 980 981 TMPFS_NODE_ASSERT_LOCKED(node); 982 node->tn_vnode = NULL; 983 if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) 984 wakeup(&node->tn_vnode); 985 node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; 986 vp->v_data = NULL; 987 } 988 989 /* 990 * Allocates a new file of type 'type' and adds it to the parent directory 991 * 'dvp'; this addition is done using the component name given in 'cnp'. 992 * The ownership of the new file is automatically assigned based on the 993 * credentials of the caller (through 'cnp'), the group is set based on 994 * the parent directory and the mode is determined from the 'vap' argument. 995 * If successful, *vpp holds a vnode to the newly created file and zero 996 * is returned. Otherwise *vpp is NULL and the function returns an 997 * appropriate error code. 998 */ 999 int 1000 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 1001 struct componentname *cnp, const char *target) 1002 { 1003 int error; 1004 struct tmpfs_dirent *de; 1005 struct tmpfs_mount *tmp; 1006 struct tmpfs_node *dnode; 1007 struct tmpfs_node *node; 1008 struct tmpfs_node *parent; 1009 1010 ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file"); 1011 MPASS(cnp->cn_flags & HASBUF); 1012 1013 tmp = VFS_TO_TMPFS(dvp->v_mount); 1014 dnode = VP_TO_TMPFS_DIR(dvp); 1015 *vpp = NULL; 1016 1017 /* If the entry we are creating is a directory, we cannot overflow 1018 * the number of links of its parent, because it will get a new 1019 * link. */ 1020 if (vap->va_type == VDIR) { 1021 /* Ensure that we do not overflow the maximum number of links 1022 * imposed by the system. */ 1023 MPASS(dnode->tn_links <= TMPFS_LINK_MAX); 1024 if (dnode->tn_links == TMPFS_LINK_MAX) { 1025 return (EMLINK); 1026 } 1027 1028 parent = dnode; 1029 MPASS(parent != NULL); 1030 } else 1031 parent = NULL; 1032 1033 /* Allocate a node that represents the new file. */ 1034 error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, 1035 cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent, 1036 target, vap->va_rdev, &node); 1037 if (error != 0) 1038 return (error); 1039 1040 /* Allocate a directory entry that points to the new file. */ 1041 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 1042 &de); 1043 if (error != 0) { 1044 tmpfs_free_node(tmp, node); 1045 return (error); 1046 } 1047 1048 /* Allocate a vnode for the new file. */ 1049 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 1050 if (error != 0) { 1051 tmpfs_free_dirent(tmp, de); 1052 tmpfs_free_node(tmp, node); 1053 return (error); 1054 } 1055 1056 /* Now that all required items are allocated, we can proceed to 1057 * insert the new node into the directory, an operation that 1058 * cannot fail. */ 1059 if (cnp->cn_flags & ISWHITEOUT) 1060 tmpfs_dir_whiteout_remove(dvp, cnp); 1061 tmpfs_dir_attach(dvp, de); 1062 return (0); 1063 } 1064 1065 struct tmpfs_dirent * 1066 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1067 { 1068 struct tmpfs_dirent *de; 1069 1070 de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); 1071 dc->tdc_tree = de; 1072 if (de != NULL && tmpfs_dirent_duphead(de)) 1073 de = LIST_FIRST(&de->ud.td_duphead); 1074 dc->tdc_current = de; 1075 1076 return (dc->tdc_current); 1077 } 1078 1079 struct tmpfs_dirent * 1080 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 1081 { 1082 struct tmpfs_dirent *de; 1083 1084 MPASS(dc->tdc_tree != NULL); 1085 if (tmpfs_dirent_dup(dc->tdc_current)) { 1086 dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); 1087 if (dc->tdc_current != NULL) 1088 return (dc->tdc_current); 1089 } 1090 dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, 1091 &dnode->tn_dir.tn_dirhead, dc->tdc_tree); 1092 if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { 1093 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 1094 MPASS(dc->tdc_current != NULL); 1095 } 1096 1097 return (dc->tdc_current); 1098 } 1099 1100 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ 1101 static struct tmpfs_dirent * 1102 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) 1103 { 1104 struct tmpfs_dirent *de, dekey; 1105 1106 dekey.td_hash = hash; 1107 de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); 1108 return (de); 1109 } 1110 1111 /* Lookup directory entry by cookie, initialize directory cursor accordingly. */ 1112 static struct tmpfs_dirent * 1113 tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, 1114 struct tmpfs_dir_cursor *dc) 1115 { 1116 struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead; 1117 struct tmpfs_dirent *de, dekey; 1118 1119 MPASS(cookie >= TMPFS_DIRCOOKIE_MIN); 1120 1121 if (cookie == node->tn_dir.tn_readdir_lastn && 1122 (de = node->tn_dir.tn_readdir_lastp) != NULL) { 1123 /* Protect against possible race, tn_readdir_last[pn] 1124 * may be updated with only shared vnode lock held. */ 1125 if (cookie == tmpfs_dirent_cookie(de)) 1126 goto out; 1127 } 1128 1129 if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) { 1130 LIST_FOREACH(de, &node->tn_dir.tn_dupindex, 1131 uh.td_dup.index_entries) { 1132 MPASS(tmpfs_dirent_dup(de)); 1133 if (de->td_cookie == cookie) 1134 goto out; 1135 /* dupindex list is sorted. */ 1136 if (de->td_cookie < cookie) { 1137 de = NULL; 1138 goto out; 1139 } 1140 } 1141 MPASS(de == NULL); 1142 goto out; 1143 } 1144 1145 if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { 1146 de = NULL; 1147 } else { 1148 dekey.td_hash = cookie; 1149 /* Recover if direntry for cookie was removed */ 1150 de = RB_NFIND(tmpfs_dir, dirhead, &dekey); 1151 } 1152 dc->tdc_tree = de; 1153 dc->tdc_current = de; 1154 if (de != NULL && tmpfs_dirent_duphead(de)) { 1155 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 1156 MPASS(dc->tdc_current != NULL); 1157 } 1158 return (dc->tdc_current); 1159 1160 out: 1161 dc->tdc_tree = de; 1162 dc->tdc_current = de; 1163 if (de != NULL && tmpfs_dirent_dup(de)) 1164 dc->tdc_tree = tmpfs_dir_xlookup_hash(node, 1165 de->td_hash); 1166 return (dc->tdc_current); 1167 } 1168 1169 /* 1170 * Looks for a directory entry in the directory represented by node. 1171 * 'cnp' describes the name of the entry to look for. Note that the . 1172 * and .. components are not allowed as they do not physically exist 1173 * within directories. 1174 * 1175 * Returns a pointer to the entry when found, otherwise NULL. 1176 */ 1177 struct tmpfs_dirent * 1178 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 1179 struct componentname *cnp) 1180 { 1181 struct tmpfs_dir_duphead *duphead; 1182 struct tmpfs_dirent *de; 1183 uint32_t hash; 1184 1185 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 1186 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 1187 cnp->cn_nameptr[1] == '.'))); 1188 TMPFS_VALIDATE_DIR(node); 1189 1190 hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 1191 de = tmpfs_dir_xlookup_hash(node, hash); 1192 if (de != NULL && tmpfs_dirent_duphead(de)) { 1193 duphead = &de->ud.td_duphead; 1194 LIST_FOREACH(de, duphead, uh.td_dup.entries) { 1195 if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1196 cnp->cn_namelen)) 1197 break; 1198 } 1199 } else if (de != NULL) { 1200 if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 1201 cnp->cn_namelen)) 1202 de = NULL; 1203 } 1204 if (de != NULL && f != NULL && de->td_node != f) 1205 de = NULL; 1206 1207 return (de); 1208 } 1209 1210 /* 1211 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 1212 * list, allocate new cookie value. 1213 */ 1214 static void 1215 tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 1216 struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 1217 { 1218 struct tmpfs_dir_duphead *dupindex; 1219 struct tmpfs_dirent *de, *pde; 1220 1221 dupindex = &dnode->tn_dir.tn_dupindex; 1222 de = LIST_FIRST(dupindex); 1223 if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 1224 if (de == NULL) 1225 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1226 else 1227 nde->td_cookie = de->td_cookie + 1; 1228 MPASS(tmpfs_dirent_dup(nde)); 1229 LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 1230 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1231 return; 1232 } 1233 1234 /* 1235 * Cookie numbers are near exhaustion. Scan dupindex list for unused 1236 * numbers. dupindex list is sorted in descending order. Keep it so 1237 * after inserting nde. 1238 */ 1239 while (1) { 1240 pde = de; 1241 de = LIST_NEXT(de, uh.td_dup.index_entries); 1242 if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 1243 /* 1244 * Last element of the index doesn't have minimal cookie 1245 * value, use it. 1246 */ 1247 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 1248 LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 1249 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1250 return; 1251 } else if (de == NULL) { 1252 /* 1253 * We are so lucky have 2^30 hash duplicates in single 1254 * directory :) Return largest possible cookie value. 1255 * It should be fine except possible issues with 1256 * VOP_READDIR restart. 1257 */ 1258 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 1259 LIST_INSERT_HEAD(dupindex, nde, 1260 uh.td_dup.index_entries); 1261 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1262 return; 1263 } 1264 if (de->td_cookie + 1 == pde->td_cookie || 1265 de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 1266 continue; /* No hole or invalid cookie. */ 1267 nde->td_cookie = de->td_cookie + 1; 1268 MPASS(tmpfs_dirent_dup(nde)); 1269 MPASS(pde->td_cookie > nde->td_cookie); 1270 MPASS(nde->td_cookie > de->td_cookie); 1271 LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 1272 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 1273 return; 1274 } 1275 } 1276 1277 /* 1278 * Attaches the directory entry de to the directory represented by vp. 1279 * Note that this does not change the link count of the node pointed by 1280 * the directory entry, as this is done by tmpfs_alloc_dirent. 1281 */ 1282 void 1283 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 1284 { 1285 struct tmpfs_node *dnode; 1286 struct tmpfs_dirent *xde, *nde; 1287 1288 ASSERT_VOP_ELOCKED(vp, __func__); 1289 MPASS(de->td_namelen > 0); 1290 MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 1291 MPASS(de->td_cookie == de->td_hash); 1292 1293 dnode = VP_TO_TMPFS_DIR(vp); 1294 dnode->tn_dir.tn_readdir_lastn = 0; 1295 dnode->tn_dir.tn_readdir_lastp = NULL; 1296 1297 MPASS(!tmpfs_dirent_dup(de)); 1298 xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1299 if (xde != NULL && tmpfs_dirent_duphead(xde)) 1300 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1301 else if (xde != NULL) { 1302 /* 1303 * Allocate new duphead. Swap xde with duphead to avoid 1304 * adding/removing elements with the same hash. 1305 */ 1306 MPASS(!tmpfs_dirent_dup(xde)); 1307 tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 1308 &nde); 1309 /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ 1310 memcpy(nde, xde, sizeof(*xde)); 1311 xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 1312 LIST_INIT(&xde->ud.td_duphead); 1313 xde->td_namelen = 0; 1314 xde->td_node = NULL; 1315 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 1316 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 1317 } 1318 dnode->tn_size += sizeof(struct tmpfs_dirent); 1319 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1320 dnode->tn_accessed = true; 1321 tmpfs_update(vp); 1322 } 1323 1324 /* 1325 * Detaches the directory entry de from the directory represented by vp. 1326 * Note that this does not change the link count of the node pointed by 1327 * the directory entry, as this is done by tmpfs_free_dirent. 1328 */ 1329 void 1330 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1331 { 1332 struct tmpfs_mount *tmp; 1333 struct tmpfs_dir *head; 1334 struct tmpfs_node *dnode; 1335 struct tmpfs_dirent *xde; 1336 1337 ASSERT_VOP_ELOCKED(vp, __func__); 1338 1339 dnode = VP_TO_TMPFS_DIR(vp); 1340 head = &dnode->tn_dir.tn_dirhead; 1341 dnode->tn_dir.tn_readdir_lastn = 0; 1342 dnode->tn_dir.tn_readdir_lastp = NULL; 1343 1344 if (tmpfs_dirent_dup(de)) { 1345 /* Remove duphead if de was last entry. */ 1346 if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 1347 xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 1348 MPASS(tmpfs_dirent_duphead(xde)); 1349 } else 1350 xde = NULL; 1351 LIST_REMOVE(de, uh.td_dup.entries); 1352 LIST_REMOVE(de, uh.td_dup.index_entries); 1353 if (xde != NULL) { 1354 if (LIST_EMPTY(&xde->ud.td_duphead)) { 1355 RB_REMOVE(tmpfs_dir, head, xde); 1356 tmp = VFS_TO_TMPFS(vp->v_mount); 1357 MPASS(xde->td_node == NULL); 1358 tmpfs_free_dirent(tmp, xde); 1359 } 1360 } 1361 de->td_cookie = de->td_hash; 1362 } else 1363 RB_REMOVE(tmpfs_dir, head, de); 1364 1365 dnode->tn_size -= sizeof(struct tmpfs_dirent); 1366 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1367 dnode->tn_accessed = true; 1368 tmpfs_update(vp); 1369 } 1370 1371 void 1372 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1373 { 1374 struct tmpfs_dirent *de, *dde, *nde; 1375 1376 RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 1377 RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1378 /* Node may already be destroyed. */ 1379 de->td_node = NULL; 1380 if (tmpfs_dirent_duphead(de)) { 1381 while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 1382 LIST_REMOVE(dde, uh.td_dup.entries); 1383 dde->td_node = NULL; 1384 tmpfs_free_dirent(tmp, dde); 1385 } 1386 } 1387 tmpfs_free_dirent(tmp, de); 1388 } 1389 } 1390 1391 /* 1392 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 1393 * directory and returns it in the uio space. The function returns 0 1394 * on success, -1 if there was not enough space in the uio structure to 1395 * hold the directory entry or an appropriate error code if another 1396 * error happens. 1397 */ 1398 static int 1399 tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1400 struct uio *uio) 1401 { 1402 int error; 1403 struct dirent dent; 1404 1405 TMPFS_VALIDATE_DIR(node); 1406 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1407 1408 dent.d_fileno = node->tn_id; 1409 dent.d_off = TMPFS_DIRCOOKIE_DOTDOT; 1410 dent.d_type = DT_DIR; 1411 dent.d_namlen = 1; 1412 dent.d_name[0] = '.'; 1413 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1414 dirent_terminate(&dent); 1415 1416 if (dent.d_reclen > uio->uio_resid) 1417 error = EJUSTRETURN; 1418 else 1419 error = uiomove(&dent, dent.d_reclen, uio); 1420 1421 tmpfs_set_accessed(tm, node); 1422 1423 return (error); 1424 } 1425 1426 /* 1427 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1428 * directory and returns it in the uio space. The function returns 0 1429 * on success, -1 if there was not enough space in the uio structure to 1430 * hold the directory entry or an appropriate error code if another 1431 * error happens. 1432 */ 1433 static int 1434 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, 1435 struct uio *uio, off_t next) 1436 { 1437 struct tmpfs_node *parent; 1438 struct dirent dent; 1439 int error; 1440 1441 TMPFS_VALIDATE_DIR(node); 1442 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1443 1444 /* 1445 * Return ENOENT if the current node is already removed. 1446 */ 1447 TMPFS_ASSERT_LOCKED(node); 1448 parent = node->tn_dir.tn_parent; 1449 if (parent == NULL) 1450 return (ENOENT); 1451 1452 dent.d_fileno = parent->tn_id; 1453 dent.d_off = next; 1454 dent.d_type = DT_DIR; 1455 dent.d_namlen = 2; 1456 dent.d_name[0] = '.'; 1457 dent.d_name[1] = '.'; 1458 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1459 dirent_terminate(&dent); 1460 1461 if (dent.d_reclen > uio->uio_resid) 1462 error = EJUSTRETURN; 1463 else 1464 error = uiomove(&dent, dent.d_reclen, uio); 1465 1466 tmpfs_set_accessed(tm, node); 1467 1468 return (error); 1469 } 1470 1471 /* 1472 * Helper function for tmpfs_readdir. Returns as much directory entries 1473 * as can fit in the uio space. The read starts at uio->uio_offset. 1474 * The function returns 0 on success, -1 if there was not enough space 1475 * in the uio structure to hold the directory entry or an appropriate 1476 * error code if another error happens. 1477 */ 1478 int 1479 tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node, 1480 struct uio *uio, int maxcookies, u_long *cookies, int *ncookies) 1481 { 1482 struct tmpfs_dir_cursor dc; 1483 struct tmpfs_dirent *de, *nde; 1484 off_t off; 1485 int error; 1486 1487 TMPFS_VALIDATE_DIR(node); 1488 1489 off = 0; 1490 1491 /* 1492 * Lookup the node from the current offset. The starting offset of 1493 * 0 will lookup both '.' and '..', and then the first real entry, 1494 * or EOF if there are none. Then find all entries for the dir that 1495 * fit into the buffer. Once no more entries are found (de == NULL), 1496 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 1497 * call to return 0. 1498 */ 1499 switch (uio->uio_offset) { 1500 case TMPFS_DIRCOOKIE_DOT: 1501 error = tmpfs_dir_getdotdent(tm, node, uio); 1502 if (error != 0) 1503 return (error); 1504 uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT; 1505 if (cookies != NULL) 1506 cookies[(*ncookies)++] = off; 1507 /* FALLTHROUGH */ 1508 case TMPFS_DIRCOOKIE_DOTDOT: 1509 de = tmpfs_dir_first(node, &dc); 1510 off = tmpfs_dirent_cookie(de); 1511 error = tmpfs_dir_getdotdotdent(tm, node, uio, off); 1512 if (error != 0) 1513 return (error); 1514 uio->uio_offset = off; 1515 if (cookies != NULL) 1516 cookies[(*ncookies)++] = off; 1517 /* EOF. */ 1518 if (de == NULL) 1519 return (0); 1520 break; 1521 case TMPFS_DIRCOOKIE_EOF: 1522 return (0); 1523 default: 1524 de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 1525 if (de == NULL) 1526 return (EINVAL); 1527 if (cookies != NULL) 1528 off = tmpfs_dirent_cookie(de); 1529 } 1530 1531 /* 1532 * Read as much entries as possible; i.e., until we reach the end of the 1533 * directory or we exhaust uio space. 1534 */ 1535 do { 1536 struct dirent d; 1537 1538 /* 1539 * Create a dirent structure representing the current tmpfs_node 1540 * and fill it. 1541 */ 1542 if (de->td_node == NULL) { 1543 d.d_fileno = 1; 1544 d.d_type = DT_WHT; 1545 } else { 1546 d.d_fileno = de->td_node->tn_id; 1547 switch (de->td_node->tn_type) { 1548 case VBLK: 1549 d.d_type = DT_BLK; 1550 break; 1551 1552 case VCHR: 1553 d.d_type = DT_CHR; 1554 break; 1555 1556 case VDIR: 1557 d.d_type = DT_DIR; 1558 break; 1559 1560 case VFIFO: 1561 d.d_type = DT_FIFO; 1562 break; 1563 1564 case VLNK: 1565 d.d_type = DT_LNK; 1566 break; 1567 1568 case VREG: 1569 d.d_type = DT_REG; 1570 break; 1571 1572 case VSOCK: 1573 d.d_type = DT_SOCK; 1574 break; 1575 1576 default: 1577 panic("tmpfs_dir_getdents: type %p %d", 1578 de->td_node, (int)de->td_node->tn_type); 1579 } 1580 } 1581 d.d_namlen = de->td_namelen; 1582 MPASS(de->td_namelen < sizeof(d.d_name)); 1583 (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1584 d.d_reclen = GENERIC_DIRSIZ(&d); 1585 1586 /* 1587 * Stop reading if the directory entry we are treating is bigger 1588 * than the amount of data that can be returned. 1589 */ 1590 if (d.d_reclen > uio->uio_resid) { 1591 error = EJUSTRETURN; 1592 break; 1593 } 1594 1595 nde = tmpfs_dir_next(node, &dc); 1596 d.d_off = tmpfs_dirent_cookie(nde); 1597 dirent_terminate(&d); 1598 1599 /* 1600 * Copy the new dirent structure into the output buffer and 1601 * advance pointers. 1602 */ 1603 error = uiomove(&d, d.d_reclen, uio); 1604 if (error == 0) { 1605 de = nde; 1606 if (cookies != NULL) { 1607 off = tmpfs_dirent_cookie(de); 1608 MPASS(*ncookies < maxcookies); 1609 cookies[(*ncookies)++] = off; 1610 } 1611 } 1612 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1613 1614 /* Skip setting off when using cookies as it is already done above. */ 1615 if (cookies == NULL) 1616 off = tmpfs_dirent_cookie(de); 1617 1618 /* Update the offset and cache. */ 1619 uio->uio_offset = off; 1620 node->tn_dir.tn_readdir_lastn = off; 1621 node->tn_dir.tn_readdir_lastp = de; 1622 1623 tmpfs_set_accessed(tm, node); 1624 return (error); 1625 } 1626 1627 int 1628 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 1629 { 1630 struct tmpfs_dirent *de; 1631 int error; 1632 1633 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 1634 cnp->cn_nameptr, cnp->cn_namelen, &de); 1635 if (error != 0) 1636 return (error); 1637 tmpfs_dir_attach(dvp, de); 1638 return (0); 1639 } 1640 1641 void 1642 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 1643 { 1644 struct tmpfs_dirent *de; 1645 1646 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 1647 MPASS(de != NULL && de->td_node == NULL); 1648 tmpfs_dir_detach(dvp, de); 1649 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1650 } 1651 1652 /* 1653 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 1654 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 1655 * 'newsize' must be positive. 1656 * 1657 * Returns zero on success or an appropriate error code on failure. 1658 */ 1659 int 1660 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1661 { 1662 struct tmpfs_mount *tmp; 1663 struct tmpfs_node *node; 1664 vm_object_t uobj; 1665 vm_page_t m; 1666 vm_pindex_t idx, newpages, oldpages; 1667 off_t oldsize; 1668 int base, rv; 1669 1670 MPASS(vp->v_type == VREG); 1671 MPASS(newsize >= 0); 1672 1673 node = VP_TO_TMPFS_NODE(vp); 1674 uobj = node->tn_reg.tn_aobj; 1675 tmp = VFS_TO_TMPFS(vp->v_mount); 1676 1677 /* 1678 * Convert the old and new sizes to the number of pages needed to 1679 * store them. It may happen that we do not need to do anything 1680 * because the last allocated page can accommodate the change on 1681 * its own. 1682 */ 1683 oldsize = node->tn_size; 1684 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1685 MPASS(oldpages == uobj->size); 1686 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1687 1688 if (__predict_true(newpages == oldpages && newsize >= oldsize)) { 1689 node->tn_size = newsize; 1690 return (0); 1691 } 1692 1693 if (newpages > oldpages && 1694 tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) 1695 return (ENOSPC); 1696 1697 VM_OBJECT_WLOCK(uobj); 1698 if (newsize < oldsize) { 1699 /* 1700 * Zero the truncated part of the last page. 1701 */ 1702 base = newsize & PAGE_MASK; 1703 if (base != 0) { 1704 idx = OFF_TO_IDX(newsize); 1705 retry: 1706 m = vm_page_grab(uobj, idx, VM_ALLOC_NOCREAT); 1707 if (m != NULL) { 1708 MPASS(vm_page_all_valid(m)); 1709 } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { 1710 m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL | 1711 VM_ALLOC_WAITFAIL); 1712 if (m == NULL) 1713 goto retry; 1714 vm_object_pip_add(uobj, 1); 1715 VM_OBJECT_WUNLOCK(uobj); 1716 rv = vm_pager_get_pages(uobj, &m, 1, NULL, 1717 NULL); 1718 VM_OBJECT_WLOCK(uobj); 1719 vm_object_pip_wakeup(uobj); 1720 if (rv == VM_PAGER_OK) { 1721 /* 1722 * Since the page was not resident, 1723 * and therefore not recently 1724 * accessed, immediately enqueue it 1725 * for asynchronous laundering. The 1726 * current operation is not regarded 1727 * as an access. 1728 */ 1729 vm_page_launder(m); 1730 } else { 1731 vm_page_free(m); 1732 if (ignerr) 1733 m = NULL; 1734 else { 1735 VM_OBJECT_WUNLOCK(uobj); 1736 return (EIO); 1737 } 1738 } 1739 } 1740 if (m != NULL) { 1741 pmap_zero_page_area(m, base, PAGE_SIZE - base); 1742 vm_page_set_dirty(m); 1743 vm_page_xunbusy(m); 1744 } 1745 } 1746 1747 /* 1748 * Release any swap space and free any whole pages. 1749 */ 1750 if (newpages < oldpages) 1751 vm_object_page_remove(uobj, newpages, 0, 0); 1752 } 1753 uobj->size = newpages; 1754 VM_OBJECT_WUNLOCK(uobj); 1755 1756 atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); 1757 1758 node->tn_size = newsize; 1759 return (0); 1760 } 1761 1762 void 1763 tmpfs_check_mtime(struct vnode *vp) 1764 { 1765 struct tmpfs_node *node; 1766 struct vm_object *obj; 1767 1768 ASSERT_VOP_ELOCKED(vp, "check_mtime"); 1769 if (vp->v_type != VREG) 1770 return; 1771 obj = vp->v_object; 1772 KASSERT(obj->type == tmpfs_pager_type && 1773 (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) == 1774 (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj")); 1775 /* unlocked read */ 1776 if (obj->generation != obj->cleangeneration) { 1777 VM_OBJECT_WLOCK(obj); 1778 if (obj->generation != obj->cleangeneration) { 1779 obj->cleangeneration = obj->generation; 1780 node = VP_TO_TMPFS_NODE(vp); 1781 node->tn_status |= TMPFS_NODE_MODIFIED | 1782 TMPFS_NODE_CHANGED; 1783 } 1784 VM_OBJECT_WUNLOCK(obj); 1785 } 1786 } 1787 1788 /* 1789 * Change flags of the given vnode. 1790 * Caller should execute tmpfs_update on vp after a successful execution. 1791 * The vnode must be locked on entry and remain locked on exit. 1792 */ 1793 int 1794 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, 1795 struct thread *p) 1796 { 1797 int error; 1798 struct tmpfs_node *node; 1799 1800 ASSERT_VOP_ELOCKED(vp, "chflags"); 1801 1802 node = VP_TO_TMPFS_NODE(vp); 1803 1804 if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | 1805 UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | 1806 UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 1807 UF_SPARSE | UF_SYSTEM)) != 0) 1808 return (EOPNOTSUPP); 1809 1810 /* Disallow this operation if the file system is mounted read-only. */ 1811 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1812 return (EROFS); 1813 1814 /* 1815 * Callers may only modify the file flags on objects they 1816 * have VADMIN rights for. 1817 */ 1818 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1819 return (error); 1820 /* 1821 * Unprivileged processes are not permitted to unset system 1822 * flags, or modify flags if any system flags are set. 1823 */ 1824 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { 1825 if (node->tn_flags & 1826 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 1827 error = securelevel_gt(cred, 0); 1828 if (error) 1829 return (error); 1830 } 1831 } else { 1832 if (node->tn_flags & 1833 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1834 ((flags ^ node->tn_flags) & SF_SETTABLE)) 1835 return (EPERM); 1836 } 1837 node->tn_flags = flags; 1838 node->tn_status |= TMPFS_NODE_CHANGED; 1839 1840 ASSERT_VOP_ELOCKED(vp, "chflags2"); 1841 1842 return (0); 1843 } 1844 1845 /* 1846 * Change access mode on the given vnode. 1847 * Caller should execute tmpfs_update on vp after a successful execution. 1848 * The vnode must be locked on entry and remain locked on exit. 1849 */ 1850 int 1851 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1852 { 1853 int error; 1854 struct tmpfs_node *node; 1855 mode_t newmode; 1856 1857 ASSERT_VOP_ELOCKED(vp, "chmod"); 1858 ASSERT_VOP_IN_SEQC(vp); 1859 1860 node = VP_TO_TMPFS_NODE(vp); 1861 1862 /* Disallow this operation if the file system is mounted read-only. */ 1863 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1864 return EROFS; 1865 1866 /* Immutable or append-only files cannot be modified, either. */ 1867 if (node->tn_flags & (IMMUTABLE | APPEND)) 1868 return EPERM; 1869 1870 /* 1871 * To modify the permissions on a file, must possess VADMIN 1872 * for that file. 1873 */ 1874 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1875 return (error); 1876 1877 /* 1878 * Privileged processes may set the sticky bit on non-directories, 1879 * as well as set the setgid bit on a file with a group that the 1880 * process is not a member of. 1881 */ 1882 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1883 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 1884 return (EFTYPE); 1885 } 1886 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1887 error = priv_check_cred(cred, PRIV_VFS_SETGID); 1888 if (error) 1889 return (error); 1890 } 1891 1892 newmode = node->tn_mode & ~ALLPERMS; 1893 newmode |= mode & ALLPERMS; 1894 atomic_store_short(&node->tn_mode, newmode); 1895 1896 node->tn_status |= TMPFS_NODE_CHANGED; 1897 1898 ASSERT_VOP_ELOCKED(vp, "chmod2"); 1899 1900 return (0); 1901 } 1902 1903 /* 1904 * Change ownership of the given vnode. At least one of uid or gid must 1905 * be different than VNOVAL. If one is set to that value, the attribute 1906 * is unchanged. 1907 * Caller should execute tmpfs_update on vp after a successful execution. 1908 * The vnode must be locked on entry and remain locked on exit. 1909 */ 1910 int 1911 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1912 struct thread *p) 1913 { 1914 int error; 1915 struct tmpfs_node *node; 1916 uid_t ouid; 1917 gid_t ogid; 1918 mode_t newmode; 1919 1920 ASSERT_VOP_ELOCKED(vp, "chown"); 1921 ASSERT_VOP_IN_SEQC(vp); 1922 1923 node = VP_TO_TMPFS_NODE(vp); 1924 1925 /* Assign default values if they are unknown. */ 1926 MPASS(uid != VNOVAL || gid != VNOVAL); 1927 if (uid == VNOVAL) 1928 uid = node->tn_uid; 1929 if (gid == VNOVAL) 1930 gid = node->tn_gid; 1931 MPASS(uid != VNOVAL && gid != VNOVAL); 1932 1933 /* Disallow this operation if the file system is mounted read-only. */ 1934 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1935 return (EROFS); 1936 1937 /* Immutable or append-only files cannot be modified, either. */ 1938 if (node->tn_flags & (IMMUTABLE | APPEND)) 1939 return (EPERM); 1940 1941 /* 1942 * To modify the ownership of a file, must possess VADMIN for that 1943 * file. 1944 */ 1945 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1946 return (error); 1947 1948 /* 1949 * To change the owner of a file, or change the group of a file to a 1950 * group of which we are not a member, the caller must have 1951 * privilege. 1952 */ 1953 if ((uid != node->tn_uid || 1954 (gid != node->tn_gid && !groupmember(gid, cred))) && 1955 (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) 1956 return (error); 1957 1958 ogid = node->tn_gid; 1959 ouid = node->tn_uid; 1960 1961 node->tn_uid = uid; 1962 node->tn_gid = gid; 1963 1964 node->tn_status |= TMPFS_NODE_CHANGED; 1965 1966 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1967 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { 1968 newmode = node->tn_mode & ~(S_ISUID | S_ISGID); 1969 atomic_store_short(&node->tn_mode, newmode); 1970 } 1971 } 1972 1973 ASSERT_VOP_ELOCKED(vp, "chown2"); 1974 1975 return (0); 1976 } 1977 1978 /* 1979 * Change size of the given vnode. 1980 * Caller should execute tmpfs_update on vp after a successful execution. 1981 * The vnode must be locked on entry and remain locked on exit. 1982 */ 1983 int 1984 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1985 struct thread *p) 1986 { 1987 int error; 1988 struct tmpfs_node *node; 1989 1990 ASSERT_VOP_ELOCKED(vp, "chsize"); 1991 1992 node = VP_TO_TMPFS_NODE(vp); 1993 1994 /* Decide whether this is a valid operation based on the file type. */ 1995 error = 0; 1996 switch (vp->v_type) { 1997 case VDIR: 1998 return (EISDIR); 1999 2000 case VREG: 2001 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2002 return (EROFS); 2003 break; 2004 2005 case VBLK: 2006 /* FALLTHROUGH */ 2007 case VCHR: 2008 /* FALLTHROUGH */ 2009 case VFIFO: 2010 /* 2011 * Allow modifications of special files even if in the file 2012 * system is mounted read-only (we are not modifying the 2013 * files themselves, but the objects they represent). 2014 */ 2015 return (0); 2016 2017 default: 2018 /* Anything else is unsupported. */ 2019 return (EOPNOTSUPP); 2020 } 2021 2022 /* Immutable or append-only files cannot be modified, either. */ 2023 if (node->tn_flags & (IMMUTABLE | APPEND)) 2024 return (EPERM); 2025 2026 error = tmpfs_truncate(vp, size); 2027 /* 2028 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 2029 * for us, as will update tn_status; no need to do that here. 2030 */ 2031 2032 ASSERT_VOP_ELOCKED(vp, "chsize2"); 2033 2034 return (error); 2035 } 2036 2037 /* 2038 * Change access and modification times of the given vnode. 2039 * Caller should execute tmpfs_update on vp after a successful execution. 2040 * The vnode must be locked on entry and remain locked on exit. 2041 */ 2042 int 2043 tmpfs_chtimes(struct vnode *vp, struct vattr *vap, 2044 struct ucred *cred, struct thread *l) 2045 { 2046 int error; 2047 struct tmpfs_node *node; 2048 2049 ASSERT_VOP_ELOCKED(vp, "chtimes"); 2050 2051 node = VP_TO_TMPFS_NODE(vp); 2052 2053 /* Disallow this operation if the file system is mounted read-only. */ 2054 if (vp->v_mount->mnt_flag & MNT_RDONLY) 2055 return (EROFS); 2056 2057 /* Immutable or append-only files cannot be modified, either. */ 2058 if (node->tn_flags & (IMMUTABLE | APPEND)) 2059 return (EPERM); 2060 2061 error = vn_utimes_perm(vp, vap, cred, l); 2062 if (error != 0) 2063 return (error); 2064 2065 if (vap->va_atime.tv_sec != VNOVAL) 2066 node->tn_accessed = true; 2067 2068 if (vap->va_mtime.tv_sec != VNOVAL) 2069 node->tn_status |= TMPFS_NODE_MODIFIED; 2070 2071 if (vap->va_birthtime.tv_sec != VNOVAL) 2072 node->tn_status |= TMPFS_NODE_MODIFIED; 2073 2074 tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); 2075 2076 if (vap->va_birthtime.tv_sec != VNOVAL) 2077 node->tn_birthtime = vap->va_birthtime; 2078 ASSERT_VOP_ELOCKED(vp, "chtimes2"); 2079 2080 return (0); 2081 } 2082 2083 void 2084 tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status) 2085 { 2086 2087 if ((node->tn_status & status) == status || tm->tm_ronly) 2088 return; 2089 TMPFS_NODE_LOCK(node); 2090 node->tn_status |= status; 2091 TMPFS_NODE_UNLOCK(node); 2092 } 2093 2094 void 2095 tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node) 2096 { 2097 if (node->tn_accessed || tm->tm_ronly) 2098 return; 2099 atomic_store_8(&node->tn_accessed, true); 2100 } 2101 2102 /* Sync timestamps */ 2103 void 2104 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 2105 const struct timespec *mod) 2106 { 2107 struct tmpfs_node *node; 2108 struct timespec now; 2109 2110 ASSERT_VOP_LOCKED(vp, "tmpfs_itimes"); 2111 node = VP_TO_TMPFS_NODE(vp); 2112 2113 if (!node->tn_accessed && 2114 (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0) 2115 return; 2116 2117 vfs_timestamp(&now); 2118 TMPFS_NODE_LOCK(node); 2119 if (node->tn_accessed) { 2120 if (acc == NULL) 2121 acc = &now; 2122 node->tn_atime = *acc; 2123 } 2124 if (node->tn_status & TMPFS_NODE_MODIFIED) { 2125 if (mod == NULL) 2126 mod = &now; 2127 node->tn_mtime = *mod; 2128 } 2129 if (node->tn_status & TMPFS_NODE_CHANGED) 2130 node->tn_ctime = now; 2131 node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 2132 node->tn_accessed = false; 2133 TMPFS_NODE_UNLOCK(node); 2134 2135 /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ 2136 random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME); 2137 } 2138 2139 int 2140 tmpfs_truncate(struct vnode *vp, off_t length) 2141 { 2142 int error; 2143 struct tmpfs_node *node; 2144 2145 node = VP_TO_TMPFS_NODE(vp); 2146 2147 if (length < 0) { 2148 error = EINVAL; 2149 goto out; 2150 } 2151 2152 if (node->tn_size == length) { 2153 error = 0; 2154 goto out; 2155 } 2156 2157 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 2158 return (EFBIG); 2159 2160 error = tmpfs_reg_resize(vp, length, FALSE); 2161 if (error == 0) 2162 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 2163 2164 out: 2165 tmpfs_update(vp); 2166 2167 return (error); 2168 } 2169 2170 static __inline int 2171 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 2172 { 2173 if (a->td_hash > b->td_hash) 2174 return (1); 2175 else if (a->td_hash < b->td_hash) 2176 return (-1); 2177 return (0); 2178 } 2179 2180 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 2181