1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/fnv_hash.h> 41 #include <sys/lock.h> 42 #include <sys/namei.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/random.h> 46 #include <sys/rwlock.h> 47 #include <sys/stat.h> 48 #include <sys/systm.h> 49 #include <sys/sysctl.h> 50 #include <sys/vnode.h> 51 #include <sys/vmmeter.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_pageout.h> 58 #include <vm/vm_pager.h> 59 #include <vm/vm_extern.h> 60 61 #include <fs/tmpfs/tmpfs.h> 62 #include <fs/tmpfs/tmpfs_fifoops.h> 63 #include <fs/tmpfs/tmpfs_vnops.h> 64 65 struct tmpfs_dir_cursor { 66 struct tmpfs_dirent *tdc_current; 67 struct tmpfs_dirent *tdc_tree; 68 }; 69 70 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system"); 71 72 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 73 74 static int 75 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 76 { 77 int error; 78 long pages, bytes; 79 80 pages = *(long *)arg1; 81 bytes = pages * PAGE_SIZE; 82 83 error = sysctl_handle_long(oidp, &bytes, 0, req); 84 if (error || !req->newptr) 85 return (error); 86 87 pages = bytes / PAGE_SIZE; 88 if (pages < TMPFS_PAGES_MINRESERVED) 89 return (EINVAL); 90 91 *(long *)arg1 = pages; 92 return (0); 93 } 94 95 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_RW, 96 &tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L", 97 "Amount of available memory and swap below which tmpfs growth stops"); 98 99 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 100 struct tmpfs_dirent *b); 101 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 102 103 size_t 104 tmpfs_mem_avail(void) 105 { 106 vm_ooffset_t avail; 107 108 avail = swap_pager_avail + vm_cnt.v_free_count + vm_cnt.v_cache_count - 109 tmpfs_pages_reserved; 110 if (__predict_false(avail < 0)) 111 avail = 0; 112 return (avail); 113 } 114 115 size_t 116 tmpfs_pages_used(struct tmpfs_mount *tmp) 117 { 118 const size_t node_size = sizeof(struct tmpfs_node) + 119 sizeof(struct tmpfs_dirent); 120 size_t meta_pages; 121 122 meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 123 PAGE_SIZE); 124 return (meta_pages + tmp->tm_pages_used); 125 } 126 127 static size_t 128 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 129 { 130 if (tmpfs_mem_avail() < req_pages) 131 return (0); 132 133 if (tmp->tm_pages_max != SIZE_MAX && 134 tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 135 return (0); 136 137 return (1); 138 } 139 140 /* 141 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 142 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 143 * using the credentials of the process 'p'. 144 * 145 * If the node type is set to 'VDIR', then the parent parameter must point 146 * to the parent directory of the node being created. It may only be NULL 147 * while allocating the root node. 148 * 149 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 150 * specifies the device the node represents. 151 * 152 * If the node type is set to 'VLNK', then the parameter target specifies 153 * the file name of the target file for the symbolic link that is being 154 * created. 155 * 156 * Note that new nodes are retrieved from the available list if it has 157 * items or, if it is empty, from the node pool as long as there is enough 158 * space to create them. 159 * 160 * Returns zero on success or an appropriate error code on failure. 161 */ 162 int 163 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, 164 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 165 char *target, dev_t rdev, struct tmpfs_node **node) 166 { 167 struct tmpfs_node *nnode; 168 vm_object_t obj; 169 170 /* If the root directory of the 'tmp' file system is not yet 171 * allocated, this must be the request to do it. */ 172 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 173 KASSERT(tmp->tm_root == NULL || mp->mnt_writeopcount > 0, 174 ("creating node not under vn_start_write")); 175 176 MPASS(IFF(type == VLNK, target != NULL)); 177 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 178 179 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 180 return (ENOSPC); 181 if (tmpfs_pages_check_avail(tmp, 1) == 0) 182 return (ENOSPC); 183 184 if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { 185 /* 186 * When a new tmpfs node is created for fully 187 * constructed mount point, there must be a parent 188 * node, which vnode is locked exclusively. As 189 * consequence, if the unmount is executing in 190 * parallel, vflush() cannot reclaim the parent vnode. 191 * Due to this, the check for MNTK_UNMOUNT flag is not 192 * racy: if we did not see MNTK_UNMOUNT flag, then tmp 193 * cannot be destroyed until node construction is 194 * finished and the parent vnode unlocked. 195 * 196 * Tmpfs does not need to instantiate new nodes during 197 * unmount. 198 */ 199 return (EBUSY); 200 } 201 202 nnode = (struct tmpfs_node *)uma_zalloc_arg( 203 tmp->tm_node_pool, tmp, M_WAITOK); 204 205 /* Generic initialization. */ 206 nnode->tn_type = type; 207 vfs_timestamp(&nnode->tn_atime); 208 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 209 nnode->tn_atime; 210 nnode->tn_uid = uid; 211 nnode->tn_gid = gid; 212 nnode->tn_mode = mode; 213 nnode->tn_id = alloc_unr(tmp->tm_ino_unr); 214 215 /* Type-specific initialization. */ 216 switch (nnode->tn_type) { 217 case VBLK: 218 case VCHR: 219 nnode->tn_rdev = rdev; 220 break; 221 222 case VDIR: 223 RB_INIT(&nnode->tn_dir.tn_dirhead); 224 LIST_INIT(&nnode->tn_dir.tn_dupindex); 225 MPASS(parent != nnode); 226 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 227 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 228 nnode->tn_dir.tn_readdir_lastn = 0; 229 nnode->tn_dir.tn_readdir_lastp = NULL; 230 nnode->tn_links++; 231 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 232 nnode->tn_dir.tn_parent->tn_links++; 233 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 234 break; 235 236 case VFIFO: 237 /* FALLTHROUGH */ 238 case VSOCK: 239 break; 240 241 case VLNK: 242 MPASS(strlen(target) < MAXPATHLEN); 243 nnode->tn_size = strlen(target); 244 nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, 245 M_WAITOK); 246 memcpy(nnode->tn_link, target, nnode->tn_size); 247 break; 248 249 case VREG: 250 obj = nnode->tn_reg.tn_aobj = 251 vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0, 252 NULL /* XXXKIB - tmpfs needs swap reservation */); 253 VM_OBJECT_WLOCK(obj); 254 /* OBJ_TMPFS is set together with the setting of vp->v_object */ 255 vm_object_set_flag(obj, OBJ_NOSPLIT | OBJ_TMPFS_NODE); 256 vm_object_clear_flag(obj, OBJ_ONEMAPPING); 257 VM_OBJECT_WUNLOCK(obj); 258 break; 259 260 default: 261 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 262 } 263 264 TMPFS_LOCK(tmp); 265 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 266 tmp->tm_nodes_inuse++; 267 TMPFS_UNLOCK(tmp); 268 269 *node = nnode; 270 return 0; 271 } 272 273 /* 274 * Destroys the node pointed to by node from the file system 'tmp'. 275 * If the node does not belong to the given mount point, the results are 276 * unpredicted. 277 * 278 * If the node references a directory; no entries are allowed because 279 * their removal could need a recursive algorithm, something forbidden in 280 * kernel space. Furthermore, there is not need to provide such 281 * functionality (recursive removal) because the only primitives offered 282 * to the user are the removal of empty directories and the deletion of 283 * individual files. 284 * 285 * Note that nodes are not really deleted; in fact, when a node has been 286 * allocated, it cannot be deleted during the whole life of the file 287 * system. Instead, they are moved to the available list and remain there 288 * until reused. 289 */ 290 void 291 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 292 { 293 vm_object_t uobj; 294 295 #ifdef INVARIANTS 296 TMPFS_NODE_LOCK(node); 297 MPASS(node->tn_vnode == NULL); 298 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 299 TMPFS_NODE_UNLOCK(node); 300 #endif 301 302 TMPFS_LOCK(tmp); 303 LIST_REMOVE(node, tn_entries); 304 tmp->tm_nodes_inuse--; 305 TMPFS_UNLOCK(tmp); 306 307 switch (node->tn_type) { 308 case VNON: 309 /* Do not do anything. VNON is provided to let the 310 * allocation routine clean itself easily by avoiding 311 * duplicating code in it. */ 312 /* FALLTHROUGH */ 313 case VBLK: 314 /* FALLTHROUGH */ 315 case VCHR: 316 /* FALLTHROUGH */ 317 case VDIR: 318 /* FALLTHROUGH */ 319 case VFIFO: 320 /* FALLTHROUGH */ 321 case VSOCK: 322 break; 323 324 case VLNK: 325 free(node->tn_link, M_TMPFSNAME); 326 break; 327 328 case VREG: 329 uobj = node->tn_reg.tn_aobj; 330 if (uobj != NULL) { 331 TMPFS_LOCK(tmp); 332 tmp->tm_pages_used -= uobj->size; 333 TMPFS_UNLOCK(tmp); 334 KASSERT((uobj->flags & OBJ_TMPFS) == 0, 335 ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); 336 vm_object_deallocate(uobj); 337 } 338 break; 339 340 default: 341 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 342 } 343 344 free_unr(tmp->tm_ino_unr, node->tn_id); 345 uma_zfree(tmp->tm_node_pool, node); 346 } 347 348 static __inline uint32_t 349 tmpfs_dirent_hash(const char *name, u_int len) 350 { 351 uint32_t hash; 352 353 hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; 354 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP 355 hash &= 0xf; 356 #endif 357 if (hash < TMPFS_DIRCOOKIE_MIN) 358 hash += TMPFS_DIRCOOKIE_MIN; 359 360 return (hash); 361 } 362 363 static __inline off_t 364 tmpfs_dirent_cookie(struct tmpfs_dirent *de) 365 { 366 if (de == NULL) 367 return (TMPFS_DIRCOOKIE_EOF); 368 369 MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); 370 371 return (de->td_cookie); 372 } 373 374 static __inline boolean_t 375 tmpfs_dirent_dup(struct tmpfs_dirent *de) 376 { 377 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); 378 } 379 380 static __inline boolean_t 381 tmpfs_dirent_duphead(struct tmpfs_dirent *de) 382 { 383 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); 384 } 385 386 void 387 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) 388 { 389 de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); 390 memcpy(de->ud.td_name, name, namelen); 391 de->td_namelen = namelen; 392 } 393 394 /* 395 * Allocates a new directory entry for the node node with a name of name. 396 * The new directory entry is returned in *de. 397 * 398 * The link count of node is increased by one to reflect the new object 399 * referencing it. 400 * 401 * Returns zero on success or an appropriate error code on failure. 402 */ 403 int 404 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 405 const char *name, u_int len, struct tmpfs_dirent **de) 406 { 407 struct tmpfs_dirent *nde; 408 409 nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK); 410 nde->td_node = node; 411 if (name != NULL) { 412 nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 413 tmpfs_dirent_init(nde, name, len); 414 } else 415 nde->td_namelen = 0; 416 if (node != NULL) 417 node->tn_links++; 418 419 *de = nde; 420 421 return 0; 422 } 423 424 /* 425 * Frees a directory entry. It is the caller's responsibility to destroy 426 * the node referenced by it if needed. 427 * 428 * The link count of node is decreased by one to reflect the removal of an 429 * object that referenced it. This only happens if 'node_exists' is true; 430 * otherwise the function will not access the node referred to by the 431 * directory entry, as it may already have been released from the outside. 432 */ 433 void 434 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 435 { 436 struct tmpfs_node *node; 437 438 node = de->td_node; 439 if (node != NULL) { 440 MPASS(node->tn_links > 0); 441 node->tn_links--; 442 } 443 if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) 444 free(de->ud.td_name, M_TMPFSNAME); 445 uma_zfree(tmp->tm_dirent_pool, de); 446 } 447 448 void 449 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) 450 { 451 452 ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); 453 if (vp->v_type != VREG || obj == NULL) 454 return; 455 456 VM_OBJECT_WLOCK(obj); 457 VI_LOCK(vp); 458 vm_object_clear_flag(obj, OBJ_TMPFS); 459 obj->un_pager.swp.swp_tmpfs = NULL; 460 VI_UNLOCK(vp); 461 VM_OBJECT_WUNLOCK(obj); 462 } 463 464 /* 465 * Need to clear v_object for insmntque failure. 466 */ 467 static void 468 tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg) 469 { 470 471 tmpfs_destroy_vobject(vp, vp->v_object); 472 vp->v_object = NULL; 473 vp->v_data = NULL; 474 vp->v_op = &dead_vnodeops; 475 vgone(vp); 476 vput(vp); 477 } 478 479 /* 480 * Allocates a new vnode for the node node or returns a new reference to 481 * an existing one if the node had already a vnode referencing it. The 482 * resulting locked vnode is returned in *vpp. 483 * 484 * Returns zero on success or an appropriate error code on failure. 485 */ 486 int 487 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 488 struct vnode **vpp) 489 { 490 struct vnode *vp; 491 vm_object_t object; 492 int error; 493 494 error = 0; 495 loop: 496 TMPFS_NODE_LOCK(node); 497 loop1: 498 if ((vp = node->tn_vnode) != NULL) { 499 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 500 VI_LOCK(vp); 501 if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || 502 ((vp->v_iflag & VI_DOOMED) != 0 && 503 (lkflag & LK_NOWAIT) != 0)) { 504 VI_UNLOCK(vp); 505 TMPFS_NODE_UNLOCK(node); 506 error = ENOENT; 507 vp = NULL; 508 goto out; 509 } 510 if ((vp->v_iflag & VI_DOOMED) != 0) { 511 VI_UNLOCK(vp); 512 node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; 513 while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { 514 msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 515 0, "tmpfsE", 0); 516 } 517 goto loop1; 518 } 519 TMPFS_NODE_UNLOCK(node); 520 error = vget(vp, lkflag | LK_INTERLOCK, curthread); 521 if (error == ENOENT) 522 goto loop; 523 if (error != 0) { 524 vp = NULL; 525 goto out; 526 } 527 528 /* 529 * Make sure the vnode is still there after 530 * getting the interlock to avoid racing a free. 531 */ 532 if (node->tn_vnode == NULL || node->tn_vnode != vp) { 533 vput(vp); 534 goto loop; 535 } 536 537 goto out; 538 } 539 540 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 541 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 542 TMPFS_NODE_UNLOCK(node); 543 error = ENOENT; 544 vp = NULL; 545 goto out; 546 } 547 548 /* 549 * otherwise lock the vp list while we call getnewvnode 550 * since that can block. 551 */ 552 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 553 node->tn_vpstate |= TMPFS_VNODE_WANT; 554 error = msleep((caddr_t) &node->tn_vpstate, 555 TMPFS_NODE_MTX(node), PDROP | PCATCH, 556 "tmpfs_alloc_vp", 0); 557 if (error) 558 return error; 559 560 goto loop; 561 } else 562 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 563 564 TMPFS_NODE_UNLOCK(node); 565 566 /* Get a new vnode and associate it with our node. */ 567 error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp); 568 if (error != 0) 569 goto unlock; 570 MPASS(vp != NULL); 571 572 /* lkflag is ignored, the lock is exclusive */ 573 (void) vn_lock(vp, lkflag | LK_RETRY); 574 575 vp->v_data = node; 576 vp->v_type = node->tn_type; 577 578 /* Type-specific initialization. */ 579 switch (node->tn_type) { 580 case VBLK: 581 /* FALLTHROUGH */ 582 case VCHR: 583 /* FALLTHROUGH */ 584 case VLNK: 585 /* FALLTHROUGH */ 586 case VSOCK: 587 break; 588 case VFIFO: 589 vp->v_op = &tmpfs_fifoop_entries; 590 break; 591 case VREG: 592 object = node->tn_reg.tn_aobj; 593 VM_OBJECT_WLOCK(object); 594 VI_LOCK(vp); 595 KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); 596 vp->v_object = object; 597 object->un_pager.swp.swp_tmpfs = vp; 598 vm_object_set_flag(object, OBJ_TMPFS); 599 VI_UNLOCK(vp); 600 VM_OBJECT_WUNLOCK(object); 601 break; 602 case VDIR: 603 MPASS(node->tn_dir.tn_parent != NULL); 604 if (node->tn_dir.tn_parent == node) 605 vp->v_vflag |= VV_ROOT; 606 break; 607 608 default: 609 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 610 } 611 if (vp->v_type != VFIFO) 612 VN_LOCK_ASHARE(vp); 613 614 error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL); 615 if (error) 616 vp = NULL; 617 618 unlock: 619 TMPFS_NODE_LOCK(node); 620 621 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 622 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 623 node->tn_vnode = vp; 624 625 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 626 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 627 TMPFS_NODE_UNLOCK(node); 628 wakeup((caddr_t) &node->tn_vpstate); 629 } else 630 TMPFS_NODE_UNLOCK(node); 631 632 out: 633 *vpp = vp; 634 635 #ifdef INVARIANTS 636 if (error == 0) { 637 MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); 638 TMPFS_NODE_LOCK(node); 639 MPASS(*vpp == node->tn_vnode); 640 TMPFS_NODE_UNLOCK(node); 641 } 642 #endif 643 644 return error; 645 } 646 647 /* 648 * Destroys the association between the vnode vp and the node it 649 * references. 650 */ 651 void 652 tmpfs_free_vp(struct vnode *vp) 653 { 654 struct tmpfs_node *node; 655 656 node = VP_TO_TMPFS_NODE(vp); 657 658 TMPFS_NODE_ASSERT_LOCKED(node); 659 node->tn_vnode = NULL; 660 if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) 661 wakeup(&node->tn_vnode); 662 node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; 663 vp->v_data = NULL; 664 } 665 666 /* 667 * Allocates a new file of type 'type' and adds it to the parent directory 668 * 'dvp'; this addition is done using the component name given in 'cnp'. 669 * The ownership of the new file is automatically assigned based on the 670 * credentials of the caller (through 'cnp'), the group is set based on 671 * the parent directory and the mode is determined from the 'vap' argument. 672 * If successful, *vpp holds a vnode to the newly created file and zero 673 * is returned. Otherwise *vpp is NULL and the function returns an 674 * appropriate error code. 675 */ 676 int 677 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 678 struct componentname *cnp, char *target) 679 { 680 int error; 681 struct tmpfs_dirent *de; 682 struct tmpfs_mount *tmp; 683 struct tmpfs_node *dnode; 684 struct tmpfs_node *node; 685 struct tmpfs_node *parent; 686 687 MPASS(VOP_ISLOCKED(dvp)); 688 MPASS(cnp->cn_flags & HASBUF); 689 690 tmp = VFS_TO_TMPFS(dvp->v_mount); 691 dnode = VP_TO_TMPFS_DIR(dvp); 692 *vpp = NULL; 693 694 /* If the entry we are creating is a directory, we cannot overflow 695 * the number of links of its parent, because it will get a new 696 * link. */ 697 if (vap->va_type == VDIR) { 698 /* Ensure that we do not overflow the maximum number of links 699 * imposed by the system. */ 700 MPASS(dnode->tn_links <= LINK_MAX); 701 if (dnode->tn_links == LINK_MAX) { 702 return (EMLINK); 703 } 704 705 parent = dnode; 706 MPASS(parent != NULL); 707 } else 708 parent = NULL; 709 710 /* Allocate a node that represents the new file. */ 711 error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, 712 cnp->cn_cred->cr_uid, 713 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 714 if (error != 0) 715 return (error); 716 717 /* Allocate a directory entry that points to the new file. */ 718 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 719 &de); 720 if (error != 0) { 721 tmpfs_free_node(tmp, node); 722 return (error); 723 } 724 725 /* Allocate a vnode for the new file. */ 726 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 727 if (error != 0) { 728 tmpfs_free_dirent(tmp, de); 729 tmpfs_free_node(tmp, node); 730 return (error); 731 } 732 733 /* Now that all required items are allocated, we can proceed to 734 * insert the new node into the directory, an operation that 735 * cannot fail. */ 736 if (cnp->cn_flags & ISWHITEOUT) 737 tmpfs_dir_whiteout_remove(dvp, cnp); 738 tmpfs_dir_attach(dvp, de); 739 return (0); 740 } 741 742 static struct tmpfs_dirent * 743 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 744 { 745 struct tmpfs_dirent *de; 746 747 de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); 748 dc->tdc_tree = de; 749 if (de != NULL && tmpfs_dirent_duphead(de)) 750 de = LIST_FIRST(&de->ud.td_duphead); 751 dc->tdc_current = de; 752 753 return (dc->tdc_current); 754 } 755 756 static struct tmpfs_dirent * 757 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 758 { 759 struct tmpfs_dirent *de; 760 761 MPASS(dc->tdc_tree != NULL); 762 if (tmpfs_dirent_dup(dc->tdc_current)) { 763 dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); 764 if (dc->tdc_current != NULL) 765 return (dc->tdc_current); 766 } 767 dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, 768 &dnode->tn_dir.tn_dirhead, dc->tdc_tree); 769 if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { 770 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 771 MPASS(dc->tdc_current != NULL); 772 } 773 774 return (dc->tdc_current); 775 } 776 777 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ 778 static struct tmpfs_dirent * 779 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) 780 { 781 struct tmpfs_dirent *de, dekey; 782 783 dekey.td_hash = hash; 784 de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); 785 return (de); 786 } 787 788 /* Lookup directory entry by cookie, initialize directory cursor accordingly. */ 789 static struct tmpfs_dirent * 790 tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, 791 struct tmpfs_dir_cursor *dc) 792 { 793 struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead; 794 struct tmpfs_dirent *de, dekey; 795 796 MPASS(cookie >= TMPFS_DIRCOOKIE_MIN); 797 798 if (cookie == node->tn_dir.tn_readdir_lastn && 799 (de = node->tn_dir.tn_readdir_lastp) != NULL) { 800 /* Protect against possible race, tn_readdir_last[pn] 801 * may be updated with only shared vnode lock held. */ 802 if (cookie == tmpfs_dirent_cookie(de)) 803 goto out; 804 } 805 806 if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) { 807 LIST_FOREACH(de, &node->tn_dir.tn_dupindex, 808 uh.td_dup.index_entries) { 809 MPASS(tmpfs_dirent_dup(de)); 810 if (de->td_cookie == cookie) 811 goto out; 812 /* dupindex list is sorted. */ 813 if (de->td_cookie < cookie) { 814 de = NULL; 815 goto out; 816 } 817 } 818 MPASS(de == NULL); 819 goto out; 820 } 821 822 MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie); 823 dekey.td_hash = cookie; 824 /* Recover if direntry for cookie was removed */ 825 de = RB_NFIND(tmpfs_dir, dirhead, &dekey); 826 dc->tdc_tree = de; 827 dc->tdc_current = de; 828 if (de != NULL && tmpfs_dirent_duphead(de)) { 829 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 830 MPASS(dc->tdc_current != NULL); 831 } 832 return (dc->tdc_current); 833 834 out: 835 dc->tdc_tree = de; 836 dc->tdc_current = de; 837 if (de != NULL && tmpfs_dirent_dup(de)) 838 dc->tdc_tree = tmpfs_dir_xlookup_hash(node, 839 de->td_hash); 840 return (dc->tdc_current); 841 } 842 843 /* 844 * Looks for a directory entry in the directory represented by node. 845 * 'cnp' describes the name of the entry to look for. Note that the . 846 * and .. components are not allowed as they do not physically exist 847 * within directories. 848 * 849 * Returns a pointer to the entry when found, otherwise NULL. 850 */ 851 struct tmpfs_dirent * 852 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 853 struct componentname *cnp) 854 { 855 struct tmpfs_dir_duphead *duphead; 856 struct tmpfs_dirent *de; 857 uint32_t hash; 858 859 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 860 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 861 cnp->cn_nameptr[1] == '.'))); 862 TMPFS_VALIDATE_DIR(node); 863 864 hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 865 de = tmpfs_dir_xlookup_hash(node, hash); 866 if (de != NULL && tmpfs_dirent_duphead(de)) { 867 duphead = &de->ud.td_duphead; 868 LIST_FOREACH(de, duphead, uh.td_dup.entries) { 869 if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 870 cnp->cn_namelen)) 871 break; 872 } 873 } else if (de != NULL) { 874 if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 875 cnp->cn_namelen)) 876 de = NULL; 877 } 878 if (de != NULL && f != NULL && de->td_node != f) 879 de = NULL; 880 881 return (de); 882 } 883 884 /* 885 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 886 * list, allocate new cookie value. 887 */ 888 static void 889 tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 890 struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 891 { 892 struct tmpfs_dir_duphead *dupindex; 893 struct tmpfs_dirent *de, *pde; 894 895 dupindex = &dnode->tn_dir.tn_dupindex; 896 de = LIST_FIRST(dupindex); 897 if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 898 if (de == NULL) 899 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 900 else 901 nde->td_cookie = de->td_cookie + 1; 902 MPASS(tmpfs_dirent_dup(nde)); 903 LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 904 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 905 return; 906 } 907 908 /* 909 * Cookie numbers are near exhaustion. Scan dupindex list for unused 910 * numbers. dupindex list is sorted in descending order. Keep it so 911 * after inserting nde. 912 */ 913 while (1) { 914 pde = de; 915 de = LIST_NEXT(de, uh.td_dup.index_entries); 916 if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 917 /* 918 * Last element of the index doesn't have minimal cookie 919 * value, use it. 920 */ 921 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 922 LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 923 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 924 return; 925 } else if (de == NULL) { 926 /* 927 * We are so lucky have 2^30 hash duplicates in single 928 * directory :) Return largest possible cookie value. 929 * It should be fine except possible issues with 930 * VOP_READDIR restart. 931 */ 932 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 933 LIST_INSERT_HEAD(dupindex, nde, 934 uh.td_dup.index_entries); 935 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 936 return; 937 } 938 if (de->td_cookie + 1 == pde->td_cookie || 939 de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 940 continue; /* No hole or invalid cookie. */ 941 nde->td_cookie = de->td_cookie + 1; 942 MPASS(tmpfs_dirent_dup(nde)); 943 MPASS(pde->td_cookie > nde->td_cookie); 944 MPASS(nde->td_cookie > de->td_cookie); 945 LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 946 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 947 return; 948 }; 949 } 950 951 /* 952 * Attaches the directory entry de to the directory represented by vp. 953 * Note that this does not change the link count of the node pointed by 954 * the directory entry, as this is done by tmpfs_alloc_dirent. 955 */ 956 void 957 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 958 { 959 struct tmpfs_node *dnode; 960 struct tmpfs_dirent *xde, *nde; 961 962 ASSERT_VOP_ELOCKED(vp, __func__); 963 MPASS(de->td_namelen > 0); 964 MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 965 MPASS(de->td_cookie == de->td_hash); 966 967 dnode = VP_TO_TMPFS_DIR(vp); 968 dnode->tn_dir.tn_readdir_lastn = 0; 969 dnode->tn_dir.tn_readdir_lastp = NULL; 970 971 MPASS(!tmpfs_dirent_dup(de)); 972 xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 973 if (xde != NULL && tmpfs_dirent_duphead(xde)) 974 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 975 else if (xde != NULL) { 976 /* 977 * Allocate new duphead. Swap xde with duphead to avoid 978 * adding/removing elements with the same hash. 979 */ 980 MPASS(!tmpfs_dirent_dup(xde)); 981 tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 982 &nde); 983 /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ 984 memcpy(nde, xde, sizeof(*xde)); 985 xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 986 LIST_INIT(&xde->ud.td_duphead); 987 xde->td_namelen = 0; 988 xde->td_node = NULL; 989 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 990 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 991 } 992 dnode->tn_size += sizeof(struct tmpfs_dirent); 993 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 994 TMPFS_NODE_MODIFIED; 995 tmpfs_update(vp); 996 } 997 998 /* 999 * Detaches the directory entry de from the directory represented by vp. 1000 * Note that this does not change the link count of the node pointed by 1001 * the directory entry, as this is done by tmpfs_free_dirent. 1002 */ 1003 void 1004 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1005 { 1006 struct tmpfs_mount *tmp; 1007 struct tmpfs_dir *head; 1008 struct tmpfs_node *dnode; 1009 struct tmpfs_dirent *xde; 1010 1011 ASSERT_VOP_ELOCKED(vp, __func__); 1012 1013 dnode = VP_TO_TMPFS_DIR(vp); 1014 head = &dnode->tn_dir.tn_dirhead; 1015 dnode->tn_dir.tn_readdir_lastn = 0; 1016 dnode->tn_dir.tn_readdir_lastp = NULL; 1017 1018 if (tmpfs_dirent_dup(de)) { 1019 /* Remove duphead if de was last entry. */ 1020 if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 1021 xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 1022 MPASS(tmpfs_dirent_duphead(xde)); 1023 } else 1024 xde = NULL; 1025 LIST_REMOVE(de, uh.td_dup.entries); 1026 LIST_REMOVE(de, uh.td_dup.index_entries); 1027 if (xde != NULL) { 1028 if (LIST_EMPTY(&xde->ud.td_duphead)) { 1029 RB_REMOVE(tmpfs_dir, head, xde); 1030 tmp = VFS_TO_TMPFS(vp->v_mount); 1031 MPASS(xde->td_node == NULL); 1032 tmpfs_free_dirent(tmp, xde); 1033 } 1034 } 1035 de->td_cookie = de->td_hash; 1036 } else 1037 RB_REMOVE(tmpfs_dir, head, de); 1038 1039 dnode->tn_size -= sizeof(struct tmpfs_dirent); 1040 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 1041 TMPFS_NODE_MODIFIED; 1042 tmpfs_update(vp); 1043 } 1044 1045 void 1046 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1047 { 1048 struct tmpfs_dirent *de, *dde, *nde; 1049 1050 RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 1051 RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1052 /* Node may already be destroyed. */ 1053 de->td_node = NULL; 1054 if (tmpfs_dirent_duphead(de)) { 1055 while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 1056 LIST_REMOVE(dde, uh.td_dup.entries); 1057 dde->td_node = NULL; 1058 tmpfs_free_dirent(tmp, dde); 1059 } 1060 } 1061 tmpfs_free_dirent(tmp, de); 1062 } 1063 } 1064 1065 /* 1066 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 1067 * directory and returns it in the uio space. The function returns 0 1068 * on success, -1 if there was not enough space in the uio structure to 1069 * hold the directory entry or an appropriate error code if another 1070 * error happens. 1071 */ 1072 static int 1073 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 1074 { 1075 int error; 1076 struct dirent dent; 1077 1078 TMPFS_VALIDATE_DIR(node); 1079 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1080 1081 dent.d_fileno = node->tn_id; 1082 dent.d_type = DT_DIR; 1083 dent.d_namlen = 1; 1084 dent.d_name[0] = '.'; 1085 dent.d_name[1] = '\0'; 1086 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1087 1088 if (dent.d_reclen > uio->uio_resid) 1089 error = EJUSTRETURN; 1090 else 1091 error = uiomove(&dent, dent.d_reclen, uio); 1092 1093 node->tn_status |= TMPFS_NODE_ACCESSED; 1094 1095 return error; 1096 } 1097 1098 /* 1099 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1100 * directory and returns it in the uio space. The function returns 0 1101 * on success, -1 if there was not enough space in the uio structure to 1102 * hold the directory entry or an appropriate error code if another 1103 * error happens. 1104 */ 1105 static int 1106 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 1107 { 1108 int error; 1109 struct dirent dent; 1110 1111 TMPFS_VALIDATE_DIR(node); 1112 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1113 1114 /* 1115 * Return ENOENT if the current node is already removed. 1116 */ 1117 TMPFS_ASSERT_LOCKED(node); 1118 if (node->tn_dir.tn_parent == NULL) { 1119 return (ENOENT); 1120 } 1121 1122 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 1123 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 1124 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 1125 1126 dent.d_type = DT_DIR; 1127 dent.d_namlen = 2; 1128 dent.d_name[0] = '.'; 1129 dent.d_name[1] = '.'; 1130 dent.d_name[2] = '\0'; 1131 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1132 1133 if (dent.d_reclen > uio->uio_resid) 1134 error = EJUSTRETURN; 1135 else 1136 error = uiomove(&dent, dent.d_reclen, uio); 1137 1138 node->tn_status |= TMPFS_NODE_ACCESSED; 1139 1140 return error; 1141 } 1142 1143 /* 1144 * Helper function for tmpfs_readdir. Returns as much directory entries 1145 * as can fit in the uio space. The read starts at uio->uio_offset. 1146 * The function returns 0 on success, -1 if there was not enough space 1147 * in the uio structure to hold the directory entry or an appropriate 1148 * error code if another error happens. 1149 */ 1150 int 1151 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int maxcookies, 1152 u_long *cookies, int *ncookies) 1153 { 1154 struct tmpfs_dir_cursor dc; 1155 struct tmpfs_dirent *de; 1156 off_t off; 1157 int error; 1158 1159 TMPFS_VALIDATE_DIR(node); 1160 1161 off = 0; 1162 1163 /* 1164 * Lookup the node from the current offset. The starting offset of 1165 * 0 will lookup both '.' and '..', and then the first real entry, 1166 * or EOF if there are none. Then find all entries for the dir that 1167 * fit into the buffer. Once no more entries are found (de == NULL), 1168 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 1169 * call to return 0. 1170 */ 1171 switch (uio->uio_offset) { 1172 case TMPFS_DIRCOOKIE_DOT: 1173 error = tmpfs_dir_getdotdent(node, uio); 1174 if (error != 0) 1175 return (error); 1176 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 1177 if (cookies != NULL) 1178 cookies[(*ncookies)++] = off = uio->uio_offset; 1179 /* FALLTHROUGH */ 1180 case TMPFS_DIRCOOKIE_DOTDOT: 1181 error = tmpfs_dir_getdotdotdent(node, uio); 1182 if (error != 0) 1183 return (error); 1184 de = tmpfs_dir_first(node, &dc); 1185 uio->uio_offset = tmpfs_dirent_cookie(de); 1186 if (cookies != NULL) 1187 cookies[(*ncookies)++] = off = uio->uio_offset; 1188 /* EOF. */ 1189 if (de == NULL) 1190 return (0); 1191 break; 1192 case TMPFS_DIRCOOKIE_EOF: 1193 return (0); 1194 default: 1195 de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 1196 if (de == NULL) 1197 return (EINVAL); 1198 if (cookies != NULL) 1199 off = tmpfs_dirent_cookie(de); 1200 } 1201 1202 /* Read as much entries as possible; i.e., until we reach the end of 1203 * the directory or we exhaust uio space. */ 1204 do { 1205 struct dirent d; 1206 1207 /* Create a dirent structure representing the current 1208 * tmpfs_node and fill it. */ 1209 if (de->td_node == NULL) { 1210 d.d_fileno = 1; 1211 d.d_type = DT_WHT; 1212 } else { 1213 d.d_fileno = de->td_node->tn_id; 1214 switch (de->td_node->tn_type) { 1215 case VBLK: 1216 d.d_type = DT_BLK; 1217 break; 1218 1219 case VCHR: 1220 d.d_type = DT_CHR; 1221 break; 1222 1223 case VDIR: 1224 d.d_type = DT_DIR; 1225 break; 1226 1227 case VFIFO: 1228 d.d_type = DT_FIFO; 1229 break; 1230 1231 case VLNK: 1232 d.d_type = DT_LNK; 1233 break; 1234 1235 case VREG: 1236 d.d_type = DT_REG; 1237 break; 1238 1239 case VSOCK: 1240 d.d_type = DT_SOCK; 1241 break; 1242 1243 default: 1244 panic("tmpfs_dir_getdents: type %p %d", 1245 de->td_node, (int)de->td_node->tn_type); 1246 } 1247 } 1248 d.d_namlen = de->td_namelen; 1249 MPASS(de->td_namelen < sizeof(d.d_name)); 1250 (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1251 d.d_name[de->td_namelen] = '\0'; 1252 d.d_reclen = GENERIC_DIRSIZ(&d); 1253 1254 /* Stop reading if the directory entry we are treating is 1255 * bigger than the amount of data that can be returned. */ 1256 if (d.d_reclen > uio->uio_resid) { 1257 error = EJUSTRETURN; 1258 break; 1259 } 1260 1261 /* Copy the new dirent structure into the output buffer and 1262 * advance pointers. */ 1263 error = uiomove(&d, d.d_reclen, uio); 1264 if (error == 0) { 1265 de = tmpfs_dir_next(node, &dc); 1266 if (cookies != NULL) { 1267 off = tmpfs_dirent_cookie(de); 1268 MPASS(*ncookies < maxcookies); 1269 cookies[(*ncookies)++] = off; 1270 } 1271 } 1272 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1273 1274 /* Skip setting off when using cookies as it is already done above. */ 1275 if (cookies == NULL) 1276 off = tmpfs_dirent_cookie(de); 1277 1278 /* Update the offset and cache. */ 1279 uio->uio_offset = off; 1280 node->tn_dir.tn_readdir_lastn = off; 1281 node->tn_dir.tn_readdir_lastp = de; 1282 1283 node->tn_status |= TMPFS_NODE_ACCESSED; 1284 return error; 1285 } 1286 1287 int 1288 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 1289 { 1290 struct tmpfs_dirent *de; 1291 int error; 1292 1293 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 1294 cnp->cn_nameptr, cnp->cn_namelen, &de); 1295 if (error != 0) 1296 return (error); 1297 tmpfs_dir_attach(dvp, de); 1298 return (0); 1299 } 1300 1301 void 1302 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 1303 { 1304 struct tmpfs_dirent *de; 1305 1306 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 1307 MPASS(de != NULL && de->td_node == NULL); 1308 tmpfs_dir_detach(dvp, de); 1309 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1310 } 1311 1312 /* 1313 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 1314 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 1315 * 'newsize' must be positive. 1316 * 1317 * Returns zero on success or an appropriate error code on failure. 1318 */ 1319 int 1320 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1321 { 1322 struct tmpfs_mount *tmp; 1323 struct tmpfs_node *node; 1324 vm_object_t uobj; 1325 vm_page_t m; 1326 vm_pindex_t idx, newpages, oldpages; 1327 off_t oldsize; 1328 int base, rv; 1329 1330 MPASS(vp->v_type == VREG); 1331 MPASS(newsize >= 0); 1332 1333 node = VP_TO_TMPFS_NODE(vp); 1334 uobj = node->tn_reg.tn_aobj; 1335 tmp = VFS_TO_TMPFS(vp->v_mount); 1336 1337 /* 1338 * Convert the old and new sizes to the number of pages needed to 1339 * store them. It may happen that we do not need to do anything 1340 * because the last allocated page can accommodate the change on 1341 * its own. 1342 */ 1343 oldsize = node->tn_size; 1344 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1345 MPASS(oldpages == uobj->size); 1346 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1347 if (newpages > oldpages && 1348 tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) 1349 return (ENOSPC); 1350 1351 VM_OBJECT_WLOCK(uobj); 1352 if (newsize < oldsize) { 1353 /* 1354 * Zero the truncated part of the last page. 1355 */ 1356 base = newsize & PAGE_MASK; 1357 if (base != 0) { 1358 idx = OFF_TO_IDX(newsize); 1359 retry: 1360 m = vm_page_lookup(uobj, idx); 1361 if (m != NULL) { 1362 if (vm_page_sleep_if_busy(m, "tmfssz")) 1363 goto retry; 1364 MPASS(m->valid == VM_PAGE_BITS_ALL); 1365 } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { 1366 m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL); 1367 if (m == NULL) { 1368 VM_OBJECT_WUNLOCK(uobj); 1369 VM_WAIT; 1370 VM_OBJECT_WLOCK(uobj); 1371 goto retry; 1372 } else if (m->valid != VM_PAGE_BITS_ALL) 1373 rv = vm_pager_get_pages(uobj, &m, 1, 1374 NULL, NULL); 1375 else 1376 /* A cached page was reactivated. */ 1377 rv = VM_PAGER_OK; 1378 vm_page_lock(m); 1379 if (rv == VM_PAGER_OK) { 1380 vm_page_deactivate(m); 1381 vm_page_unlock(m); 1382 vm_page_xunbusy(m); 1383 } else { 1384 vm_page_free(m); 1385 vm_page_unlock(m); 1386 if (ignerr) 1387 m = NULL; 1388 else { 1389 VM_OBJECT_WUNLOCK(uobj); 1390 return (EIO); 1391 } 1392 } 1393 } 1394 if (m != NULL) { 1395 pmap_zero_page_area(m, base, PAGE_SIZE - base); 1396 vm_page_dirty(m); 1397 vm_pager_page_unswapped(m); 1398 } 1399 } 1400 1401 /* 1402 * Release any swap space and free any whole pages. 1403 */ 1404 if (newpages < oldpages) { 1405 swap_pager_freespace(uobj, newpages, oldpages - 1406 newpages); 1407 vm_object_page_remove(uobj, newpages, 0, 0); 1408 } 1409 } 1410 uobj->size = newpages; 1411 VM_OBJECT_WUNLOCK(uobj); 1412 1413 TMPFS_LOCK(tmp); 1414 tmp->tm_pages_used += (newpages - oldpages); 1415 TMPFS_UNLOCK(tmp); 1416 1417 node->tn_size = newsize; 1418 return (0); 1419 } 1420 1421 void 1422 tmpfs_check_mtime(struct vnode *vp) 1423 { 1424 struct tmpfs_node *node; 1425 struct vm_object *obj; 1426 1427 ASSERT_VOP_ELOCKED(vp, "check_mtime"); 1428 if (vp->v_type != VREG) 1429 return; 1430 obj = vp->v_object; 1431 KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) == 1432 (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj")); 1433 /* unlocked read */ 1434 if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { 1435 VM_OBJECT_WLOCK(obj); 1436 if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { 1437 obj->flags &= ~OBJ_TMPFS_DIRTY; 1438 node = VP_TO_TMPFS_NODE(vp); 1439 node->tn_status |= TMPFS_NODE_MODIFIED | 1440 TMPFS_NODE_CHANGED; 1441 } 1442 VM_OBJECT_WUNLOCK(obj); 1443 } 1444 } 1445 1446 /* 1447 * Change flags of the given vnode. 1448 * Caller should execute tmpfs_update on vp after a successful execution. 1449 * The vnode must be locked on entry and remain locked on exit. 1450 */ 1451 int 1452 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, 1453 struct thread *p) 1454 { 1455 int error; 1456 struct tmpfs_node *node; 1457 1458 MPASS(VOP_ISLOCKED(vp)); 1459 1460 node = VP_TO_TMPFS_NODE(vp); 1461 1462 if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | 1463 UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | 1464 UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 1465 UF_SPARSE | UF_SYSTEM)) != 0) 1466 return (EOPNOTSUPP); 1467 1468 /* Disallow this operation if the file system is mounted read-only. */ 1469 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1470 return EROFS; 1471 1472 /* 1473 * Callers may only modify the file flags on objects they 1474 * have VADMIN rights for. 1475 */ 1476 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1477 return (error); 1478 /* 1479 * Unprivileged processes are not permitted to unset system 1480 * flags, or modify flags if any system flags are set. 1481 */ 1482 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1483 if (node->tn_flags & 1484 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 1485 error = securelevel_gt(cred, 0); 1486 if (error) 1487 return (error); 1488 } 1489 } else { 1490 if (node->tn_flags & 1491 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1492 ((flags ^ node->tn_flags) & SF_SETTABLE)) 1493 return (EPERM); 1494 } 1495 node->tn_flags = flags; 1496 node->tn_status |= TMPFS_NODE_CHANGED; 1497 1498 MPASS(VOP_ISLOCKED(vp)); 1499 1500 return 0; 1501 } 1502 1503 /* 1504 * Change access mode on the given vnode. 1505 * Caller should execute tmpfs_update on vp after a successful execution. 1506 * The vnode must be locked on entry and remain locked on exit. 1507 */ 1508 int 1509 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1510 { 1511 int error; 1512 struct tmpfs_node *node; 1513 1514 MPASS(VOP_ISLOCKED(vp)); 1515 1516 node = VP_TO_TMPFS_NODE(vp); 1517 1518 /* Disallow this operation if the file system is mounted read-only. */ 1519 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1520 return EROFS; 1521 1522 /* Immutable or append-only files cannot be modified, either. */ 1523 if (node->tn_flags & (IMMUTABLE | APPEND)) 1524 return EPERM; 1525 1526 /* 1527 * To modify the permissions on a file, must possess VADMIN 1528 * for that file. 1529 */ 1530 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1531 return (error); 1532 1533 /* 1534 * Privileged processes may set the sticky bit on non-directories, 1535 * as well as set the setgid bit on a file with a group that the 1536 * process is not a member of. 1537 */ 1538 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1539 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1540 return (EFTYPE); 1541 } 1542 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1543 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1544 if (error) 1545 return (error); 1546 } 1547 1548 1549 node->tn_mode &= ~ALLPERMS; 1550 node->tn_mode |= mode & ALLPERMS; 1551 1552 node->tn_status |= TMPFS_NODE_CHANGED; 1553 1554 MPASS(VOP_ISLOCKED(vp)); 1555 1556 return 0; 1557 } 1558 1559 /* 1560 * Change ownership of the given vnode. At least one of uid or gid must 1561 * be different than VNOVAL. If one is set to that value, the attribute 1562 * is unchanged. 1563 * Caller should execute tmpfs_update on vp after a successful execution. 1564 * The vnode must be locked on entry and remain locked on exit. 1565 */ 1566 int 1567 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1568 struct thread *p) 1569 { 1570 int error; 1571 struct tmpfs_node *node; 1572 uid_t ouid; 1573 gid_t ogid; 1574 1575 MPASS(VOP_ISLOCKED(vp)); 1576 1577 node = VP_TO_TMPFS_NODE(vp); 1578 1579 /* Assign default values if they are unknown. */ 1580 MPASS(uid != VNOVAL || gid != VNOVAL); 1581 if (uid == VNOVAL) 1582 uid = node->tn_uid; 1583 if (gid == VNOVAL) 1584 gid = node->tn_gid; 1585 MPASS(uid != VNOVAL && gid != VNOVAL); 1586 1587 /* Disallow this operation if the file system is mounted read-only. */ 1588 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1589 return EROFS; 1590 1591 /* Immutable or append-only files cannot be modified, either. */ 1592 if (node->tn_flags & (IMMUTABLE | APPEND)) 1593 return EPERM; 1594 1595 /* 1596 * To modify the ownership of a file, must possess VADMIN for that 1597 * file. 1598 */ 1599 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1600 return (error); 1601 1602 /* 1603 * To change the owner of a file, or change the group of a file to a 1604 * group of which we are not a member, the caller must have 1605 * privilege. 1606 */ 1607 if ((uid != node->tn_uid || 1608 (gid != node->tn_gid && !groupmember(gid, cred))) && 1609 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1610 return (error); 1611 1612 ogid = node->tn_gid; 1613 ouid = node->tn_uid; 1614 1615 node->tn_uid = uid; 1616 node->tn_gid = gid; 1617 1618 node->tn_status |= TMPFS_NODE_CHANGED; 1619 1620 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1621 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1622 node->tn_mode &= ~(S_ISUID | S_ISGID); 1623 } 1624 1625 MPASS(VOP_ISLOCKED(vp)); 1626 1627 return 0; 1628 } 1629 1630 /* 1631 * Change size of the given vnode. 1632 * Caller should execute tmpfs_update on vp after a successful execution. 1633 * The vnode must be locked on entry and remain locked on exit. 1634 */ 1635 int 1636 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1637 struct thread *p) 1638 { 1639 int error; 1640 struct tmpfs_node *node; 1641 1642 MPASS(VOP_ISLOCKED(vp)); 1643 1644 node = VP_TO_TMPFS_NODE(vp); 1645 1646 /* Decide whether this is a valid operation based on the file type. */ 1647 error = 0; 1648 switch (vp->v_type) { 1649 case VDIR: 1650 return EISDIR; 1651 1652 case VREG: 1653 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1654 return EROFS; 1655 break; 1656 1657 case VBLK: 1658 /* FALLTHROUGH */ 1659 case VCHR: 1660 /* FALLTHROUGH */ 1661 case VFIFO: 1662 /* Allow modifications of special files even if in the file 1663 * system is mounted read-only (we are not modifying the 1664 * files themselves, but the objects they represent). */ 1665 return 0; 1666 1667 default: 1668 /* Anything else is unsupported. */ 1669 return EOPNOTSUPP; 1670 } 1671 1672 /* Immutable or append-only files cannot be modified, either. */ 1673 if (node->tn_flags & (IMMUTABLE | APPEND)) 1674 return EPERM; 1675 1676 error = tmpfs_truncate(vp, size); 1677 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1678 * for us, as will update tn_status; no need to do that here. */ 1679 1680 MPASS(VOP_ISLOCKED(vp)); 1681 1682 return error; 1683 } 1684 1685 /* 1686 * Change access and modification times of the given vnode. 1687 * Caller should execute tmpfs_update on vp after a successful execution. 1688 * The vnode must be locked on entry and remain locked on exit. 1689 */ 1690 int 1691 tmpfs_chtimes(struct vnode *vp, struct vattr *vap, 1692 struct ucred *cred, struct thread *l) 1693 { 1694 int error; 1695 struct tmpfs_node *node; 1696 1697 MPASS(VOP_ISLOCKED(vp)); 1698 1699 node = VP_TO_TMPFS_NODE(vp); 1700 1701 /* Disallow this operation if the file system is mounted read-only. */ 1702 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1703 return EROFS; 1704 1705 /* Immutable or append-only files cannot be modified, either. */ 1706 if (node->tn_flags & (IMMUTABLE | APPEND)) 1707 return EPERM; 1708 1709 error = vn_utimes_perm(vp, vap, cred, l); 1710 if (error != 0) 1711 return (error); 1712 1713 if (vap->va_atime.tv_sec != VNOVAL) 1714 node->tn_status |= TMPFS_NODE_ACCESSED; 1715 1716 if (vap->va_mtime.tv_sec != VNOVAL) 1717 node->tn_status |= TMPFS_NODE_MODIFIED; 1718 1719 if (vap->va_birthtime.tv_sec != VNOVAL) 1720 node->tn_status |= TMPFS_NODE_MODIFIED; 1721 1722 tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); 1723 1724 if (vap->va_birthtime.tv_sec != VNOVAL) 1725 node->tn_birthtime = vap->va_birthtime; 1726 MPASS(VOP_ISLOCKED(vp)); 1727 1728 return 0; 1729 } 1730 1731 /* Sync timestamps */ 1732 void 1733 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1734 const struct timespec *mod) 1735 { 1736 struct tmpfs_node *node; 1737 struct timespec now; 1738 1739 node = VP_TO_TMPFS_NODE(vp); 1740 1741 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1742 TMPFS_NODE_CHANGED)) == 0) 1743 return; 1744 1745 vfs_timestamp(&now); 1746 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1747 if (acc == NULL) 1748 acc = &now; 1749 node->tn_atime = *acc; 1750 } 1751 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1752 if (mod == NULL) 1753 mod = &now; 1754 node->tn_mtime = *mod; 1755 } 1756 if (node->tn_status & TMPFS_NODE_CHANGED) { 1757 node->tn_ctime = now; 1758 } 1759 node->tn_status &= 1760 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1761 /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ 1762 random_harvest_queue(node, sizeof(*node), 1, RANDOM_FS_ATIME); 1763 } 1764 1765 void 1766 tmpfs_update(struct vnode *vp) 1767 { 1768 1769 tmpfs_itimes(vp, NULL, NULL); 1770 } 1771 1772 int 1773 tmpfs_truncate(struct vnode *vp, off_t length) 1774 { 1775 int error; 1776 struct tmpfs_node *node; 1777 1778 node = VP_TO_TMPFS_NODE(vp); 1779 1780 if (length < 0) { 1781 error = EINVAL; 1782 goto out; 1783 } 1784 1785 if (node->tn_size == length) { 1786 error = 0; 1787 goto out; 1788 } 1789 1790 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1791 return (EFBIG); 1792 1793 error = tmpfs_reg_resize(vp, length, FALSE); 1794 if (error == 0) { 1795 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1796 } 1797 1798 out: 1799 tmpfs_update(vp); 1800 1801 return error; 1802 } 1803 1804 static __inline int 1805 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1806 { 1807 if (a->td_hash > b->td_hash) 1808 return (1); 1809 else if (a->td_hash < b->td_hash) 1810 return (-1); 1811 return (0); 1812 } 1813 1814 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 1815