1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/fnv_hash.h> 41 #include <sys/lock.h> 42 #include <sys/namei.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/random.h> 46 #include <sys/rwlock.h> 47 #include <sys/stat.h> 48 #include <sys/systm.h> 49 #include <sys/sysctl.h> 50 #include <sys/vnode.h> 51 #include <sys/vmmeter.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_pageout.h> 58 #include <vm/vm_pager.h> 59 #include <vm/vm_extern.h> 60 61 #include <fs/tmpfs/tmpfs.h> 62 #include <fs/tmpfs/tmpfs_fifoops.h> 63 #include <fs/tmpfs/tmpfs_vnops.h> 64 65 struct tmpfs_dir_cursor { 66 struct tmpfs_dirent *tdc_current; 67 struct tmpfs_dirent *tdc_tree; 68 }; 69 70 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system"); 71 72 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; 73 74 static int 75 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) 76 { 77 int error; 78 long pages, bytes; 79 80 pages = *(long *)arg1; 81 bytes = pages * PAGE_SIZE; 82 83 error = sysctl_handle_long(oidp, &bytes, 0, req); 84 if (error || !req->newptr) 85 return (error); 86 87 pages = bytes / PAGE_SIZE; 88 if (pages < TMPFS_PAGES_MINRESERVED) 89 return (EINVAL); 90 91 *(long *)arg1 = pages; 92 return (0); 93 } 94 95 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_RW, 96 &tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L", 97 "Amount of available memory and swap below which tmpfs growth stops"); 98 99 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, 100 struct tmpfs_dirent *b); 101 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 102 103 size_t 104 tmpfs_mem_avail(void) 105 { 106 vm_ooffset_t avail; 107 108 avail = swap_pager_avail + vm_cnt.v_free_count + vm_cnt.v_cache_count - 109 tmpfs_pages_reserved; 110 if (__predict_false(avail < 0)) 111 avail = 0; 112 return (avail); 113 } 114 115 size_t 116 tmpfs_pages_used(struct tmpfs_mount *tmp) 117 { 118 const size_t node_size = sizeof(struct tmpfs_node) + 119 sizeof(struct tmpfs_dirent); 120 size_t meta_pages; 121 122 meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, 123 PAGE_SIZE); 124 return (meta_pages + tmp->tm_pages_used); 125 } 126 127 static size_t 128 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) 129 { 130 if (tmpfs_mem_avail() < req_pages) 131 return (0); 132 133 if (tmp->tm_pages_max != SIZE_MAX && 134 tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) 135 return (0); 136 137 return (1); 138 } 139 140 /* 141 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 142 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 143 * using the credentials of the process 'p'. 144 * 145 * If the node type is set to 'VDIR', then the parent parameter must point 146 * to the parent directory of the node being created. It may only be NULL 147 * while allocating the root node. 148 * 149 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 150 * specifies the device the node represents. 151 * 152 * If the node type is set to 'VLNK', then the parameter target specifies 153 * the file name of the target file for the symbolic link that is being 154 * created. 155 * 156 * Note that new nodes are retrieved from the available list if it has 157 * items or, if it is empty, from the node pool as long as there is enough 158 * space to create them. 159 * 160 * Returns zero on success or an appropriate error code on failure. 161 */ 162 int 163 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, 164 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 165 char *target, dev_t rdev, struct tmpfs_node **node) 166 { 167 struct tmpfs_node *nnode; 168 vm_object_t obj; 169 170 /* If the root directory of the 'tmp' file system is not yet 171 * allocated, this must be the request to do it. */ 172 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 173 KASSERT(tmp->tm_root == NULL || mp->mnt_writeopcount > 0, 174 ("creating node not under vn_start_write")); 175 176 MPASS(IFF(type == VLNK, target != NULL)); 177 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 178 179 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 180 return (ENOSPC); 181 if (tmpfs_pages_check_avail(tmp, 1) == 0) 182 return (ENOSPC); 183 184 if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { 185 /* 186 * When a new tmpfs node is created for fully 187 * constructed mount point, there must be a parent 188 * node, which vnode is locked exclusively. As 189 * consequence, if the unmount is executing in 190 * parallel, vflush() cannot reclaim the parent vnode. 191 * Due to this, the check for MNTK_UNMOUNT flag is not 192 * racy: if we did not see MNTK_UNMOUNT flag, then tmp 193 * cannot be destroyed until node construction is 194 * finished and the parent vnode unlocked. 195 * 196 * Tmpfs does not need to instantiate new nodes during 197 * unmount. 198 */ 199 return (EBUSY); 200 } 201 202 nnode = (struct tmpfs_node *)uma_zalloc_arg( 203 tmp->tm_node_pool, tmp, M_WAITOK); 204 205 /* Generic initialization. */ 206 nnode->tn_type = type; 207 vfs_timestamp(&nnode->tn_atime); 208 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 209 nnode->tn_atime; 210 nnode->tn_uid = uid; 211 nnode->tn_gid = gid; 212 nnode->tn_mode = mode; 213 nnode->tn_id = alloc_unr(tmp->tm_ino_unr); 214 215 /* Type-specific initialization. */ 216 switch (nnode->tn_type) { 217 case VBLK: 218 case VCHR: 219 nnode->tn_rdev = rdev; 220 break; 221 222 case VDIR: 223 RB_INIT(&nnode->tn_dir.tn_dirhead); 224 LIST_INIT(&nnode->tn_dir.tn_dupindex); 225 MPASS(parent != nnode); 226 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 227 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 228 nnode->tn_dir.tn_readdir_lastn = 0; 229 nnode->tn_dir.tn_readdir_lastp = NULL; 230 nnode->tn_links++; 231 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 232 nnode->tn_dir.tn_parent->tn_links++; 233 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 234 break; 235 236 case VFIFO: 237 /* FALLTHROUGH */ 238 case VSOCK: 239 break; 240 241 case VLNK: 242 MPASS(strlen(target) < MAXPATHLEN); 243 nnode->tn_size = strlen(target); 244 nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, 245 M_WAITOK); 246 memcpy(nnode->tn_link, target, nnode->tn_size); 247 break; 248 249 case VREG: 250 obj = nnode->tn_reg.tn_aobj = 251 vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0, 252 NULL /* XXXKIB - tmpfs needs swap reservation */); 253 VM_OBJECT_WLOCK(obj); 254 /* OBJ_TMPFS is set together with the setting of vp->v_object */ 255 vm_object_set_flag(obj, OBJ_NOSPLIT | OBJ_TMPFS_NODE); 256 vm_object_clear_flag(obj, OBJ_ONEMAPPING); 257 VM_OBJECT_WUNLOCK(obj); 258 break; 259 260 default: 261 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 262 } 263 264 TMPFS_LOCK(tmp); 265 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 266 tmp->tm_nodes_inuse++; 267 TMPFS_UNLOCK(tmp); 268 269 *node = nnode; 270 return 0; 271 } 272 273 /* 274 * Destroys the node pointed to by node from the file system 'tmp'. 275 * If the node does not belong to the given mount point, the results are 276 * unpredicted. 277 * 278 * If the node references a directory; no entries are allowed because 279 * their removal could need a recursive algorithm, something forbidden in 280 * kernel space. Furthermore, there is not need to provide such 281 * functionality (recursive removal) because the only primitives offered 282 * to the user are the removal of empty directories and the deletion of 283 * individual files. 284 * 285 * Note that nodes are not really deleted; in fact, when a node has been 286 * allocated, it cannot be deleted during the whole life of the file 287 * system. Instead, they are moved to the available list and remain there 288 * until reused. 289 */ 290 void 291 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 292 { 293 vm_object_t uobj; 294 295 #ifdef INVARIANTS 296 TMPFS_NODE_LOCK(node); 297 MPASS(node->tn_vnode == NULL); 298 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 299 TMPFS_NODE_UNLOCK(node); 300 #endif 301 302 TMPFS_LOCK(tmp); 303 LIST_REMOVE(node, tn_entries); 304 tmp->tm_nodes_inuse--; 305 TMPFS_UNLOCK(tmp); 306 307 switch (node->tn_type) { 308 case VNON: 309 /* Do not do anything. VNON is provided to let the 310 * allocation routine clean itself easily by avoiding 311 * duplicating code in it. */ 312 /* FALLTHROUGH */ 313 case VBLK: 314 /* FALLTHROUGH */ 315 case VCHR: 316 /* FALLTHROUGH */ 317 case VDIR: 318 /* FALLTHROUGH */ 319 case VFIFO: 320 /* FALLTHROUGH */ 321 case VSOCK: 322 break; 323 324 case VLNK: 325 free(node->tn_link, M_TMPFSNAME); 326 break; 327 328 case VREG: 329 uobj = node->tn_reg.tn_aobj; 330 if (uobj != NULL) { 331 TMPFS_LOCK(tmp); 332 tmp->tm_pages_used -= uobj->size; 333 TMPFS_UNLOCK(tmp); 334 KASSERT((uobj->flags & OBJ_TMPFS) == 0, 335 ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); 336 vm_object_deallocate(uobj); 337 } 338 break; 339 340 default: 341 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 342 } 343 344 free_unr(tmp->tm_ino_unr, node->tn_id); 345 uma_zfree(tmp->tm_node_pool, node); 346 } 347 348 static __inline uint32_t 349 tmpfs_dirent_hash(const char *name, u_int len) 350 { 351 uint32_t hash; 352 353 hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; 354 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP 355 hash &= 0xf; 356 #endif 357 if (hash < TMPFS_DIRCOOKIE_MIN) 358 hash += TMPFS_DIRCOOKIE_MIN; 359 360 return (hash); 361 } 362 363 static __inline off_t 364 tmpfs_dirent_cookie(struct tmpfs_dirent *de) 365 { 366 if (de == NULL) 367 return (TMPFS_DIRCOOKIE_EOF); 368 369 MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); 370 371 return (de->td_cookie); 372 } 373 374 static __inline boolean_t 375 tmpfs_dirent_dup(struct tmpfs_dirent *de) 376 { 377 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); 378 } 379 380 static __inline boolean_t 381 tmpfs_dirent_duphead(struct tmpfs_dirent *de) 382 { 383 return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); 384 } 385 386 void 387 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) 388 { 389 de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); 390 memcpy(de->ud.td_name, name, namelen); 391 de->td_namelen = namelen; 392 } 393 394 /* 395 * Allocates a new directory entry for the node node with a name of name. 396 * The new directory entry is returned in *de. 397 * 398 * The link count of node is increased by one to reflect the new object 399 * referencing it. 400 * 401 * Returns zero on success or an appropriate error code on failure. 402 */ 403 int 404 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 405 const char *name, u_int len, struct tmpfs_dirent **de) 406 { 407 struct tmpfs_dirent *nde; 408 409 nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK); 410 nde->td_node = node; 411 if (name != NULL) { 412 nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 413 tmpfs_dirent_init(nde, name, len); 414 } else 415 nde->td_namelen = 0; 416 if (node != NULL) 417 node->tn_links++; 418 419 *de = nde; 420 421 return 0; 422 } 423 424 /* 425 * Frees a directory entry. It is the caller's responsibility to destroy 426 * the node referenced by it if needed. 427 * 428 * The link count of node is decreased by one to reflect the removal of an 429 * object that referenced it. This only happens if 'node_exists' is true; 430 * otherwise the function will not access the node referred to by the 431 * directory entry, as it may already have been released from the outside. 432 */ 433 void 434 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 435 { 436 struct tmpfs_node *node; 437 438 node = de->td_node; 439 if (node != NULL) { 440 MPASS(node->tn_links > 0); 441 node->tn_links--; 442 } 443 if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) 444 free(de->ud.td_name, M_TMPFSNAME); 445 uma_zfree(tmp->tm_dirent_pool, de); 446 } 447 448 void 449 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) 450 { 451 452 ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); 453 if (vp->v_type != VREG || obj == NULL) 454 return; 455 456 VM_OBJECT_WLOCK(obj); 457 VI_LOCK(vp); 458 vm_object_clear_flag(obj, OBJ_TMPFS); 459 obj->un_pager.swp.swp_tmpfs = NULL; 460 VI_UNLOCK(vp); 461 VM_OBJECT_WUNLOCK(obj); 462 } 463 464 /* 465 * Need to clear v_object for insmntque failure. 466 */ 467 static void 468 tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg) 469 { 470 471 tmpfs_destroy_vobject(vp, vp->v_object); 472 vp->v_object = NULL; 473 vp->v_data = NULL; 474 vp->v_op = &dead_vnodeops; 475 vgone(vp); 476 vput(vp); 477 } 478 479 /* 480 * Allocates a new vnode for the node node or returns a new reference to 481 * an existing one if the node had already a vnode referencing it. The 482 * resulting locked vnode is returned in *vpp. 483 * 484 * Returns zero on success or an appropriate error code on failure. 485 */ 486 int 487 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 488 struct vnode **vpp) 489 { 490 struct vnode *vp; 491 vm_object_t object; 492 int error; 493 494 error = 0; 495 loop: 496 TMPFS_NODE_LOCK(node); 497 loop1: 498 if ((vp = node->tn_vnode) != NULL) { 499 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 500 VI_LOCK(vp); 501 if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || 502 ((vp->v_iflag & VI_DOOMED) != 0 && 503 (lkflag & LK_NOWAIT) != 0)) { 504 VI_UNLOCK(vp); 505 TMPFS_NODE_UNLOCK(node); 506 error = ENOENT; 507 vp = NULL; 508 goto out; 509 } 510 if ((vp->v_iflag & VI_DOOMED) != 0) { 511 VI_UNLOCK(vp); 512 node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; 513 while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { 514 msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 515 0, "tmpfsE", 0); 516 } 517 goto loop1; 518 } 519 TMPFS_NODE_UNLOCK(node); 520 error = vget(vp, lkflag | LK_INTERLOCK, curthread); 521 if (error == ENOENT) 522 goto loop; 523 if (error != 0) { 524 vp = NULL; 525 goto out; 526 } 527 528 /* 529 * Make sure the vnode is still there after 530 * getting the interlock to avoid racing a free. 531 */ 532 if (node->tn_vnode == NULL || node->tn_vnode != vp) { 533 vput(vp); 534 goto loop; 535 } 536 537 goto out; 538 } 539 540 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 541 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 542 TMPFS_NODE_UNLOCK(node); 543 error = ENOENT; 544 vp = NULL; 545 goto out; 546 } 547 548 /* 549 * otherwise lock the vp list while we call getnewvnode 550 * since that can block. 551 */ 552 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 553 node->tn_vpstate |= TMPFS_VNODE_WANT; 554 error = msleep((caddr_t) &node->tn_vpstate, 555 TMPFS_NODE_MTX(node), PDROP | PCATCH, 556 "tmpfs_alloc_vp", 0); 557 if (error) 558 return error; 559 560 goto loop; 561 } else 562 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 563 564 TMPFS_NODE_UNLOCK(node); 565 566 /* Get a new vnode and associate it with our node. */ 567 error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp); 568 if (error != 0) 569 goto unlock; 570 MPASS(vp != NULL); 571 572 /* lkflag is ignored, the lock is exclusive */ 573 (void) vn_lock(vp, lkflag | LK_RETRY); 574 575 vp->v_data = node; 576 vp->v_type = node->tn_type; 577 578 /* Type-specific initialization. */ 579 switch (node->tn_type) { 580 case VBLK: 581 /* FALLTHROUGH */ 582 case VCHR: 583 /* FALLTHROUGH */ 584 case VLNK: 585 /* FALLTHROUGH */ 586 case VSOCK: 587 break; 588 case VFIFO: 589 vp->v_op = &tmpfs_fifoop_entries; 590 break; 591 case VREG: 592 object = node->tn_reg.tn_aobj; 593 VM_OBJECT_WLOCK(object); 594 VI_LOCK(vp); 595 KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); 596 vp->v_object = object; 597 object->un_pager.swp.swp_tmpfs = vp; 598 vm_object_set_flag(object, OBJ_TMPFS); 599 VI_UNLOCK(vp); 600 VM_OBJECT_WUNLOCK(object); 601 break; 602 case VDIR: 603 MPASS(node->tn_dir.tn_parent != NULL); 604 if (node->tn_dir.tn_parent == node) 605 vp->v_vflag |= VV_ROOT; 606 break; 607 608 default: 609 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 610 } 611 if (vp->v_type != VFIFO) 612 VN_LOCK_ASHARE(vp); 613 614 error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL); 615 if (error) 616 vp = NULL; 617 618 unlock: 619 TMPFS_NODE_LOCK(node); 620 621 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 622 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 623 node->tn_vnode = vp; 624 625 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 626 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 627 TMPFS_NODE_UNLOCK(node); 628 wakeup((caddr_t) &node->tn_vpstate); 629 } else 630 TMPFS_NODE_UNLOCK(node); 631 632 out: 633 *vpp = vp; 634 635 #ifdef INVARIANTS 636 if (error == 0) { 637 MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); 638 TMPFS_NODE_LOCK(node); 639 MPASS(*vpp == node->tn_vnode); 640 TMPFS_NODE_UNLOCK(node); 641 } 642 #endif 643 644 return error; 645 } 646 647 /* 648 * Destroys the association between the vnode vp and the node it 649 * references. 650 */ 651 void 652 tmpfs_free_vp(struct vnode *vp) 653 { 654 struct tmpfs_node *node; 655 656 node = VP_TO_TMPFS_NODE(vp); 657 658 TMPFS_NODE_ASSERT_LOCKED(node); 659 node->tn_vnode = NULL; 660 if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) 661 wakeup(&node->tn_vnode); 662 node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; 663 vp->v_data = NULL; 664 } 665 666 /* 667 * Allocates a new file of type 'type' and adds it to the parent directory 668 * 'dvp'; this addition is done using the component name given in 'cnp'. 669 * The ownership of the new file is automatically assigned based on the 670 * credentials of the caller (through 'cnp'), the group is set based on 671 * the parent directory and the mode is determined from the 'vap' argument. 672 * If successful, *vpp holds a vnode to the newly created file and zero 673 * is returned. Otherwise *vpp is NULL and the function returns an 674 * appropriate error code. 675 */ 676 int 677 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 678 struct componentname *cnp, char *target) 679 { 680 int error; 681 struct tmpfs_dirent *de; 682 struct tmpfs_mount *tmp; 683 struct tmpfs_node *dnode; 684 struct tmpfs_node *node; 685 struct tmpfs_node *parent; 686 687 MPASS(VOP_ISLOCKED(dvp)); 688 MPASS(cnp->cn_flags & HASBUF); 689 690 tmp = VFS_TO_TMPFS(dvp->v_mount); 691 dnode = VP_TO_TMPFS_DIR(dvp); 692 *vpp = NULL; 693 694 /* If the entry we are creating is a directory, we cannot overflow 695 * the number of links of its parent, because it will get a new 696 * link. */ 697 if (vap->va_type == VDIR) { 698 /* Ensure that we do not overflow the maximum number of links 699 * imposed by the system. */ 700 MPASS(dnode->tn_links <= LINK_MAX); 701 if (dnode->tn_links == LINK_MAX) { 702 return (EMLINK); 703 } 704 705 parent = dnode; 706 MPASS(parent != NULL); 707 } else 708 parent = NULL; 709 710 /* Allocate a node that represents the new file. */ 711 error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, 712 cnp->cn_cred->cr_uid, 713 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 714 if (error != 0) 715 return (error); 716 717 /* Allocate a directory entry that points to the new file. */ 718 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 719 &de); 720 if (error != 0) { 721 tmpfs_free_node(tmp, node); 722 return (error); 723 } 724 725 /* Allocate a vnode for the new file. */ 726 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 727 if (error != 0) { 728 tmpfs_free_dirent(tmp, de); 729 tmpfs_free_node(tmp, node); 730 return (error); 731 } 732 733 /* Now that all required items are allocated, we can proceed to 734 * insert the new node into the directory, an operation that 735 * cannot fail. */ 736 if (cnp->cn_flags & ISWHITEOUT) 737 tmpfs_dir_whiteout_remove(dvp, cnp); 738 tmpfs_dir_attach(dvp, de); 739 return (0); 740 } 741 742 static struct tmpfs_dirent * 743 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 744 { 745 struct tmpfs_dirent *de; 746 747 de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); 748 dc->tdc_tree = de; 749 if (de != NULL && tmpfs_dirent_duphead(de)) 750 de = LIST_FIRST(&de->ud.td_duphead); 751 dc->tdc_current = de; 752 753 return (dc->tdc_current); 754 } 755 756 static struct tmpfs_dirent * 757 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) 758 { 759 struct tmpfs_dirent *de; 760 761 MPASS(dc->tdc_tree != NULL); 762 if (tmpfs_dirent_dup(dc->tdc_current)) { 763 dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); 764 if (dc->tdc_current != NULL) 765 return (dc->tdc_current); 766 } 767 dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, 768 &dnode->tn_dir.tn_dirhead, dc->tdc_tree); 769 if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { 770 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 771 MPASS(dc->tdc_current != NULL); 772 } 773 774 return (dc->tdc_current); 775 } 776 777 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ 778 static struct tmpfs_dirent * 779 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) 780 { 781 struct tmpfs_dirent *de, dekey; 782 783 dekey.td_hash = hash; 784 de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); 785 return (de); 786 } 787 788 /* Lookup directory entry by cookie, initialize directory cursor accordingly. */ 789 static struct tmpfs_dirent * 790 tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, 791 struct tmpfs_dir_cursor *dc) 792 { 793 struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead; 794 struct tmpfs_dirent *de, dekey; 795 796 MPASS(cookie >= TMPFS_DIRCOOKIE_MIN); 797 798 if (cookie == node->tn_dir.tn_readdir_lastn && 799 (de = node->tn_dir.tn_readdir_lastp) != NULL) { 800 /* Protect against possible race, tn_readdir_last[pn] 801 * may be updated with only shared vnode lock held. */ 802 if (cookie == tmpfs_dirent_cookie(de)) 803 goto out; 804 } 805 806 if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) { 807 LIST_FOREACH(de, &node->tn_dir.tn_dupindex, 808 uh.td_dup.index_entries) { 809 MPASS(tmpfs_dirent_dup(de)); 810 if (de->td_cookie == cookie) 811 goto out; 812 /* dupindex list is sorted. */ 813 if (de->td_cookie < cookie) { 814 de = NULL; 815 goto out; 816 } 817 } 818 MPASS(de == NULL); 819 goto out; 820 } 821 822 if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { 823 de = NULL; 824 } else { 825 dekey.td_hash = cookie; 826 /* Recover if direntry for cookie was removed */ 827 de = RB_NFIND(tmpfs_dir, dirhead, &dekey); 828 } 829 dc->tdc_tree = de; 830 dc->tdc_current = de; 831 if (de != NULL && tmpfs_dirent_duphead(de)) { 832 dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); 833 MPASS(dc->tdc_current != NULL); 834 } 835 return (dc->tdc_current); 836 837 out: 838 dc->tdc_tree = de; 839 dc->tdc_current = de; 840 if (de != NULL && tmpfs_dirent_dup(de)) 841 dc->tdc_tree = tmpfs_dir_xlookup_hash(node, 842 de->td_hash); 843 return (dc->tdc_current); 844 } 845 846 /* 847 * Looks for a directory entry in the directory represented by node. 848 * 'cnp' describes the name of the entry to look for. Note that the . 849 * and .. components are not allowed as they do not physically exist 850 * within directories. 851 * 852 * Returns a pointer to the entry when found, otherwise NULL. 853 */ 854 struct tmpfs_dirent * 855 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 856 struct componentname *cnp) 857 { 858 struct tmpfs_dir_duphead *duphead; 859 struct tmpfs_dirent *de; 860 uint32_t hash; 861 862 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 863 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 864 cnp->cn_nameptr[1] == '.'))); 865 TMPFS_VALIDATE_DIR(node); 866 867 hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); 868 de = tmpfs_dir_xlookup_hash(node, hash); 869 if (de != NULL && tmpfs_dirent_duphead(de)) { 870 duphead = &de->ud.td_duphead; 871 LIST_FOREACH(de, duphead, uh.td_dup.entries) { 872 if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 873 cnp->cn_namelen)) 874 break; 875 } 876 } else if (de != NULL) { 877 if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, 878 cnp->cn_namelen)) 879 de = NULL; 880 } 881 if (de != NULL && f != NULL && de->td_node != f) 882 de = NULL; 883 884 return (de); 885 } 886 887 /* 888 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex 889 * list, allocate new cookie value. 890 */ 891 static void 892 tmpfs_dir_attach_dup(struct tmpfs_node *dnode, 893 struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) 894 { 895 struct tmpfs_dir_duphead *dupindex; 896 struct tmpfs_dirent *de, *pde; 897 898 dupindex = &dnode->tn_dir.tn_dupindex; 899 de = LIST_FIRST(dupindex); 900 if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { 901 if (de == NULL) 902 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 903 else 904 nde->td_cookie = de->td_cookie + 1; 905 MPASS(tmpfs_dirent_dup(nde)); 906 LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); 907 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 908 return; 909 } 910 911 /* 912 * Cookie numbers are near exhaustion. Scan dupindex list for unused 913 * numbers. dupindex list is sorted in descending order. Keep it so 914 * after inserting nde. 915 */ 916 while (1) { 917 pde = de; 918 de = LIST_NEXT(de, uh.td_dup.index_entries); 919 if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { 920 /* 921 * Last element of the index doesn't have minimal cookie 922 * value, use it. 923 */ 924 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; 925 LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); 926 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 927 return; 928 } else if (de == NULL) { 929 /* 930 * We are so lucky have 2^30 hash duplicates in single 931 * directory :) Return largest possible cookie value. 932 * It should be fine except possible issues with 933 * VOP_READDIR restart. 934 */ 935 nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; 936 LIST_INSERT_HEAD(dupindex, nde, 937 uh.td_dup.index_entries); 938 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 939 return; 940 } 941 if (de->td_cookie + 1 == pde->td_cookie || 942 de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) 943 continue; /* No hole or invalid cookie. */ 944 nde->td_cookie = de->td_cookie + 1; 945 MPASS(tmpfs_dirent_dup(nde)); 946 MPASS(pde->td_cookie > nde->td_cookie); 947 MPASS(nde->td_cookie > de->td_cookie); 948 LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); 949 LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); 950 return; 951 } 952 } 953 954 /* 955 * Attaches the directory entry de to the directory represented by vp. 956 * Note that this does not change the link count of the node pointed by 957 * the directory entry, as this is done by tmpfs_alloc_dirent. 958 */ 959 void 960 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 961 { 962 struct tmpfs_node *dnode; 963 struct tmpfs_dirent *xde, *nde; 964 965 ASSERT_VOP_ELOCKED(vp, __func__); 966 MPASS(de->td_namelen > 0); 967 MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); 968 MPASS(de->td_cookie == de->td_hash); 969 970 dnode = VP_TO_TMPFS_DIR(vp); 971 dnode->tn_dir.tn_readdir_lastn = 0; 972 dnode->tn_dir.tn_readdir_lastp = NULL; 973 974 MPASS(!tmpfs_dirent_dup(de)); 975 xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 976 if (xde != NULL && tmpfs_dirent_duphead(xde)) 977 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 978 else if (xde != NULL) { 979 /* 980 * Allocate new duphead. Swap xde with duphead to avoid 981 * adding/removing elements with the same hash. 982 */ 983 MPASS(!tmpfs_dirent_dup(xde)); 984 tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, 985 &nde); 986 /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ 987 memcpy(nde, xde, sizeof(*xde)); 988 xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; 989 LIST_INIT(&xde->ud.td_duphead); 990 xde->td_namelen = 0; 991 xde->td_node = NULL; 992 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); 993 tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); 994 } 995 dnode->tn_size += sizeof(struct tmpfs_dirent); 996 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 997 TMPFS_NODE_MODIFIED; 998 tmpfs_update(vp); 999 } 1000 1001 /* 1002 * Detaches the directory entry de from the directory represented by vp. 1003 * Note that this does not change the link count of the node pointed by 1004 * the directory entry, as this is done by tmpfs_free_dirent. 1005 */ 1006 void 1007 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 1008 { 1009 struct tmpfs_mount *tmp; 1010 struct tmpfs_dir *head; 1011 struct tmpfs_node *dnode; 1012 struct tmpfs_dirent *xde; 1013 1014 ASSERT_VOP_ELOCKED(vp, __func__); 1015 1016 dnode = VP_TO_TMPFS_DIR(vp); 1017 head = &dnode->tn_dir.tn_dirhead; 1018 dnode->tn_dir.tn_readdir_lastn = 0; 1019 dnode->tn_dir.tn_readdir_lastp = NULL; 1020 1021 if (tmpfs_dirent_dup(de)) { 1022 /* Remove duphead if de was last entry. */ 1023 if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { 1024 xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); 1025 MPASS(tmpfs_dirent_duphead(xde)); 1026 } else 1027 xde = NULL; 1028 LIST_REMOVE(de, uh.td_dup.entries); 1029 LIST_REMOVE(de, uh.td_dup.index_entries); 1030 if (xde != NULL) { 1031 if (LIST_EMPTY(&xde->ud.td_duphead)) { 1032 RB_REMOVE(tmpfs_dir, head, xde); 1033 tmp = VFS_TO_TMPFS(vp->v_mount); 1034 MPASS(xde->td_node == NULL); 1035 tmpfs_free_dirent(tmp, xde); 1036 } 1037 } 1038 de->td_cookie = de->td_hash; 1039 } else 1040 RB_REMOVE(tmpfs_dir, head, de); 1041 1042 dnode->tn_size -= sizeof(struct tmpfs_dirent); 1043 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 1044 TMPFS_NODE_MODIFIED; 1045 tmpfs_update(vp); 1046 } 1047 1048 void 1049 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) 1050 { 1051 struct tmpfs_dirent *de, *dde, *nde; 1052 1053 RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { 1054 RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); 1055 /* Node may already be destroyed. */ 1056 de->td_node = NULL; 1057 if (tmpfs_dirent_duphead(de)) { 1058 while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { 1059 LIST_REMOVE(dde, uh.td_dup.entries); 1060 dde->td_node = NULL; 1061 tmpfs_free_dirent(tmp, dde); 1062 } 1063 } 1064 tmpfs_free_dirent(tmp, de); 1065 } 1066 } 1067 1068 /* 1069 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 1070 * directory and returns it in the uio space. The function returns 0 1071 * on success, -1 if there was not enough space in the uio structure to 1072 * hold the directory entry or an appropriate error code if another 1073 * error happens. 1074 */ 1075 static int 1076 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 1077 { 1078 int error; 1079 struct dirent dent; 1080 1081 TMPFS_VALIDATE_DIR(node); 1082 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 1083 1084 dent.d_fileno = node->tn_id; 1085 dent.d_type = DT_DIR; 1086 dent.d_namlen = 1; 1087 dent.d_name[0] = '.'; 1088 dent.d_name[1] = '\0'; 1089 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1090 1091 if (dent.d_reclen > uio->uio_resid) 1092 error = EJUSTRETURN; 1093 else 1094 error = uiomove(&dent, dent.d_reclen, uio); 1095 1096 node->tn_status |= TMPFS_NODE_ACCESSED; 1097 1098 return error; 1099 } 1100 1101 /* 1102 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 1103 * directory and returns it in the uio space. The function returns 0 1104 * on success, -1 if there was not enough space in the uio structure to 1105 * hold the directory entry or an appropriate error code if another 1106 * error happens. 1107 */ 1108 static int 1109 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 1110 { 1111 int error; 1112 struct dirent dent; 1113 1114 TMPFS_VALIDATE_DIR(node); 1115 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 1116 1117 /* 1118 * Return ENOENT if the current node is already removed. 1119 */ 1120 TMPFS_ASSERT_LOCKED(node); 1121 if (node->tn_dir.tn_parent == NULL) { 1122 return (ENOENT); 1123 } 1124 1125 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 1126 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 1127 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 1128 1129 dent.d_type = DT_DIR; 1130 dent.d_namlen = 2; 1131 dent.d_name[0] = '.'; 1132 dent.d_name[1] = '.'; 1133 dent.d_name[2] = '\0'; 1134 dent.d_reclen = GENERIC_DIRSIZ(&dent); 1135 1136 if (dent.d_reclen > uio->uio_resid) 1137 error = EJUSTRETURN; 1138 else 1139 error = uiomove(&dent, dent.d_reclen, uio); 1140 1141 node->tn_status |= TMPFS_NODE_ACCESSED; 1142 1143 return error; 1144 } 1145 1146 /* 1147 * Helper function for tmpfs_readdir. Returns as much directory entries 1148 * as can fit in the uio space. The read starts at uio->uio_offset. 1149 * The function returns 0 on success, -1 if there was not enough space 1150 * in the uio structure to hold the directory entry or an appropriate 1151 * error code if another error happens. 1152 */ 1153 int 1154 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int maxcookies, 1155 u_long *cookies, int *ncookies) 1156 { 1157 struct tmpfs_dir_cursor dc; 1158 struct tmpfs_dirent *de; 1159 off_t off; 1160 int error; 1161 1162 TMPFS_VALIDATE_DIR(node); 1163 1164 off = 0; 1165 1166 /* 1167 * Lookup the node from the current offset. The starting offset of 1168 * 0 will lookup both '.' and '..', and then the first real entry, 1169 * or EOF if there are none. Then find all entries for the dir that 1170 * fit into the buffer. Once no more entries are found (de == NULL), 1171 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next 1172 * call to return 0. 1173 */ 1174 switch (uio->uio_offset) { 1175 case TMPFS_DIRCOOKIE_DOT: 1176 error = tmpfs_dir_getdotdent(node, uio); 1177 if (error != 0) 1178 return (error); 1179 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 1180 if (cookies != NULL) 1181 cookies[(*ncookies)++] = off = uio->uio_offset; 1182 /* FALLTHROUGH */ 1183 case TMPFS_DIRCOOKIE_DOTDOT: 1184 error = tmpfs_dir_getdotdotdent(node, uio); 1185 if (error != 0) 1186 return (error); 1187 de = tmpfs_dir_first(node, &dc); 1188 uio->uio_offset = tmpfs_dirent_cookie(de); 1189 if (cookies != NULL) 1190 cookies[(*ncookies)++] = off = uio->uio_offset; 1191 /* EOF. */ 1192 if (de == NULL) 1193 return (0); 1194 break; 1195 case TMPFS_DIRCOOKIE_EOF: 1196 return (0); 1197 default: 1198 de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); 1199 if (de == NULL) 1200 return (EINVAL); 1201 if (cookies != NULL) 1202 off = tmpfs_dirent_cookie(de); 1203 } 1204 1205 /* Read as much entries as possible; i.e., until we reach the end of 1206 * the directory or we exhaust uio space. */ 1207 do { 1208 struct dirent d; 1209 1210 /* Create a dirent structure representing the current 1211 * tmpfs_node and fill it. */ 1212 if (de->td_node == NULL) { 1213 d.d_fileno = 1; 1214 d.d_type = DT_WHT; 1215 } else { 1216 d.d_fileno = de->td_node->tn_id; 1217 switch (de->td_node->tn_type) { 1218 case VBLK: 1219 d.d_type = DT_BLK; 1220 break; 1221 1222 case VCHR: 1223 d.d_type = DT_CHR; 1224 break; 1225 1226 case VDIR: 1227 d.d_type = DT_DIR; 1228 break; 1229 1230 case VFIFO: 1231 d.d_type = DT_FIFO; 1232 break; 1233 1234 case VLNK: 1235 d.d_type = DT_LNK; 1236 break; 1237 1238 case VREG: 1239 d.d_type = DT_REG; 1240 break; 1241 1242 case VSOCK: 1243 d.d_type = DT_SOCK; 1244 break; 1245 1246 default: 1247 panic("tmpfs_dir_getdents: type %p %d", 1248 de->td_node, (int)de->td_node->tn_type); 1249 } 1250 } 1251 d.d_namlen = de->td_namelen; 1252 MPASS(de->td_namelen < sizeof(d.d_name)); 1253 (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); 1254 d.d_name[de->td_namelen] = '\0'; 1255 d.d_reclen = GENERIC_DIRSIZ(&d); 1256 1257 /* Stop reading if the directory entry we are treating is 1258 * bigger than the amount of data that can be returned. */ 1259 if (d.d_reclen > uio->uio_resid) { 1260 error = EJUSTRETURN; 1261 break; 1262 } 1263 1264 /* Copy the new dirent structure into the output buffer and 1265 * advance pointers. */ 1266 error = uiomove(&d, d.d_reclen, uio); 1267 if (error == 0) { 1268 de = tmpfs_dir_next(node, &dc); 1269 if (cookies != NULL) { 1270 off = tmpfs_dirent_cookie(de); 1271 MPASS(*ncookies < maxcookies); 1272 cookies[(*ncookies)++] = off; 1273 } 1274 } 1275 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 1276 1277 /* Skip setting off when using cookies as it is already done above. */ 1278 if (cookies == NULL) 1279 off = tmpfs_dirent_cookie(de); 1280 1281 /* Update the offset and cache. */ 1282 uio->uio_offset = off; 1283 node->tn_dir.tn_readdir_lastn = off; 1284 node->tn_dir.tn_readdir_lastp = de; 1285 1286 node->tn_status |= TMPFS_NODE_ACCESSED; 1287 return error; 1288 } 1289 1290 int 1291 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 1292 { 1293 struct tmpfs_dirent *de; 1294 int error; 1295 1296 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 1297 cnp->cn_nameptr, cnp->cn_namelen, &de); 1298 if (error != 0) 1299 return (error); 1300 tmpfs_dir_attach(dvp, de); 1301 return (0); 1302 } 1303 1304 void 1305 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 1306 { 1307 struct tmpfs_dirent *de; 1308 1309 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 1310 MPASS(de != NULL && de->td_node == NULL); 1311 tmpfs_dir_detach(dvp, de); 1312 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); 1313 } 1314 1315 /* 1316 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 1317 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 1318 * 'newsize' must be positive. 1319 * 1320 * Returns zero on success or an appropriate error code on failure. 1321 */ 1322 int 1323 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) 1324 { 1325 struct tmpfs_mount *tmp; 1326 struct tmpfs_node *node; 1327 vm_object_t uobj; 1328 vm_page_t m; 1329 vm_pindex_t idx, newpages, oldpages; 1330 off_t oldsize; 1331 int base, rv; 1332 1333 MPASS(vp->v_type == VREG); 1334 MPASS(newsize >= 0); 1335 1336 node = VP_TO_TMPFS_NODE(vp); 1337 uobj = node->tn_reg.tn_aobj; 1338 tmp = VFS_TO_TMPFS(vp->v_mount); 1339 1340 /* 1341 * Convert the old and new sizes to the number of pages needed to 1342 * store them. It may happen that we do not need to do anything 1343 * because the last allocated page can accommodate the change on 1344 * its own. 1345 */ 1346 oldsize = node->tn_size; 1347 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 1348 MPASS(oldpages == uobj->size); 1349 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 1350 if (newpages > oldpages && 1351 tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) 1352 return (ENOSPC); 1353 1354 VM_OBJECT_WLOCK(uobj); 1355 if (newsize < oldsize) { 1356 /* 1357 * Zero the truncated part of the last page. 1358 */ 1359 base = newsize & PAGE_MASK; 1360 if (base != 0) { 1361 idx = OFF_TO_IDX(newsize); 1362 retry: 1363 m = vm_page_lookup(uobj, idx); 1364 if (m != NULL) { 1365 if (vm_page_sleep_if_busy(m, "tmfssz")) 1366 goto retry; 1367 MPASS(m->valid == VM_PAGE_BITS_ALL); 1368 } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { 1369 m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL); 1370 if (m == NULL) { 1371 VM_OBJECT_WUNLOCK(uobj); 1372 VM_WAIT; 1373 VM_OBJECT_WLOCK(uobj); 1374 goto retry; 1375 } else if (m->valid != VM_PAGE_BITS_ALL) 1376 rv = vm_pager_get_pages(uobj, &m, 1, 1377 NULL, NULL); 1378 else 1379 /* A cached page was reactivated. */ 1380 rv = VM_PAGER_OK; 1381 vm_page_lock(m); 1382 if (rv == VM_PAGER_OK) { 1383 vm_page_deactivate(m); 1384 vm_page_unlock(m); 1385 vm_page_xunbusy(m); 1386 } else { 1387 vm_page_free(m); 1388 vm_page_unlock(m); 1389 if (ignerr) 1390 m = NULL; 1391 else { 1392 VM_OBJECT_WUNLOCK(uobj); 1393 return (EIO); 1394 } 1395 } 1396 } 1397 if (m != NULL) { 1398 pmap_zero_page_area(m, base, PAGE_SIZE - base); 1399 vm_page_dirty(m); 1400 vm_pager_page_unswapped(m); 1401 } 1402 } 1403 1404 /* 1405 * Release any swap space and free any whole pages. 1406 */ 1407 if (newpages < oldpages) { 1408 swap_pager_freespace(uobj, newpages, oldpages - 1409 newpages); 1410 vm_object_page_remove(uobj, newpages, 0, 0); 1411 } 1412 } 1413 uobj->size = newpages; 1414 VM_OBJECT_WUNLOCK(uobj); 1415 1416 TMPFS_LOCK(tmp); 1417 tmp->tm_pages_used += (newpages - oldpages); 1418 TMPFS_UNLOCK(tmp); 1419 1420 node->tn_size = newsize; 1421 return (0); 1422 } 1423 1424 void 1425 tmpfs_check_mtime(struct vnode *vp) 1426 { 1427 struct tmpfs_node *node; 1428 struct vm_object *obj; 1429 1430 ASSERT_VOP_ELOCKED(vp, "check_mtime"); 1431 if (vp->v_type != VREG) 1432 return; 1433 obj = vp->v_object; 1434 KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) == 1435 (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj")); 1436 /* unlocked read */ 1437 if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { 1438 VM_OBJECT_WLOCK(obj); 1439 if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { 1440 obj->flags &= ~OBJ_TMPFS_DIRTY; 1441 node = VP_TO_TMPFS_NODE(vp); 1442 node->tn_status |= TMPFS_NODE_MODIFIED | 1443 TMPFS_NODE_CHANGED; 1444 } 1445 VM_OBJECT_WUNLOCK(obj); 1446 } 1447 } 1448 1449 /* 1450 * Change flags of the given vnode. 1451 * Caller should execute tmpfs_update on vp after a successful execution. 1452 * The vnode must be locked on entry and remain locked on exit. 1453 */ 1454 int 1455 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, 1456 struct thread *p) 1457 { 1458 int error; 1459 struct tmpfs_node *node; 1460 1461 MPASS(VOP_ISLOCKED(vp)); 1462 1463 node = VP_TO_TMPFS_NODE(vp); 1464 1465 if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | 1466 UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | 1467 UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 1468 UF_SPARSE | UF_SYSTEM)) != 0) 1469 return (EOPNOTSUPP); 1470 1471 /* Disallow this operation if the file system is mounted read-only. */ 1472 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1473 return EROFS; 1474 1475 /* 1476 * Callers may only modify the file flags on objects they 1477 * have VADMIN rights for. 1478 */ 1479 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1480 return (error); 1481 /* 1482 * Unprivileged processes are not permitted to unset system 1483 * flags, or modify flags if any system flags are set. 1484 */ 1485 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1486 if (node->tn_flags & 1487 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 1488 error = securelevel_gt(cred, 0); 1489 if (error) 1490 return (error); 1491 } 1492 } else { 1493 if (node->tn_flags & 1494 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1495 ((flags ^ node->tn_flags) & SF_SETTABLE)) 1496 return (EPERM); 1497 } 1498 node->tn_flags = flags; 1499 node->tn_status |= TMPFS_NODE_CHANGED; 1500 1501 MPASS(VOP_ISLOCKED(vp)); 1502 1503 return 0; 1504 } 1505 1506 /* 1507 * Change access mode on the given vnode. 1508 * Caller should execute tmpfs_update on vp after a successful execution. 1509 * The vnode must be locked on entry and remain locked on exit. 1510 */ 1511 int 1512 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1513 { 1514 int error; 1515 struct tmpfs_node *node; 1516 1517 MPASS(VOP_ISLOCKED(vp)); 1518 1519 node = VP_TO_TMPFS_NODE(vp); 1520 1521 /* Disallow this operation if the file system is mounted read-only. */ 1522 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1523 return EROFS; 1524 1525 /* Immutable or append-only files cannot be modified, either. */ 1526 if (node->tn_flags & (IMMUTABLE | APPEND)) 1527 return EPERM; 1528 1529 /* 1530 * To modify the permissions on a file, must possess VADMIN 1531 * for that file. 1532 */ 1533 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1534 return (error); 1535 1536 /* 1537 * Privileged processes may set the sticky bit on non-directories, 1538 * as well as set the setgid bit on a file with a group that the 1539 * process is not a member of. 1540 */ 1541 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1542 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1543 return (EFTYPE); 1544 } 1545 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1546 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1547 if (error) 1548 return (error); 1549 } 1550 1551 1552 node->tn_mode &= ~ALLPERMS; 1553 node->tn_mode |= mode & ALLPERMS; 1554 1555 node->tn_status |= TMPFS_NODE_CHANGED; 1556 1557 MPASS(VOP_ISLOCKED(vp)); 1558 1559 return 0; 1560 } 1561 1562 /* 1563 * Change ownership of the given vnode. At least one of uid or gid must 1564 * be different than VNOVAL. If one is set to that value, the attribute 1565 * is unchanged. 1566 * Caller should execute tmpfs_update on vp after a successful execution. 1567 * The vnode must be locked on entry and remain locked on exit. 1568 */ 1569 int 1570 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1571 struct thread *p) 1572 { 1573 int error; 1574 struct tmpfs_node *node; 1575 uid_t ouid; 1576 gid_t ogid; 1577 1578 MPASS(VOP_ISLOCKED(vp)); 1579 1580 node = VP_TO_TMPFS_NODE(vp); 1581 1582 /* Assign default values if they are unknown. */ 1583 MPASS(uid != VNOVAL || gid != VNOVAL); 1584 if (uid == VNOVAL) 1585 uid = node->tn_uid; 1586 if (gid == VNOVAL) 1587 gid = node->tn_gid; 1588 MPASS(uid != VNOVAL && gid != VNOVAL); 1589 1590 /* Disallow this operation if the file system is mounted read-only. */ 1591 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1592 return EROFS; 1593 1594 /* Immutable or append-only files cannot be modified, either. */ 1595 if (node->tn_flags & (IMMUTABLE | APPEND)) 1596 return EPERM; 1597 1598 /* 1599 * To modify the ownership of a file, must possess VADMIN for that 1600 * file. 1601 */ 1602 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1603 return (error); 1604 1605 /* 1606 * To change the owner of a file, or change the group of a file to a 1607 * group of which we are not a member, the caller must have 1608 * privilege. 1609 */ 1610 if ((uid != node->tn_uid || 1611 (gid != node->tn_gid && !groupmember(gid, cred))) && 1612 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1613 return (error); 1614 1615 ogid = node->tn_gid; 1616 ouid = node->tn_uid; 1617 1618 node->tn_uid = uid; 1619 node->tn_gid = gid; 1620 1621 node->tn_status |= TMPFS_NODE_CHANGED; 1622 1623 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1624 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1625 node->tn_mode &= ~(S_ISUID | S_ISGID); 1626 } 1627 1628 MPASS(VOP_ISLOCKED(vp)); 1629 1630 return 0; 1631 } 1632 1633 /* 1634 * Change size of the given vnode. 1635 * Caller should execute tmpfs_update on vp after a successful execution. 1636 * The vnode must be locked on entry and remain locked on exit. 1637 */ 1638 int 1639 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1640 struct thread *p) 1641 { 1642 int error; 1643 struct tmpfs_node *node; 1644 1645 MPASS(VOP_ISLOCKED(vp)); 1646 1647 node = VP_TO_TMPFS_NODE(vp); 1648 1649 /* Decide whether this is a valid operation based on the file type. */ 1650 error = 0; 1651 switch (vp->v_type) { 1652 case VDIR: 1653 return EISDIR; 1654 1655 case VREG: 1656 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1657 return EROFS; 1658 break; 1659 1660 case VBLK: 1661 /* FALLTHROUGH */ 1662 case VCHR: 1663 /* FALLTHROUGH */ 1664 case VFIFO: 1665 /* Allow modifications of special files even if in the file 1666 * system is mounted read-only (we are not modifying the 1667 * files themselves, but the objects they represent). */ 1668 return 0; 1669 1670 default: 1671 /* Anything else is unsupported. */ 1672 return EOPNOTSUPP; 1673 } 1674 1675 /* Immutable or append-only files cannot be modified, either. */ 1676 if (node->tn_flags & (IMMUTABLE | APPEND)) 1677 return EPERM; 1678 1679 error = tmpfs_truncate(vp, size); 1680 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1681 * for us, as will update tn_status; no need to do that here. */ 1682 1683 MPASS(VOP_ISLOCKED(vp)); 1684 1685 return error; 1686 } 1687 1688 /* 1689 * Change access and modification times of the given vnode. 1690 * Caller should execute tmpfs_update on vp after a successful execution. 1691 * The vnode must be locked on entry and remain locked on exit. 1692 */ 1693 int 1694 tmpfs_chtimes(struct vnode *vp, struct vattr *vap, 1695 struct ucred *cred, struct thread *l) 1696 { 1697 int error; 1698 struct tmpfs_node *node; 1699 1700 MPASS(VOP_ISLOCKED(vp)); 1701 1702 node = VP_TO_TMPFS_NODE(vp); 1703 1704 /* Disallow this operation if the file system is mounted read-only. */ 1705 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1706 return EROFS; 1707 1708 /* Immutable or append-only files cannot be modified, either. */ 1709 if (node->tn_flags & (IMMUTABLE | APPEND)) 1710 return EPERM; 1711 1712 error = vn_utimes_perm(vp, vap, cred, l); 1713 if (error != 0) 1714 return (error); 1715 1716 if (vap->va_atime.tv_sec != VNOVAL) 1717 node->tn_status |= TMPFS_NODE_ACCESSED; 1718 1719 if (vap->va_mtime.tv_sec != VNOVAL) 1720 node->tn_status |= TMPFS_NODE_MODIFIED; 1721 1722 if (vap->va_birthtime.tv_sec != VNOVAL) 1723 node->tn_status |= TMPFS_NODE_MODIFIED; 1724 1725 tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); 1726 1727 if (vap->va_birthtime.tv_sec != VNOVAL) 1728 node->tn_birthtime = vap->va_birthtime; 1729 MPASS(VOP_ISLOCKED(vp)); 1730 1731 return 0; 1732 } 1733 1734 /* Sync timestamps */ 1735 void 1736 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1737 const struct timespec *mod) 1738 { 1739 struct tmpfs_node *node; 1740 struct timespec now; 1741 1742 node = VP_TO_TMPFS_NODE(vp); 1743 1744 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1745 TMPFS_NODE_CHANGED)) == 0) 1746 return; 1747 1748 vfs_timestamp(&now); 1749 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1750 if (acc == NULL) 1751 acc = &now; 1752 node->tn_atime = *acc; 1753 } 1754 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1755 if (mod == NULL) 1756 mod = &now; 1757 node->tn_mtime = *mod; 1758 } 1759 if (node->tn_status & TMPFS_NODE_CHANGED) { 1760 node->tn_ctime = now; 1761 } 1762 node->tn_status &= 1763 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1764 /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ 1765 random_harvest_queue(node, sizeof(*node), 1, RANDOM_FS_ATIME); 1766 } 1767 1768 void 1769 tmpfs_update(struct vnode *vp) 1770 { 1771 1772 tmpfs_itimes(vp, NULL, NULL); 1773 } 1774 1775 int 1776 tmpfs_truncate(struct vnode *vp, off_t length) 1777 { 1778 int error; 1779 struct tmpfs_node *node; 1780 1781 node = VP_TO_TMPFS_NODE(vp); 1782 1783 if (length < 0) { 1784 error = EINVAL; 1785 goto out; 1786 } 1787 1788 if (node->tn_size == length) { 1789 error = 0; 1790 goto out; 1791 } 1792 1793 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1794 return (EFBIG); 1795 1796 error = tmpfs_reg_resize(vp, length, FALSE); 1797 if (error == 0) { 1798 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1799 } 1800 1801 out: 1802 tmpfs_update(vp); 1803 1804 return error; 1805 } 1806 1807 static __inline int 1808 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1809 { 1810 if (a->td_hash > b->td_hash) 1811 return (1); 1812 else if (a->td_hash < b->td_hash) 1813 return (-1); 1814 return (0); 1815 } 1816 1817 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); 1818