1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /* 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Efficient memory file system supporting functions. 42 */ 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include <sys/param.h> 47 #include <sys/namei.h> 48 #include <sys/priv.h> 49 #include <sys/proc.h> 50 #include <sys/stat.h> 51 #include <sys/systm.h> 52 #include <sys/vnode.h> 53 #include <sys/vmmeter.h> 54 55 #include <vm/vm.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_pager.h> 59 #include <vm/vm_extern.h> 60 61 #include <fs/tmpfs/tmpfs.h> 62 #include <fs/tmpfs/tmpfs_fifoops.h> 63 #include <fs/tmpfs/tmpfs_vnops.h> 64 65 /* --------------------------------------------------------------------- */ 66 67 /* 68 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 69 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 70 * using the credentials of the process 'p'. 71 * 72 * If the node type is set to 'VDIR', then the parent parameter must point 73 * to the parent directory of the node being created. It may only be NULL 74 * while allocating the root node. 75 * 76 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 77 * specifies the device the node represents. 78 * 79 * If the node type is set to 'VLNK', then the parameter target specifies 80 * the file name of the target file for the symbolic link that is being 81 * created. 82 * 83 * Note that new nodes are retrieved from the available list if it has 84 * items or, if it is empty, from the node pool as long as there is enough 85 * space to create them. 86 * 87 * Returns zero on success or an appropriate error code on failure. 88 */ 89 int 90 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 91 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 92 char *target, dev_t rdev, struct thread *p, struct tmpfs_node **node) 93 { 94 struct tmpfs_node *nnode; 95 96 /* If the root directory of the 'tmp' file system is not yet 97 * allocated, this must be the request to do it. */ 98 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 99 100 MPASS(IFF(type == VLNK, target != NULL)); 101 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 102 103 if (tmp->tm_nodes_inuse > tmp->tm_nodes_max) 104 return (ENOSPC); 105 106 nnode = (struct tmpfs_node *)uma_zalloc_arg( 107 tmp->tm_node_pool, tmp, M_WAITOK); 108 109 /* Generic initialization. */ 110 nnode->tn_type = type; 111 vfs_timestamp(&nnode->tn_atime); 112 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 113 nnode->tn_atime; 114 nnode->tn_uid = uid; 115 nnode->tn_gid = gid; 116 nnode->tn_mode = mode; 117 nnode->tn_id = alloc_unr(tmp->tm_ino_unr); 118 119 /* Type-specific initialization. */ 120 switch (nnode->tn_type) { 121 case VBLK: 122 case VCHR: 123 nnode->tn_rdev = rdev; 124 break; 125 126 case VDIR: 127 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 128 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 129 nnode->tn_dir.tn_readdir_lastn = 0; 130 nnode->tn_dir.tn_readdir_lastp = NULL; 131 nnode->tn_links++; 132 nnode->tn_dir.tn_parent->tn_links++; 133 break; 134 135 case VFIFO: 136 /* FALLTHROUGH */ 137 case VSOCK: 138 break; 139 140 case VLNK: 141 MPASS(strlen(target) < MAXPATHLEN); 142 nnode->tn_size = strlen(target); 143 nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, 144 M_WAITOK); 145 memcpy(nnode->tn_link, target, nnode->tn_size); 146 break; 147 148 case VREG: 149 nnode->tn_reg.tn_aobj = 150 vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0); 151 nnode->tn_reg.tn_aobj_pages = 0; 152 break; 153 154 default: 155 MPASS(0); 156 } 157 158 TMPFS_LOCK(tmp); 159 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 160 tmp->tm_nodes_inuse++; 161 TMPFS_UNLOCK(tmp); 162 163 *node = nnode; 164 return 0; 165 } 166 167 /* --------------------------------------------------------------------- */ 168 169 /* 170 * Destroys the node pointed to by node from the file system 'tmp'. 171 * If the node does not belong to the given mount point, the results are 172 * unpredicted. 173 * 174 * If the node references a directory; no entries are allowed because 175 * their removal could need a recursive algorithm, something forbidden in 176 * kernel space. Furthermore, there is not need to provide such 177 * functionality (recursive removal) because the only primitives offered 178 * to the user are the removal of empty directories and the deletion of 179 * individual files. 180 * 181 * Note that nodes are not really deleted; in fact, when a node has been 182 * allocated, it cannot be deleted during the whole life of the file 183 * system. Instead, they are moved to the available list and remain there 184 * until reused. 185 */ 186 void 187 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 188 { 189 size_t pages = 0; 190 191 #ifdef INVARIANTS 192 TMPFS_NODE_LOCK(node); 193 MPASS(node->tn_vnode == NULL); 194 TMPFS_NODE_UNLOCK(node); 195 #endif 196 197 TMPFS_LOCK(tmp); 198 LIST_REMOVE(node, tn_entries); 199 tmp->tm_nodes_inuse--; 200 TMPFS_UNLOCK(tmp); 201 202 switch (node->tn_type) { 203 case VNON: 204 /* Do not do anything. VNON is provided to let the 205 * allocation routine clean itself easily by avoiding 206 * duplicating code in it. */ 207 /* FALLTHROUGH */ 208 case VBLK: 209 /* FALLTHROUGH */ 210 case VCHR: 211 /* FALLTHROUGH */ 212 case VDIR: 213 /* FALLTHROUGH */ 214 case VFIFO: 215 /* FALLTHROUGH */ 216 case VSOCK: 217 break; 218 219 case VLNK: 220 free(node->tn_link, M_TMPFSNAME); 221 break; 222 223 case VREG: 224 if (node->tn_reg.tn_aobj != NULL) 225 vm_object_deallocate(node->tn_reg.tn_aobj); 226 pages = node->tn_reg.tn_aobj_pages; 227 break; 228 229 default: 230 MPASS(0); 231 break; 232 } 233 234 free_unr(tmp->tm_ino_unr, node->tn_id); 235 uma_zfree(tmp->tm_node_pool, node); 236 237 TMPFS_LOCK(tmp); 238 tmp->tm_pages_used -= pages; 239 TMPFS_UNLOCK(tmp); 240 } 241 242 /* --------------------------------------------------------------------- */ 243 244 /* 245 * Allocates a new directory entry for the node node with a name of name. 246 * The new directory entry is returned in *de. 247 * 248 * The link count of node is increased by one to reflect the new object 249 * referencing it. 250 * 251 * Returns zero on success or an appropriate error code on failure. 252 */ 253 int 254 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 255 const char *name, uint16_t len, struct tmpfs_dirent **de) 256 { 257 struct tmpfs_dirent *nde; 258 259 nde = (struct tmpfs_dirent *)uma_zalloc( 260 tmp->tm_dirent_pool, M_WAITOK); 261 nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 262 nde->td_namelen = len; 263 memcpy(nde->td_name, name, len); 264 265 nde->td_node = node; 266 node->tn_links++; 267 268 *de = nde; 269 270 return 0; 271 } 272 273 /* --------------------------------------------------------------------- */ 274 275 /* 276 * Frees a directory entry. It is the caller's responsibility to destroy 277 * the node referenced by it if needed. 278 * 279 * The link count of node is decreased by one to reflect the removal of an 280 * object that referenced it. This only happens if 'node_exists' is true; 281 * otherwise the function will not access the node referred to by the 282 * directory entry, as it may already have been released from the outside. 283 */ 284 void 285 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 286 boolean_t node_exists) 287 { 288 if (node_exists) { 289 struct tmpfs_node *node; 290 291 node = de->td_node; 292 293 MPASS(node->tn_links > 0); 294 node->tn_links--; 295 } 296 297 free(de->td_name, M_TMPFSNAME); 298 uma_zfree(tmp->tm_dirent_pool, de); 299 } 300 301 /* --------------------------------------------------------------------- */ 302 303 /* 304 * Allocates a new vnode for the node node or returns a new reference to 305 * an existing one if the node had already a vnode referencing it. The 306 * resulting locked vnode is returned in *vpp. 307 * 308 * Returns zero on success or an appropriate error code on failure. 309 */ 310 int 311 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 312 struct vnode **vpp, struct thread *td) 313 { 314 int error = 0; 315 struct vnode *vp; 316 317 loop: 318 TMPFS_NODE_LOCK(node); 319 if ((vp = node->tn_vnode) != NULL) { 320 VI_LOCK(vp); 321 TMPFS_NODE_UNLOCK(node); 322 vholdl(vp); 323 (void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, td); 324 vdrop(vp); 325 326 /* 327 * Make sure the vnode is still there after 328 * getting the interlock to avoid racing a free. 329 */ 330 if (node->tn_vnode == NULL || node->tn_vnode != vp) { 331 vput(vp); 332 goto loop; 333 } 334 335 goto out; 336 } 337 338 /* 339 * otherwise lock the vp list while we call getnewvnode 340 * since that can block. 341 */ 342 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 343 node->tn_vpstate |= TMPFS_VNODE_WANT; 344 error = msleep((caddr_t) &node->tn_vpstate, 345 TMPFS_NODE_MTX(node), PDROP | PCATCH, 346 "tmpfs_alloc_vp", 0); 347 if (error) 348 return error; 349 350 goto loop; 351 } else 352 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 353 354 TMPFS_NODE_UNLOCK(node); 355 356 /* Get a new vnode and associate it with our node. */ 357 error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp); 358 if (error != 0) 359 goto unlock; 360 MPASS(vp != NULL); 361 362 (void) vn_lock(vp, lkflag | LK_RETRY, td); 363 364 vp->v_data = node; 365 vp->v_type = node->tn_type; 366 367 /* Type-specific initialization. */ 368 switch (node->tn_type) { 369 case VBLK: 370 /* FALLTHROUGH */ 371 case VCHR: 372 /* FALLTHROUGH */ 373 case VDIR: 374 /* FALLTHROUGH */ 375 case VLNK: 376 /* FALLTHROUGH */ 377 case VREG: 378 /* FALLTHROUGH */ 379 case VSOCK: 380 break; 381 case VFIFO: 382 vp->v_op = &tmpfs_fifoop_entries; 383 break; 384 385 default: 386 MPASS(0); 387 } 388 389 vnode_pager_setsize(vp, node->tn_size); 390 error = insmntque(vp, mp); 391 if (error) 392 vp = NULL; 393 394 unlock: 395 TMPFS_NODE_LOCK(node); 396 397 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 398 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 399 node->tn_vnode = vp; 400 401 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 402 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 403 TMPFS_NODE_UNLOCK(node); 404 wakeup((caddr_t) &node->tn_vpstate); 405 } else 406 TMPFS_NODE_UNLOCK(node); 407 408 out: 409 *vpp = vp; 410 411 MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp, td))); 412 #ifdef INVARIANTS 413 TMPFS_NODE_LOCK(node); 414 MPASS(*vpp == node->tn_vnode); 415 TMPFS_NODE_UNLOCK(node); 416 #endif 417 418 return error; 419 } 420 421 /* --------------------------------------------------------------------- */ 422 423 /* 424 * Destroys the association between the vnode vp and the node it 425 * references. 426 */ 427 void 428 tmpfs_free_vp(struct vnode *vp) 429 { 430 struct tmpfs_node *node; 431 432 node = VP_TO_TMPFS_NODE(vp); 433 434 TMPFS_NODE_LOCK(node); 435 node->tn_vnode = NULL; 436 vp->v_data = NULL; 437 TMPFS_NODE_UNLOCK(node); 438 } 439 440 /* --------------------------------------------------------------------- */ 441 442 /* 443 * Allocates a new file of type 'type' and adds it to the parent directory 444 * 'dvp'; this addition is done using the component name given in 'cnp'. 445 * The ownership of the new file is automatically assigned based on the 446 * credentials of the caller (through 'cnp'), the group is set based on 447 * the parent directory and the mode is determined from the 'vap' argument. 448 * If successful, *vpp holds a vnode to the newly created file and zero 449 * is returned. Otherwise *vpp is NULL and the function returns an 450 * appropriate error code. 451 */ 452 int 453 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 454 struct componentname *cnp, char *target) 455 { 456 int error; 457 struct tmpfs_dirent *de; 458 struct tmpfs_mount *tmp; 459 struct tmpfs_node *dnode; 460 struct tmpfs_node *node; 461 struct tmpfs_node *parent; 462 463 MPASS(VOP_ISLOCKED(dvp, cnp->cn_thread)); 464 MPASS(cnp->cn_flags & HASBUF); 465 466 tmp = VFS_TO_TMPFS(dvp->v_mount); 467 dnode = VP_TO_TMPFS_DIR(dvp); 468 *vpp = NULL; 469 470 /* If the entry we are creating is a directory, we cannot overflow 471 * the number of links of its parent, because it will get a new 472 * link. */ 473 if (vap->va_type == VDIR) { 474 /* Ensure that we do not overflow the maximum number of links 475 * imposed by the system. */ 476 MPASS(dnode->tn_links <= LINK_MAX); 477 if (dnode->tn_links == LINK_MAX) { 478 error = EMLINK; 479 goto out; 480 } 481 482 parent = dnode; 483 } else 484 parent = NULL; 485 486 /* Allocate a node that represents the new file. */ 487 error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, 488 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, 489 cnp->cn_thread, &node); 490 if (error != 0) 491 goto out; 492 493 /* Allocate a directory entry that points to the new file. */ 494 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 495 &de); 496 if (error != 0) { 497 tmpfs_free_node(tmp, node); 498 goto out; 499 } 500 501 /* Allocate a vnode for the new file. */ 502 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp, 503 cnp->cn_thread); 504 if (error != 0) { 505 tmpfs_free_dirent(tmp, de, TRUE); 506 tmpfs_free_node(tmp, node); 507 goto out; 508 } 509 510 /* Now that all required items are allocated, we can proceed to 511 * insert the new node into the directory, an operation that 512 * cannot fail. */ 513 tmpfs_dir_attach(dvp, de); 514 515 out: 516 517 return error; 518 } 519 520 /* --------------------------------------------------------------------- */ 521 522 /* 523 * Attaches the directory entry de to the directory represented by vp. 524 * Note that this does not change the link count of the node pointed by 525 * the directory entry, as this is done by tmpfs_alloc_dirent. 526 */ 527 void 528 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 529 { 530 struct tmpfs_node *dnode; 531 532 ASSERT_VOP_ELOCKED(vp, __func__); 533 dnode = VP_TO_TMPFS_DIR(vp); 534 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 535 dnode->tn_size += sizeof(struct tmpfs_dirent); 536 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 537 TMPFS_NODE_MODIFIED; 538 } 539 540 /* --------------------------------------------------------------------- */ 541 542 /* 543 * Detaches the directory entry de from the directory represented by vp. 544 * Note that this does not change the link count of the node pointed by 545 * the directory entry, as this is done by tmpfs_free_dirent. 546 */ 547 void 548 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 549 { 550 struct tmpfs_node *dnode; 551 552 ASSERT_VOP_ELOCKED(vp, __func__); 553 dnode = VP_TO_TMPFS_DIR(vp); 554 555 if (dnode->tn_dir.tn_readdir_lastp == de) { 556 dnode->tn_dir.tn_readdir_lastn = 0; 557 dnode->tn_dir.tn_readdir_lastp = NULL; 558 } 559 560 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 561 dnode->tn_size -= sizeof(struct tmpfs_dirent); 562 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 563 TMPFS_NODE_MODIFIED; 564 } 565 566 /* --------------------------------------------------------------------- */ 567 568 /* 569 * Looks for a directory entry in the directory represented by node. 570 * 'cnp' describes the name of the entry to look for. Note that the . 571 * and .. components are not allowed as they do not physically exist 572 * within directories. 573 * 574 * Returns a pointer to the entry when found, otherwise NULL. 575 */ 576 struct tmpfs_dirent * 577 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp) 578 { 579 boolean_t found; 580 struct tmpfs_dirent *de; 581 582 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 583 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 584 cnp->cn_nameptr[1] == '.'))); 585 TMPFS_VALIDATE_DIR(node); 586 587 found = 0; 588 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 589 MPASS(cnp->cn_namelen < 0xffff); 590 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 591 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 592 found = 1; 593 break; 594 } 595 } 596 node->tn_status |= TMPFS_NODE_ACCESSED; 597 598 return found ? de : NULL; 599 } 600 601 /* --------------------------------------------------------------------- */ 602 603 /* 604 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 605 * directory and returns it in the uio space. The function returns 0 606 * on success, -1 if there was not enough space in the uio structure to 607 * hold the directory entry or an appropriate error code if another 608 * error happens. 609 */ 610 int 611 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 612 { 613 int error; 614 struct dirent dent; 615 616 TMPFS_VALIDATE_DIR(node); 617 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 618 619 dent.d_fileno = node->tn_id; 620 dent.d_type = DT_DIR; 621 dent.d_namlen = 1; 622 dent.d_name[0] = '.'; 623 dent.d_name[1] = '\0'; 624 dent.d_reclen = GENERIC_DIRSIZ(&dent); 625 626 if (dent.d_reclen > uio->uio_resid) 627 error = -1; 628 else { 629 error = uiomove(&dent, dent.d_reclen, uio); 630 if (error == 0) 631 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 632 } 633 634 node->tn_status |= TMPFS_NODE_ACCESSED; 635 636 return error; 637 } 638 639 /* --------------------------------------------------------------------- */ 640 641 /* 642 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 643 * directory and returns it in the uio space. The function returns 0 644 * on success, -1 if there was not enough space in the uio structure to 645 * hold the directory entry or an appropriate error code if another 646 * error happens. 647 */ 648 int 649 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 650 { 651 int error; 652 struct dirent dent; 653 654 TMPFS_VALIDATE_DIR(node); 655 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 656 657 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 658 dent.d_type = DT_DIR; 659 dent.d_namlen = 2; 660 dent.d_name[0] = '.'; 661 dent.d_name[1] = '.'; 662 dent.d_name[2] = '\0'; 663 dent.d_reclen = GENERIC_DIRSIZ(&dent); 664 665 if (dent.d_reclen > uio->uio_resid) 666 error = -1; 667 else { 668 error = uiomove(&dent, dent.d_reclen, uio); 669 if (error == 0) { 670 struct tmpfs_dirent *de; 671 672 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 673 if (de == NULL) 674 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 675 else 676 uio->uio_offset = tmpfs_dircookie(de); 677 } 678 } 679 680 node->tn_status |= TMPFS_NODE_ACCESSED; 681 682 return error; 683 } 684 685 /* --------------------------------------------------------------------- */ 686 687 /* 688 * Lookup a directory entry by its associated cookie. 689 */ 690 struct tmpfs_dirent * 691 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 692 { 693 struct tmpfs_dirent *de; 694 695 if (cookie == node->tn_dir.tn_readdir_lastn && 696 node->tn_dir.tn_readdir_lastp != NULL) { 697 return node->tn_dir.tn_readdir_lastp; 698 } 699 700 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 701 if (tmpfs_dircookie(de) == cookie) { 702 break; 703 } 704 } 705 706 return de; 707 } 708 709 /* --------------------------------------------------------------------- */ 710 711 /* 712 * Helper function for tmpfs_readdir. Returns as much directory entries 713 * as can fit in the uio space. The read starts at uio->uio_offset. 714 * The function returns 0 on success, -1 if there was not enough space 715 * in the uio structure to hold the directory entry or an appropriate 716 * error code if another error happens. 717 */ 718 int 719 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 720 { 721 int error; 722 off_t startcookie; 723 struct tmpfs_dirent *de; 724 725 TMPFS_VALIDATE_DIR(node); 726 727 /* Locate the first directory entry we have to return. We have cached 728 * the last readdir in the node, so use those values if appropriate. 729 * Otherwise do a linear scan to find the requested entry. */ 730 startcookie = uio->uio_offset; 731 MPASS(startcookie != TMPFS_DIRCOOKIE_DOT); 732 MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 733 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 734 return 0; 735 } else { 736 de = tmpfs_dir_lookupbycookie(node, startcookie); 737 } 738 if (de == NULL) { 739 return EINVAL; 740 } 741 742 /* Read as much entries as possible; i.e., until we reach the end of 743 * the directory or we exhaust uio space. */ 744 do { 745 struct dirent d; 746 747 /* Create a dirent structure representing the current 748 * tmpfs_node and fill it. */ 749 d.d_fileno = de->td_node->tn_id; 750 switch (de->td_node->tn_type) { 751 case VBLK: 752 d.d_type = DT_BLK; 753 break; 754 755 case VCHR: 756 d.d_type = DT_CHR; 757 break; 758 759 case VDIR: 760 d.d_type = DT_DIR; 761 break; 762 763 case VFIFO: 764 d.d_type = DT_FIFO; 765 break; 766 767 case VLNK: 768 d.d_type = DT_LNK; 769 break; 770 771 case VREG: 772 d.d_type = DT_REG; 773 break; 774 775 case VSOCK: 776 d.d_type = DT_SOCK; 777 break; 778 779 default: 780 MPASS(0); 781 } 782 d.d_namlen = de->td_namelen; 783 MPASS(de->td_namelen < sizeof(d.d_name)); 784 (void)memcpy(d.d_name, de->td_name, de->td_namelen); 785 d.d_name[de->td_namelen] = '\0'; 786 d.d_reclen = GENERIC_DIRSIZ(&d); 787 788 /* Stop reading if the directory entry we are treating is 789 * bigger than the amount of data that can be returned. */ 790 if (d.d_reclen > uio->uio_resid) { 791 error = -1; 792 break; 793 } 794 795 /* Copy the new dirent structure into the output buffer and 796 * advance pointers. */ 797 error = uiomove(&d, d.d_reclen, uio); 798 799 (*cntp)++; 800 de = TAILQ_NEXT(de, td_entries); 801 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 802 803 /* Update the offset and cache. */ 804 if (de == NULL) { 805 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 806 node->tn_dir.tn_readdir_lastn = 0; 807 node->tn_dir.tn_readdir_lastp = NULL; 808 } else { 809 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 810 node->tn_dir.tn_readdir_lastp = de; 811 } 812 813 node->tn_status |= TMPFS_NODE_ACCESSED; 814 return error; 815 } 816 817 /* --------------------------------------------------------------------- */ 818 819 /* 820 * Resizes the aobj associated to the regular file pointed to by vp to 821 * the size newsize. 'vp' must point to a vnode that represents a regular 822 * file. 'newsize' must be positive. 823 * 824 * Returns zero on success or an appropriate error code on failure. 825 */ 826 int 827 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 828 { 829 int error; 830 size_t newpages, oldpages; 831 struct tmpfs_mount *tmp; 832 struct tmpfs_node *node; 833 off_t oldsize; 834 835 MPASS(vp->v_type == VREG); 836 MPASS(newsize >= 0); 837 838 node = VP_TO_TMPFS_NODE(vp); 839 tmp = VFS_TO_TMPFS(vp->v_mount); 840 841 /* Convert the old and new sizes to the number of pages needed to 842 * store them. It may happen that we do not need to do anything 843 * because the last allocated page can accommodate the change on 844 * its own. */ 845 oldsize = node->tn_size; 846 oldpages = round_page(oldsize) / PAGE_SIZE; 847 MPASS(oldpages == node->tn_reg.tn_aobj_pages); 848 newpages = round_page(newsize) / PAGE_SIZE; 849 850 if (newpages > oldpages && 851 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { 852 error = ENOSPC; 853 goto out; 854 } 855 856 node->tn_reg.tn_aobj_pages = newpages; 857 858 TMPFS_LOCK(tmp); 859 tmp->tm_pages_used += (newpages - oldpages); 860 TMPFS_UNLOCK(tmp); 861 862 node->tn_size = newsize; 863 vnode_pager_setsize(vp, newsize); 864 if (newsize < oldsize) { 865 size_t zerolen = round_page(newsize) - newsize; 866 vm_object_t uobj = node->tn_reg.tn_aobj; 867 vm_page_t m; 868 869 /* 870 * free "backing store" 871 */ 872 VM_OBJECT_LOCK(uobj); 873 if (newpages < oldpages) { 874 swap_pager_freespace(uobj, 875 newpages, oldpages - newpages); 876 vm_object_page_remove(uobj, 877 OFF_TO_IDX(newsize + PAGE_MASK), 0, FALSE); 878 } 879 880 /* 881 * zero out the truncated part of the last page. 882 */ 883 884 if (zerolen > 0) { 885 m = vm_page_grab(uobj, OFF_TO_IDX(newsize), 886 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 887 pmap_zero_page_area(m, PAGE_SIZE - zerolen, 888 zerolen); 889 vm_page_wakeup(m); 890 } 891 VM_OBJECT_UNLOCK(uobj); 892 893 } 894 895 error = 0; 896 897 out: 898 return error; 899 } 900 901 /* --------------------------------------------------------------------- */ 902 903 /* 904 * Change flags of the given vnode. 905 * Caller should execute tmpfs_update on vp after a successful execution. 906 * The vnode must be locked on entry and remain locked on exit. 907 */ 908 int 909 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct thread *p) 910 { 911 int error; 912 struct tmpfs_node *node; 913 914 MPASS(VOP_ISLOCKED(vp, p)); 915 916 node = VP_TO_TMPFS_NODE(vp); 917 918 /* Disallow this operation if the file system is mounted read-only. */ 919 if (vp->v_mount->mnt_flag & MNT_RDONLY) 920 return EROFS; 921 922 /* 923 * Callers may only modify the file flags on objects they 924 * have VADMIN rights for. 925 */ 926 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 927 return (error); 928 /* 929 * Unprivileged processes are not permitted to unset system 930 * flags, or modify flags if any system flags are set. 931 */ 932 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 933 if (node->tn_flags 934 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 935 error = securelevel_gt(cred, 0); 936 if (error) 937 return (error); 938 } 939 /* Snapshot flag cannot be set or cleared */ 940 if (((flags & SF_SNAPSHOT) != 0 && 941 (node->tn_flags & SF_SNAPSHOT) == 0) || 942 ((flags & SF_SNAPSHOT) == 0 && 943 (node->tn_flags & SF_SNAPSHOT) != 0)) 944 return (EPERM); 945 node->tn_flags = flags; 946 } else { 947 if (node->tn_flags 948 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 949 (flags & UF_SETTABLE) != flags) 950 return (EPERM); 951 node->tn_flags &= SF_SETTABLE; 952 node->tn_flags |= (flags & UF_SETTABLE); 953 } 954 node->tn_status |= TMPFS_NODE_CHANGED; 955 956 MPASS(VOP_ISLOCKED(vp, p)); 957 958 return 0; 959 } 960 961 /* --------------------------------------------------------------------- */ 962 963 /* 964 * Change access mode on the given vnode. 965 * Caller should execute tmpfs_update on vp after a successful execution. 966 * The vnode must be locked on entry and remain locked on exit. 967 */ 968 int 969 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 970 { 971 int error; 972 struct tmpfs_node *node; 973 974 MPASS(VOP_ISLOCKED(vp, p)); 975 976 node = VP_TO_TMPFS_NODE(vp); 977 978 /* Disallow this operation if the file system is mounted read-only. */ 979 if (vp->v_mount->mnt_flag & MNT_RDONLY) 980 return EROFS; 981 982 /* Immutable or append-only files cannot be modified, either. */ 983 if (node->tn_flags & (IMMUTABLE | APPEND)) 984 return EPERM; 985 986 /* 987 * To modify the permissions on a file, must possess VADMIN 988 * for that file. 989 */ 990 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 991 return (error); 992 993 /* 994 * Privileged processes may set the sticky bit on non-directories, 995 * as well as set the setgid bit on a file with a group that the 996 * process is not a member of. 997 */ 998 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 999 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1000 return (EFTYPE); 1001 } 1002 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1003 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1004 if (error) 1005 return (error); 1006 } 1007 1008 1009 node->tn_mode &= ~ALLPERMS; 1010 node->tn_mode |= mode & ALLPERMS; 1011 1012 node->tn_status |= TMPFS_NODE_CHANGED; 1013 1014 MPASS(VOP_ISLOCKED(vp, p)); 1015 1016 return 0; 1017 } 1018 1019 /* --------------------------------------------------------------------- */ 1020 1021 /* 1022 * Change ownership of the given vnode. At least one of uid or gid must 1023 * be different than VNOVAL. If one is set to that value, the attribute 1024 * is unchanged. 1025 * Caller should execute tmpfs_update on vp after a successful execution. 1026 * The vnode must be locked on entry and remain locked on exit. 1027 */ 1028 int 1029 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1030 struct thread *p) 1031 { 1032 int error; 1033 struct tmpfs_node *node; 1034 uid_t ouid; 1035 gid_t ogid; 1036 1037 MPASS(VOP_ISLOCKED(vp, p)); 1038 1039 node = VP_TO_TMPFS_NODE(vp); 1040 1041 /* Assign default values if they are unknown. */ 1042 MPASS(uid != VNOVAL || gid != VNOVAL); 1043 if (uid == VNOVAL) 1044 uid = node->tn_uid; 1045 if (gid == VNOVAL) 1046 gid = node->tn_gid; 1047 MPASS(uid != VNOVAL && gid != VNOVAL); 1048 1049 /* Disallow this operation if the file system is mounted read-only. */ 1050 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1051 return EROFS; 1052 1053 /* Immutable or append-only files cannot be modified, either. */ 1054 if (node->tn_flags & (IMMUTABLE | APPEND)) 1055 return EPERM; 1056 1057 /* 1058 * To modify the ownership of a file, must possess VADMIN for that 1059 * file. 1060 */ 1061 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1062 return (error); 1063 1064 /* 1065 * To change the owner of a file, or change the group of a file to a 1066 * group of which we are not a member, the caller must have 1067 * privilege. 1068 */ 1069 if ((uid != node->tn_uid || 1070 (gid != node->tn_gid && !groupmember(gid, cred))) && 1071 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1072 return (error); 1073 1074 ogid = node->tn_gid; 1075 ouid = node->tn_uid; 1076 1077 node->tn_uid = uid; 1078 node->tn_gid = gid; 1079 1080 node->tn_status |= TMPFS_NODE_CHANGED; 1081 1082 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1083 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1084 node->tn_mode &= ~(S_ISUID | S_ISGID); 1085 } 1086 1087 MPASS(VOP_ISLOCKED(vp, p)); 1088 1089 return 0; 1090 } 1091 1092 /* --------------------------------------------------------------------- */ 1093 1094 /* 1095 * Change size of the given vnode. 1096 * Caller should execute tmpfs_update on vp after a successful execution. 1097 * The vnode must be locked on entry and remain locked on exit. 1098 */ 1099 int 1100 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1101 struct thread *p) 1102 { 1103 int error; 1104 struct tmpfs_node *node; 1105 1106 MPASS(VOP_ISLOCKED(vp, p)); 1107 1108 node = VP_TO_TMPFS_NODE(vp); 1109 1110 /* Decide whether this is a valid operation based on the file type. */ 1111 error = 0; 1112 switch (vp->v_type) { 1113 case VDIR: 1114 return EISDIR; 1115 1116 case VREG: 1117 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1118 return EROFS; 1119 break; 1120 1121 case VBLK: 1122 /* FALLTHROUGH */ 1123 case VCHR: 1124 /* FALLTHROUGH */ 1125 case VFIFO: 1126 /* Allow modifications of special files even if in the file 1127 * system is mounted read-only (we are not modifying the 1128 * files themselves, but the objects they represent). */ 1129 return 0; 1130 1131 default: 1132 /* Anything else is unsupported. */ 1133 return EOPNOTSUPP; 1134 } 1135 1136 /* Immutable or append-only files cannot be modified, either. */ 1137 if (node->tn_flags & (IMMUTABLE | APPEND)) 1138 return EPERM; 1139 1140 error = tmpfs_truncate(vp, size); 1141 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1142 * for us, as will update tn_status; no need to do that here. */ 1143 1144 MPASS(VOP_ISLOCKED(vp, p)); 1145 1146 return error; 1147 } 1148 1149 /* --------------------------------------------------------------------- */ 1150 1151 /* 1152 * Change access and modification times of the given vnode. 1153 * Caller should execute tmpfs_update on vp after a successful execution. 1154 * The vnode must be locked on entry and remain locked on exit. 1155 */ 1156 int 1157 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1158 struct timespec *birthtime, int vaflags, struct ucred *cred, struct thread *l) 1159 { 1160 int error; 1161 struct tmpfs_node *node; 1162 1163 MPASS(VOP_ISLOCKED(vp, l)); 1164 1165 node = VP_TO_TMPFS_NODE(vp); 1166 1167 /* Disallow this operation if the file system is mounted read-only. */ 1168 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1169 return EROFS; 1170 1171 /* Immutable or append-only files cannot be modified, either. */ 1172 if (node->tn_flags & (IMMUTABLE | APPEND)) 1173 return EPERM; 1174 1175 /* Determine if the user have proper privilege to update time. */ 1176 if (vaflags & VA_UTIMES_NULL) { 1177 error = VOP_ACCESS(vp, VADMIN, cred, l); 1178 if (error) 1179 error = VOP_ACCESS(vp, VWRITE, cred, l); 1180 } else 1181 error = VOP_ACCESS(vp, VADMIN, cred, l); 1182 if (error) 1183 return (error); 1184 1185 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1186 node->tn_status |= TMPFS_NODE_ACCESSED; 1187 1188 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1189 node->tn_status |= TMPFS_NODE_MODIFIED; 1190 1191 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1192 node->tn_status |= TMPFS_NODE_MODIFIED; 1193 1194 tmpfs_itimes(vp, atime, mtime); 1195 1196 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1197 node->tn_birthtime = *birthtime; 1198 MPASS(VOP_ISLOCKED(vp, l)); 1199 1200 return 0; 1201 } 1202 1203 /* --------------------------------------------------------------------- */ 1204 /* Sync timestamps */ 1205 void 1206 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1207 const struct timespec *mod) 1208 { 1209 struct tmpfs_node *node; 1210 struct timespec now; 1211 1212 node = VP_TO_TMPFS_NODE(vp); 1213 1214 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1215 TMPFS_NODE_CHANGED)) == 0) 1216 return; 1217 1218 vfs_timestamp(&now); 1219 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1220 if (acc == NULL) 1221 acc = &now; 1222 node->tn_atime = *acc; 1223 } 1224 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1225 if (mod == NULL) 1226 mod = &now; 1227 node->tn_mtime = *mod; 1228 } 1229 if (node->tn_status & TMPFS_NODE_CHANGED) { 1230 node->tn_ctime = now; 1231 } 1232 node->tn_status &= 1233 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1234 } 1235 1236 /* --------------------------------------------------------------------- */ 1237 1238 void 1239 tmpfs_update(struct vnode *vp) 1240 { 1241 1242 tmpfs_itimes(vp, NULL, NULL); 1243 } 1244 1245 /* --------------------------------------------------------------------- */ 1246 1247 int 1248 tmpfs_truncate(struct vnode *vp, off_t length) 1249 { 1250 boolean_t extended; 1251 int error; 1252 struct tmpfs_node *node; 1253 1254 node = VP_TO_TMPFS_NODE(vp); 1255 extended = length > node->tn_size; 1256 1257 if (length < 0) { 1258 error = EINVAL; 1259 goto out; 1260 } 1261 1262 if (node->tn_size == length) { 1263 error = 0; 1264 goto out; 1265 } 1266 1267 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1268 return (EFBIG); 1269 1270 error = tmpfs_reg_resize(vp, length); 1271 if (error == 0) { 1272 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1273 } 1274 1275 out: 1276 tmpfs_update(vp); 1277 1278 return error; 1279 } 1280