1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/namei.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/stat.h> 44 #include <sys/systm.h> 45 #include <sys/vnode.h> 46 #include <sys/vmmeter.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_object.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_pager.h> 52 #include <vm/vm_extern.h> 53 54 #include <fs/tmpfs/tmpfs.h> 55 #include <fs/tmpfs/tmpfs_fifoops.h> 56 #include <fs/tmpfs/tmpfs_vnops.h> 57 58 /* --------------------------------------------------------------------- */ 59 60 /* 61 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 62 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 63 * using the credentials of the process 'p'. 64 * 65 * If the node type is set to 'VDIR', then the parent parameter must point 66 * to the parent directory of the node being created. It may only be NULL 67 * while allocating the root node. 68 * 69 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 70 * specifies the device the node represents. 71 * 72 * If the node type is set to 'VLNK', then the parameter target specifies 73 * the file name of the target file for the symbolic link that is being 74 * created. 75 * 76 * Note that new nodes are retrieved from the available list if it has 77 * items or, if it is empty, from the node pool as long as there is enough 78 * space to create them. 79 * 80 * Returns zero on success or an appropriate error code on failure. 81 */ 82 int 83 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 84 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 85 char *target, dev_t rdev, struct tmpfs_node **node) 86 { 87 struct tmpfs_node *nnode; 88 89 /* If the root directory of the 'tmp' file system is not yet 90 * allocated, this must be the request to do it. */ 91 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 92 93 MPASS(IFF(type == VLNK, target != NULL)); 94 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 95 96 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 97 return (ENOSPC); 98 99 nnode = (struct tmpfs_node *)uma_zalloc_arg( 100 tmp->tm_node_pool, tmp, M_WAITOK); 101 102 /* Generic initialization. */ 103 nnode->tn_type = type; 104 vfs_timestamp(&nnode->tn_atime); 105 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 106 nnode->tn_atime; 107 nnode->tn_uid = uid; 108 nnode->tn_gid = gid; 109 nnode->tn_mode = mode; 110 nnode->tn_id = alloc_unr(tmp->tm_ino_unr); 111 112 /* Type-specific initialization. */ 113 switch (nnode->tn_type) { 114 case VBLK: 115 case VCHR: 116 nnode->tn_rdev = rdev; 117 break; 118 119 case VDIR: 120 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 121 MPASS(parent != nnode); 122 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 123 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 124 nnode->tn_dir.tn_readdir_lastn = 0; 125 nnode->tn_dir.tn_readdir_lastp = NULL; 126 nnode->tn_links++; 127 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 128 nnode->tn_dir.tn_parent->tn_links++; 129 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 130 break; 131 132 case VFIFO: 133 /* FALLTHROUGH */ 134 case VSOCK: 135 break; 136 137 case VLNK: 138 MPASS(strlen(target) < MAXPATHLEN); 139 nnode->tn_size = strlen(target); 140 nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, 141 M_WAITOK); 142 memcpy(nnode->tn_link, target, nnode->tn_size); 143 break; 144 145 case VREG: 146 nnode->tn_reg.tn_aobj = 147 vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0, 148 NULL /* XXXKIB - tmpfs needs swap reservation */); 149 nnode->tn_reg.tn_aobj_pages = 0; 150 break; 151 152 default: 153 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 154 } 155 156 TMPFS_LOCK(tmp); 157 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 158 tmp->tm_nodes_inuse++; 159 TMPFS_UNLOCK(tmp); 160 161 *node = nnode; 162 return 0; 163 } 164 165 /* --------------------------------------------------------------------- */ 166 167 /* 168 * Destroys the node pointed to by node from the file system 'tmp'. 169 * If the node does not belong to the given mount point, the results are 170 * unpredicted. 171 * 172 * If the node references a directory; no entries are allowed because 173 * their removal could need a recursive algorithm, something forbidden in 174 * kernel space. Furthermore, there is not need to provide such 175 * functionality (recursive removal) because the only primitives offered 176 * to the user are the removal of empty directories and the deletion of 177 * individual files. 178 * 179 * Note that nodes are not really deleted; in fact, when a node has been 180 * allocated, it cannot be deleted during the whole life of the file 181 * system. Instead, they are moved to the available list and remain there 182 * until reused. 183 */ 184 void 185 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 186 { 187 size_t pages = 0; 188 189 #ifdef INVARIANTS 190 TMPFS_NODE_LOCK(node); 191 MPASS(node->tn_vnode == NULL); 192 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 193 TMPFS_NODE_UNLOCK(node); 194 #endif 195 196 TMPFS_LOCK(tmp); 197 LIST_REMOVE(node, tn_entries); 198 tmp->tm_nodes_inuse--; 199 TMPFS_UNLOCK(tmp); 200 201 switch (node->tn_type) { 202 case VNON: 203 /* Do not do anything. VNON is provided to let the 204 * allocation routine clean itself easily by avoiding 205 * duplicating code in it. */ 206 /* FALLTHROUGH */ 207 case VBLK: 208 /* FALLTHROUGH */ 209 case VCHR: 210 /* FALLTHROUGH */ 211 case VDIR: 212 /* FALLTHROUGH */ 213 case VFIFO: 214 /* FALLTHROUGH */ 215 case VSOCK: 216 break; 217 218 case VLNK: 219 free(node->tn_link, M_TMPFSNAME); 220 break; 221 222 case VREG: 223 if (node->tn_reg.tn_aobj != NULL) 224 vm_object_deallocate(node->tn_reg.tn_aobj); 225 pages = node->tn_reg.tn_aobj_pages; 226 break; 227 228 default: 229 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 230 } 231 232 free_unr(tmp->tm_ino_unr, node->tn_id); 233 uma_zfree(tmp->tm_node_pool, node); 234 235 TMPFS_LOCK(tmp); 236 tmp->tm_pages_used -= pages; 237 TMPFS_UNLOCK(tmp); 238 } 239 240 /* --------------------------------------------------------------------- */ 241 242 /* 243 * Allocates a new directory entry for the node node with a name of name. 244 * The new directory entry is returned in *de. 245 * 246 * The link count of node is increased by one to reflect the new object 247 * referencing it. 248 * 249 * Returns zero on success or an appropriate error code on failure. 250 */ 251 int 252 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 253 const char *name, uint16_t len, struct tmpfs_dirent **de) 254 { 255 struct tmpfs_dirent *nde; 256 257 nde = (struct tmpfs_dirent *)uma_zalloc( 258 tmp->tm_dirent_pool, M_WAITOK); 259 nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 260 nde->td_namelen = len; 261 memcpy(nde->td_name, name, len); 262 263 nde->td_node = node; 264 if (node != NULL) 265 node->tn_links++; 266 267 *de = nde; 268 269 return 0; 270 } 271 272 /* --------------------------------------------------------------------- */ 273 274 /* 275 * Frees a directory entry. It is the caller's responsibility to destroy 276 * the node referenced by it if needed. 277 * 278 * The link count of node is decreased by one to reflect the removal of an 279 * object that referenced it. This only happens if 'node_exists' is true; 280 * otherwise the function will not access the node referred to by the 281 * directory entry, as it may already have been released from the outside. 282 */ 283 void 284 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 285 boolean_t node_exists) 286 { 287 if (node_exists) { 288 struct tmpfs_node *node; 289 290 node = de->td_node; 291 if (node != NULL) { 292 MPASS(node->tn_links > 0); 293 node->tn_links--; 294 } 295 } 296 297 free(de->td_name, M_TMPFSNAME); 298 uma_zfree(tmp->tm_dirent_pool, de); 299 } 300 301 /* --------------------------------------------------------------------- */ 302 303 /* 304 * Allocates a new vnode for the node node or returns a new reference to 305 * an existing one if the node had already a vnode referencing it. The 306 * resulting locked vnode is returned in *vpp. 307 * 308 * Returns zero on success or an appropriate error code on failure. 309 */ 310 int 311 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 312 struct vnode **vpp) 313 { 314 int error = 0; 315 struct vnode *vp; 316 317 loop: 318 TMPFS_NODE_LOCK(node); 319 if ((vp = node->tn_vnode) != NULL) { 320 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 321 VI_LOCK(vp); 322 TMPFS_NODE_UNLOCK(node); 323 vholdl(vp); 324 (void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, curthread); 325 vdrop(vp); 326 327 /* 328 * Make sure the vnode is still there after 329 * getting the interlock to avoid racing a free. 330 */ 331 if (node->tn_vnode == NULL || node->tn_vnode != vp) { 332 vput(vp); 333 goto loop; 334 } 335 336 goto out; 337 } 338 339 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 340 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 341 TMPFS_NODE_UNLOCK(node); 342 error = ENOENT; 343 vp = NULL; 344 goto out; 345 } 346 347 /* 348 * otherwise lock the vp list while we call getnewvnode 349 * since that can block. 350 */ 351 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 352 node->tn_vpstate |= TMPFS_VNODE_WANT; 353 error = msleep((caddr_t) &node->tn_vpstate, 354 TMPFS_NODE_MTX(node), PDROP | PCATCH, 355 "tmpfs_alloc_vp", 0); 356 if (error) 357 return error; 358 359 goto loop; 360 } else 361 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 362 363 TMPFS_NODE_UNLOCK(node); 364 365 /* Get a new vnode and associate it with our node. */ 366 error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp); 367 if (error != 0) 368 goto unlock; 369 MPASS(vp != NULL); 370 371 (void) vn_lock(vp, lkflag | LK_RETRY); 372 373 vp->v_data = node; 374 vp->v_type = node->tn_type; 375 376 /* Type-specific initialization. */ 377 switch (node->tn_type) { 378 case VBLK: 379 /* FALLTHROUGH */ 380 case VCHR: 381 /* FALLTHROUGH */ 382 case VLNK: 383 /* FALLTHROUGH */ 384 case VREG: 385 /* FALLTHROUGH */ 386 case VSOCK: 387 break; 388 case VFIFO: 389 vp->v_op = &tmpfs_fifoop_entries; 390 break; 391 case VDIR: 392 MPASS(node->tn_dir.tn_parent != NULL); 393 if (node->tn_dir.tn_parent == node) 394 vp->v_vflag |= VV_ROOT; 395 break; 396 397 default: 398 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 399 } 400 401 vnode_pager_setsize(vp, node->tn_size); 402 error = insmntque(vp, mp); 403 if (error) 404 vp = NULL; 405 406 unlock: 407 TMPFS_NODE_LOCK(node); 408 409 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 410 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 411 node->tn_vnode = vp; 412 413 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 414 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 415 TMPFS_NODE_UNLOCK(node); 416 wakeup((caddr_t) &node->tn_vpstate); 417 } else 418 TMPFS_NODE_UNLOCK(node); 419 420 out: 421 *vpp = vp; 422 423 MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); 424 #ifdef INVARIANTS 425 TMPFS_NODE_LOCK(node); 426 MPASS(*vpp == node->tn_vnode); 427 TMPFS_NODE_UNLOCK(node); 428 #endif 429 430 return error; 431 } 432 433 /* --------------------------------------------------------------------- */ 434 435 /* 436 * Destroys the association between the vnode vp and the node it 437 * references. 438 */ 439 void 440 tmpfs_free_vp(struct vnode *vp) 441 { 442 struct tmpfs_node *node; 443 444 node = VP_TO_TMPFS_NODE(vp); 445 446 mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); 447 node->tn_vnode = NULL; 448 vp->v_data = NULL; 449 } 450 451 /* --------------------------------------------------------------------- */ 452 453 /* 454 * Allocates a new file of type 'type' and adds it to the parent directory 455 * 'dvp'; this addition is done using the component name given in 'cnp'. 456 * The ownership of the new file is automatically assigned based on the 457 * credentials of the caller (through 'cnp'), the group is set based on 458 * the parent directory and the mode is determined from the 'vap' argument. 459 * If successful, *vpp holds a vnode to the newly created file and zero 460 * is returned. Otherwise *vpp is NULL and the function returns an 461 * appropriate error code. 462 */ 463 int 464 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 465 struct componentname *cnp, char *target) 466 { 467 int error; 468 struct tmpfs_dirent *de; 469 struct tmpfs_mount *tmp; 470 struct tmpfs_node *dnode; 471 struct tmpfs_node *node; 472 struct tmpfs_node *parent; 473 474 MPASS(VOP_ISLOCKED(dvp)); 475 MPASS(cnp->cn_flags & HASBUF); 476 477 tmp = VFS_TO_TMPFS(dvp->v_mount); 478 dnode = VP_TO_TMPFS_DIR(dvp); 479 *vpp = NULL; 480 481 /* If the entry we are creating is a directory, we cannot overflow 482 * the number of links of its parent, because it will get a new 483 * link. */ 484 if (vap->va_type == VDIR) { 485 /* Ensure that we do not overflow the maximum number of links 486 * imposed by the system. */ 487 MPASS(dnode->tn_links <= LINK_MAX); 488 if (dnode->tn_links == LINK_MAX) { 489 error = EMLINK; 490 goto out; 491 } 492 493 parent = dnode; 494 MPASS(parent != NULL); 495 } else 496 parent = NULL; 497 498 /* Allocate a node that represents the new file. */ 499 error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, 500 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 501 if (error != 0) 502 goto out; 503 504 /* Allocate a directory entry that points to the new file. */ 505 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 506 &de); 507 if (error != 0) { 508 tmpfs_free_node(tmp, node); 509 goto out; 510 } 511 512 /* Allocate a vnode for the new file. */ 513 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 514 if (error != 0) { 515 tmpfs_free_dirent(tmp, de, TRUE); 516 tmpfs_free_node(tmp, node); 517 goto out; 518 } 519 520 /* Now that all required items are allocated, we can proceed to 521 * insert the new node into the directory, an operation that 522 * cannot fail. */ 523 if (cnp->cn_flags & ISWHITEOUT) 524 tmpfs_dir_whiteout_remove(dvp, cnp); 525 tmpfs_dir_attach(dvp, de); 526 527 out: 528 529 return error; 530 } 531 532 /* --------------------------------------------------------------------- */ 533 534 /* 535 * Attaches the directory entry de to the directory represented by vp. 536 * Note that this does not change the link count of the node pointed by 537 * the directory entry, as this is done by tmpfs_alloc_dirent. 538 */ 539 void 540 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 541 { 542 struct tmpfs_node *dnode; 543 544 ASSERT_VOP_ELOCKED(vp, __func__); 545 dnode = VP_TO_TMPFS_DIR(vp); 546 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 547 dnode->tn_size += sizeof(struct tmpfs_dirent); 548 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 549 TMPFS_NODE_MODIFIED; 550 } 551 552 /* --------------------------------------------------------------------- */ 553 554 /* 555 * Detaches the directory entry de from the directory represented by vp. 556 * Note that this does not change the link count of the node pointed by 557 * the directory entry, as this is done by tmpfs_free_dirent. 558 */ 559 void 560 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 561 { 562 struct tmpfs_node *dnode; 563 564 ASSERT_VOP_ELOCKED(vp, __func__); 565 dnode = VP_TO_TMPFS_DIR(vp); 566 567 if (dnode->tn_dir.tn_readdir_lastp == de) { 568 dnode->tn_dir.tn_readdir_lastn = 0; 569 dnode->tn_dir.tn_readdir_lastp = NULL; 570 } 571 572 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 573 dnode->tn_size -= sizeof(struct tmpfs_dirent); 574 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 575 TMPFS_NODE_MODIFIED; 576 } 577 578 /* --------------------------------------------------------------------- */ 579 580 /* 581 * Looks for a directory entry in the directory represented by node. 582 * 'cnp' describes the name of the entry to look for. Note that the . 583 * and .. components are not allowed as they do not physically exist 584 * within directories. 585 * 586 * Returns a pointer to the entry when found, otherwise NULL. 587 */ 588 struct tmpfs_dirent * 589 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 590 struct componentname *cnp) 591 { 592 boolean_t found; 593 struct tmpfs_dirent *de; 594 595 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 596 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 597 cnp->cn_nameptr[1] == '.'))); 598 TMPFS_VALIDATE_DIR(node); 599 600 found = 0; 601 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 602 if (f != NULL && de->td_node != f) 603 continue; 604 MPASS(cnp->cn_namelen < 0xffff); 605 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 606 bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 607 found = 1; 608 break; 609 } 610 } 611 node->tn_status |= TMPFS_NODE_ACCESSED; 612 613 return found ? de : NULL; 614 } 615 616 /* --------------------------------------------------------------------- */ 617 618 /* 619 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 620 * directory and returns it in the uio space. The function returns 0 621 * on success, -1 if there was not enough space in the uio structure to 622 * hold the directory entry or an appropriate error code if another 623 * error happens. 624 */ 625 int 626 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 627 { 628 int error; 629 struct dirent dent; 630 631 TMPFS_VALIDATE_DIR(node); 632 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 633 634 dent.d_fileno = node->tn_id; 635 dent.d_type = DT_DIR; 636 dent.d_namlen = 1; 637 dent.d_name[0] = '.'; 638 dent.d_name[1] = '\0'; 639 dent.d_reclen = GENERIC_DIRSIZ(&dent); 640 641 if (dent.d_reclen > uio->uio_resid) 642 error = -1; 643 else { 644 error = uiomove(&dent, dent.d_reclen, uio); 645 if (error == 0) 646 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 647 } 648 649 node->tn_status |= TMPFS_NODE_ACCESSED; 650 651 return error; 652 } 653 654 /* --------------------------------------------------------------------- */ 655 656 /* 657 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 658 * directory and returns it in the uio space. The function returns 0 659 * on success, -1 if there was not enough space in the uio structure to 660 * hold the directory entry or an appropriate error code if another 661 * error happens. 662 */ 663 int 664 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 665 { 666 int error; 667 struct dirent dent; 668 669 TMPFS_VALIDATE_DIR(node); 670 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 671 672 /* 673 * Return ENOENT if the current node is already removed. 674 */ 675 TMPFS_ASSERT_LOCKED(node); 676 if (node->tn_dir.tn_parent == NULL) { 677 return (ENOENT); 678 } 679 680 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 681 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 682 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 683 684 dent.d_type = DT_DIR; 685 dent.d_namlen = 2; 686 dent.d_name[0] = '.'; 687 dent.d_name[1] = '.'; 688 dent.d_name[2] = '\0'; 689 dent.d_reclen = GENERIC_DIRSIZ(&dent); 690 691 if (dent.d_reclen > uio->uio_resid) 692 error = -1; 693 else { 694 error = uiomove(&dent, dent.d_reclen, uio); 695 if (error == 0) { 696 struct tmpfs_dirent *de; 697 698 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 699 if (de == NULL) 700 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 701 else 702 uio->uio_offset = tmpfs_dircookie(de); 703 } 704 } 705 706 node->tn_status |= TMPFS_NODE_ACCESSED; 707 708 return error; 709 } 710 711 /* --------------------------------------------------------------------- */ 712 713 /* 714 * Lookup a directory entry by its associated cookie. 715 */ 716 struct tmpfs_dirent * 717 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 718 { 719 struct tmpfs_dirent *de; 720 721 if (cookie == node->tn_dir.tn_readdir_lastn && 722 node->tn_dir.tn_readdir_lastp != NULL) { 723 return node->tn_dir.tn_readdir_lastp; 724 } 725 726 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 727 if (tmpfs_dircookie(de) == cookie) { 728 break; 729 } 730 } 731 732 return de; 733 } 734 735 /* --------------------------------------------------------------------- */ 736 737 /* 738 * Helper function for tmpfs_readdir. Returns as much directory entries 739 * as can fit in the uio space. The read starts at uio->uio_offset. 740 * The function returns 0 on success, -1 if there was not enough space 741 * in the uio structure to hold the directory entry or an appropriate 742 * error code if another error happens. 743 */ 744 int 745 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 746 { 747 int error; 748 off_t startcookie; 749 struct tmpfs_dirent *de; 750 751 TMPFS_VALIDATE_DIR(node); 752 753 /* Locate the first directory entry we have to return. We have cached 754 * the last readdir in the node, so use those values if appropriate. 755 * Otherwise do a linear scan to find the requested entry. */ 756 startcookie = uio->uio_offset; 757 MPASS(startcookie != TMPFS_DIRCOOKIE_DOT); 758 MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 759 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 760 return 0; 761 } else { 762 de = tmpfs_dir_lookupbycookie(node, startcookie); 763 } 764 if (de == NULL) { 765 return EINVAL; 766 } 767 768 /* Read as much entries as possible; i.e., until we reach the end of 769 * the directory or we exhaust uio space. */ 770 do { 771 struct dirent d; 772 773 /* Create a dirent structure representing the current 774 * tmpfs_node and fill it. */ 775 if (de->td_node == NULL) { 776 d.d_fileno = 1; 777 d.d_type = DT_WHT; 778 } else { 779 d.d_fileno = de->td_node->tn_id; 780 switch (de->td_node->tn_type) { 781 case VBLK: 782 d.d_type = DT_BLK; 783 break; 784 785 case VCHR: 786 d.d_type = DT_CHR; 787 break; 788 789 case VDIR: 790 d.d_type = DT_DIR; 791 break; 792 793 case VFIFO: 794 d.d_type = DT_FIFO; 795 break; 796 797 case VLNK: 798 d.d_type = DT_LNK; 799 break; 800 801 case VREG: 802 d.d_type = DT_REG; 803 break; 804 805 case VSOCK: 806 d.d_type = DT_SOCK; 807 break; 808 809 default: 810 panic("tmpfs_dir_getdents: type %p %d", 811 de->td_node, (int)de->td_node->tn_type); 812 } 813 } 814 d.d_namlen = de->td_namelen; 815 MPASS(de->td_namelen < sizeof(d.d_name)); 816 (void)memcpy(d.d_name, de->td_name, de->td_namelen); 817 d.d_name[de->td_namelen] = '\0'; 818 d.d_reclen = GENERIC_DIRSIZ(&d); 819 820 /* Stop reading if the directory entry we are treating is 821 * bigger than the amount of data that can be returned. */ 822 if (d.d_reclen > uio->uio_resid) { 823 error = -1; 824 break; 825 } 826 827 /* Copy the new dirent structure into the output buffer and 828 * advance pointers. */ 829 error = uiomove(&d, d.d_reclen, uio); 830 831 (*cntp)++; 832 de = TAILQ_NEXT(de, td_entries); 833 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 834 835 /* Update the offset and cache. */ 836 if (de == NULL) { 837 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 838 node->tn_dir.tn_readdir_lastn = 0; 839 node->tn_dir.tn_readdir_lastp = NULL; 840 } else { 841 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 842 node->tn_dir.tn_readdir_lastp = de; 843 } 844 845 node->tn_status |= TMPFS_NODE_ACCESSED; 846 return error; 847 } 848 849 int 850 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 851 { 852 struct tmpfs_dirent *de; 853 int error; 854 855 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 856 cnp->cn_nameptr, cnp->cn_namelen, &de); 857 if (error != 0) 858 return (error); 859 tmpfs_dir_attach(dvp, de); 860 return (0); 861 } 862 863 void 864 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 865 { 866 struct tmpfs_dirent *de; 867 868 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 869 MPASS(de != NULL && de->td_node == NULL); 870 tmpfs_dir_detach(dvp, de); 871 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE); 872 } 873 874 /* --------------------------------------------------------------------- */ 875 876 /* 877 * Resizes the aobj associated to the regular file pointed to by vp to 878 * the size newsize. 'vp' must point to a vnode that represents a regular 879 * file. 'newsize' must be positive. 880 * 881 * Returns zero on success or an appropriate error code on failure. 882 */ 883 int 884 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 885 { 886 int error; 887 size_t newpages, oldpages; 888 struct tmpfs_mount *tmp; 889 struct tmpfs_node *node; 890 off_t oldsize; 891 892 MPASS(vp->v_type == VREG); 893 MPASS(newsize >= 0); 894 895 node = VP_TO_TMPFS_NODE(vp); 896 tmp = VFS_TO_TMPFS(vp->v_mount); 897 898 /* Convert the old and new sizes to the number of pages needed to 899 * store them. It may happen that we do not need to do anything 900 * because the last allocated page can accommodate the change on 901 * its own. */ 902 oldsize = node->tn_size; 903 oldpages = round_page(oldsize) / PAGE_SIZE; 904 MPASS(oldpages == node->tn_reg.tn_aobj_pages); 905 newpages = round_page(newsize) / PAGE_SIZE; 906 907 if (newpages > oldpages && 908 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { 909 error = ENOSPC; 910 goto out; 911 } 912 913 node->tn_reg.tn_aobj_pages = newpages; 914 915 TMPFS_LOCK(tmp); 916 tmp->tm_pages_used += (newpages - oldpages); 917 TMPFS_UNLOCK(tmp); 918 919 node->tn_size = newsize; 920 vnode_pager_setsize(vp, newsize); 921 if (newsize < oldsize) { 922 size_t zerolen = round_page(newsize) - newsize; 923 vm_object_t uobj = node->tn_reg.tn_aobj; 924 vm_page_t m; 925 926 /* 927 * free "backing store" 928 */ 929 VM_OBJECT_LOCK(uobj); 930 if (newpages < oldpages) { 931 swap_pager_freespace(uobj, 932 newpages, oldpages - newpages); 933 vm_object_page_remove(uobj, 934 OFF_TO_IDX(newsize + PAGE_MASK), 0, FALSE); 935 } 936 937 /* 938 * zero out the truncated part of the last page. 939 */ 940 941 if (zerolen > 0) { 942 m = vm_page_grab(uobj, OFF_TO_IDX(newsize), 943 VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 944 pmap_zero_page_area(m, PAGE_SIZE - zerolen, 945 zerolen); 946 } 947 VM_OBJECT_UNLOCK(uobj); 948 949 } 950 951 error = 0; 952 953 out: 954 return error; 955 } 956 957 /* --------------------------------------------------------------------- */ 958 959 /* 960 * Change flags of the given vnode. 961 * Caller should execute tmpfs_update on vp after a successful execution. 962 * The vnode must be locked on entry and remain locked on exit. 963 */ 964 int 965 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct thread *p) 966 { 967 int error; 968 struct tmpfs_node *node; 969 970 MPASS(VOP_ISLOCKED(vp)); 971 972 node = VP_TO_TMPFS_NODE(vp); 973 974 /* Disallow this operation if the file system is mounted read-only. */ 975 if (vp->v_mount->mnt_flag & MNT_RDONLY) 976 return EROFS; 977 978 /* 979 * Callers may only modify the file flags on objects they 980 * have VADMIN rights for. 981 */ 982 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 983 return (error); 984 /* 985 * Unprivileged processes are not permitted to unset system 986 * flags, or modify flags if any system flags are set. 987 */ 988 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 989 if (node->tn_flags 990 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 991 error = securelevel_gt(cred, 0); 992 if (error) 993 return (error); 994 } 995 /* Snapshot flag cannot be set or cleared */ 996 if (((flags & SF_SNAPSHOT) != 0 && 997 (node->tn_flags & SF_SNAPSHOT) == 0) || 998 ((flags & SF_SNAPSHOT) == 0 && 999 (node->tn_flags & SF_SNAPSHOT) != 0)) 1000 return (EPERM); 1001 node->tn_flags = flags; 1002 } else { 1003 if (node->tn_flags 1004 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1005 (flags & UF_SETTABLE) != flags) 1006 return (EPERM); 1007 node->tn_flags &= SF_SETTABLE; 1008 node->tn_flags |= (flags & UF_SETTABLE); 1009 } 1010 node->tn_status |= TMPFS_NODE_CHANGED; 1011 1012 MPASS(VOP_ISLOCKED(vp)); 1013 1014 return 0; 1015 } 1016 1017 /* --------------------------------------------------------------------- */ 1018 1019 /* 1020 * Change access mode on the given vnode. 1021 * Caller should execute tmpfs_update on vp after a successful execution. 1022 * The vnode must be locked on entry and remain locked on exit. 1023 */ 1024 int 1025 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1026 { 1027 int error; 1028 struct tmpfs_node *node; 1029 1030 MPASS(VOP_ISLOCKED(vp)); 1031 1032 node = VP_TO_TMPFS_NODE(vp); 1033 1034 /* Disallow this operation if the file system is mounted read-only. */ 1035 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1036 return EROFS; 1037 1038 /* Immutable or append-only files cannot be modified, either. */ 1039 if (node->tn_flags & (IMMUTABLE | APPEND)) 1040 return EPERM; 1041 1042 /* 1043 * To modify the permissions on a file, must possess VADMIN 1044 * for that file. 1045 */ 1046 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1047 return (error); 1048 1049 /* 1050 * Privileged processes may set the sticky bit on non-directories, 1051 * as well as set the setgid bit on a file with a group that the 1052 * process is not a member of. 1053 */ 1054 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1055 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1056 return (EFTYPE); 1057 } 1058 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1059 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1060 if (error) 1061 return (error); 1062 } 1063 1064 1065 node->tn_mode &= ~ALLPERMS; 1066 node->tn_mode |= mode & ALLPERMS; 1067 1068 node->tn_status |= TMPFS_NODE_CHANGED; 1069 1070 MPASS(VOP_ISLOCKED(vp)); 1071 1072 return 0; 1073 } 1074 1075 /* --------------------------------------------------------------------- */ 1076 1077 /* 1078 * Change ownership of the given vnode. At least one of uid or gid must 1079 * be different than VNOVAL. If one is set to that value, the attribute 1080 * is unchanged. 1081 * Caller should execute tmpfs_update on vp after a successful execution. 1082 * The vnode must be locked on entry and remain locked on exit. 1083 */ 1084 int 1085 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1086 struct thread *p) 1087 { 1088 int error; 1089 struct tmpfs_node *node; 1090 uid_t ouid; 1091 gid_t ogid; 1092 1093 MPASS(VOP_ISLOCKED(vp)); 1094 1095 node = VP_TO_TMPFS_NODE(vp); 1096 1097 /* Assign default values if they are unknown. */ 1098 MPASS(uid != VNOVAL || gid != VNOVAL); 1099 if (uid == VNOVAL) 1100 uid = node->tn_uid; 1101 if (gid == VNOVAL) 1102 gid = node->tn_gid; 1103 MPASS(uid != VNOVAL && gid != VNOVAL); 1104 1105 /* Disallow this operation if the file system is mounted read-only. */ 1106 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1107 return EROFS; 1108 1109 /* Immutable or append-only files cannot be modified, either. */ 1110 if (node->tn_flags & (IMMUTABLE | APPEND)) 1111 return EPERM; 1112 1113 /* 1114 * To modify the ownership of a file, must possess VADMIN for that 1115 * file. 1116 */ 1117 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1118 return (error); 1119 1120 /* 1121 * To change the owner of a file, or change the group of a file to a 1122 * group of which we are not a member, the caller must have 1123 * privilege. 1124 */ 1125 if ((uid != node->tn_uid || 1126 (gid != node->tn_gid && !groupmember(gid, cred))) && 1127 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1128 return (error); 1129 1130 ogid = node->tn_gid; 1131 ouid = node->tn_uid; 1132 1133 node->tn_uid = uid; 1134 node->tn_gid = gid; 1135 1136 node->tn_status |= TMPFS_NODE_CHANGED; 1137 1138 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1139 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1140 node->tn_mode &= ~(S_ISUID | S_ISGID); 1141 } 1142 1143 MPASS(VOP_ISLOCKED(vp)); 1144 1145 return 0; 1146 } 1147 1148 /* --------------------------------------------------------------------- */ 1149 1150 /* 1151 * Change size of the given vnode. 1152 * Caller should execute tmpfs_update on vp after a successful execution. 1153 * The vnode must be locked on entry and remain locked on exit. 1154 */ 1155 int 1156 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1157 struct thread *p) 1158 { 1159 int error; 1160 struct tmpfs_node *node; 1161 1162 MPASS(VOP_ISLOCKED(vp)); 1163 1164 node = VP_TO_TMPFS_NODE(vp); 1165 1166 /* Decide whether this is a valid operation based on the file type. */ 1167 error = 0; 1168 switch (vp->v_type) { 1169 case VDIR: 1170 return EISDIR; 1171 1172 case VREG: 1173 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1174 return EROFS; 1175 break; 1176 1177 case VBLK: 1178 /* FALLTHROUGH */ 1179 case VCHR: 1180 /* FALLTHROUGH */ 1181 case VFIFO: 1182 /* Allow modifications of special files even if in the file 1183 * system is mounted read-only (we are not modifying the 1184 * files themselves, but the objects they represent). */ 1185 return 0; 1186 1187 default: 1188 /* Anything else is unsupported. */ 1189 return EOPNOTSUPP; 1190 } 1191 1192 /* Immutable or append-only files cannot be modified, either. */ 1193 if (node->tn_flags & (IMMUTABLE | APPEND)) 1194 return EPERM; 1195 1196 error = tmpfs_truncate(vp, size); 1197 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1198 * for us, as will update tn_status; no need to do that here. */ 1199 1200 MPASS(VOP_ISLOCKED(vp)); 1201 1202 return error; 1203 } 1204 1205 /* --------------------------------------------------------------------- */ 1206 1207 /* 1208 * Change access and modification times of the given vnode. 1209 * Caller should execute tmpfs_update on vp after a successful execution. 1210 * The vnode must be locked on entry and remain locked on exit. 1211 */ 1212 int 1213 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1214 struct timespec *birthtime, int vaflags, struct ucred *cred, struct thread *l) 1215 { 1216 int error; 1217 struct tmpfs_node *node; 1218 1219 MPASS(VOP_ISLOCKED(vp)); 1220 1221 node = VP_TO_TMPFS_NODE(vp); 1222 1223 /* Disallow this operation if the file system is mounted read-only. */ 1224 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1225 return EROFS; 1226 1227 /* Immutable or append-only files cannot be modified, either. */ 1228 if (node->tn_flags & (IMMUTABLE | APPEND)) 1229 return EPERM; 1230 1231 /* Determine if the user have proper privilege to update time. */ 1232 if (vaflags & VA_UTIMES_NULL) { 1233 error = VOP_ACCESS(vp, VADMIN, cred, l); 1234 if (error) 1235 error = VOP_ACCESS(vp, VWRITE, cred, l); 1236 } else 1237 error = VOP_ACCESS(vp, VADMIN, cred, l); 1238 if (error) 1239 return (error); 1240 1241 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1242 node->tn_status |= TMPFS_NODE_ACCESSED; 1243 1244 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1245 node->tn_status |= TMPFS_NODE_MODIFIED; 1246 1247 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1248 node->tn_status |= TMPFS_NODE_MODIFIED; 1249 1250 tmpfs_itimes(vp, atime, mtime); 1251 1252 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1253 node->tn_birthtime = *birthtime; 1254 MPASS(VOP_ISLOCKED(vp)); 1255 1256 return 0; 1257 } 1258 1259 /* --------------------------------------------------------------------- */ 1260 /* Sync timestamps */ 1261 void 1262 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1263 const struct timespec *mod) 1264 { 1265 struct tmpfs_node *node; 1266 struct timespec now; 1267 1268 node = VP_TO_TMPFS_NODE(vp); 1269 1270 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1271 TMPFS_NODE_CHANGED)) == 0) 1272 return; 1273 1274 vfs_timestamp(&now); 1275 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1276 if (acc == NULL) 1277 acc = &now; 1278 node->tn_atime = *acc; 1279 } 1280 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1281 if (mod == NULL) 1282 mod = &now; 1283 node->tn_mtime = *mod; 1284 } 1285 if (node->tn_status & TMPFS_NODE_CHANGED) { 1286 node->tn_ctime = now; 1287 } 1288 node->tn_status &= 1289 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1290 } 1291 1292 /* --------------------------------------------------------------------- */ 1293 1294 void 1295 tmpfs_update(struct vnode *vp) 1296 { 1297 1298 tmpfs_itimes(vp, NULL, NULL); 1299 } 1300 1301 /* --------------------------------------------------------------------- */ 1302 1303 int 1304 tmpfs_truncate(struct vnode *vp, off_t length) 1305 { 1306 int error; 1307 struct tmpfs_node *node; 1308 1309 node = VP_TO_TMPFS_NODE(vp); 1310 1311 if (length < 0) { 1312 error = EINVAL; 1313 goto out; 1314 } 1315 1316 if (node->tn_size == length) { 1317 error = 0; 1318 goto out; 1319 } 1320 1321 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1322 return (EFBIG); 1323 1324 error = tmpfs_reg_resize(vp, length); 1325 if (error == 0) { 1326 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1327 } 1328 1329 out: 1330 tmpfs_update(vp); 1331 1332 return error; 1333 } 1334