1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 * All four timestamps of the node are set to the current time.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node, which becomes its own parent.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents; for every other type it must
 * be VNOVAL.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created; for every other type it must be NULL.
 *
 * The node is taken from the mount's node zone (tm_node_pool) and gets a
 * fresh inode number from tm_ino_unr.  On success it is linked on the
 * mount's tm_nodes_used list and returned in *node.
 *
 * Returns zero on success, or ENOSPC if the mount already holds
 * tm_nodes_max nodes.
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	/* 'target' and 'rdev' are only meaningful for their own node types. */
	MPASS(IFF(type == VLNK, target != NULL));
	MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	/* NOTE(review): this limit is read without holding TMPFS_LOCK. */
	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
		return (ENOSPC);

	nnode = (struct tmpfs_node *)uma_zalloc_arg(
				tmp->tm_node_pool, tmp, M_WAITOK);

	/* Generic initialization. */
	nnode->tn_type = type;
	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_id = alloc_unr(tmp->tm_ino_unr);

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_dir.tn_dirhead);
		MPASS(parent != nnode);
		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
		/* The root directory is its own parent. */
		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
		/* Start with an empty readdir cache. */
		nnode->tn_dir.tn_readdir_lastn = 0;
		nnode->tn_dir.tn_readdir_lastp = NULL;
		/*
		 * One link is accounted on the new directory and one on its
		 * parent; for the root node both land on the node itself.
		 */
		nnode->tn_links++;
		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
		nnode->tn_dir.tn_parent->tn_links++;
		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		MPASS(strlen(target) < MAXPATHLEN);
		/*
		 * The target is stored without a terminating NUL; its
		 * length is kept in tn_size.
		 */
		nnode->tn_size = strlen(target);
		nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME,
		    M_WAITOK);
		memcpy(nnode->tn_link, target, nnode->tn_size);
		break;

	case VREG:
		/* The file contents live in a swap-backed VM object. */
		nnode->tn_reg.tn_aobj =
		    vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
			NULL /* XXXKIB - tmpfs needs swap reservation */);
		nnode->tn_reg.tn_aobj_pages = 0;
		break;

	default:
		panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type);
	}

	/* Publish the node on the mount's in-use list. */
	TMPFS_LOCK(tmp);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
	tmp->tm_nodes_inuse++;
	TMPFS_UNLOCK(tmp);

	*node = nnode;
	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node does not belong to the given mount point, the results are
 * unpredictable.
 *
 * If the node references a directory, it must have no entries, because
 * their removal could need a recursive algorithm, something forbidden in
 * kernel space.  Furthermore, there is no need to provide such
 * functionality (recursive removal) because the only primitives offered
 * to the user are the removal of empty directories and the deletion of
 * individual files.  This function does not check for remaining entries.
 *
 * The node is unlinked from tm_nodes_used, its type-specific resources
 * (symbolic link target, VM object of a regular file) are released, its
 * inode number is handed back to tm_ino_unr and the node itself is
 * returned to tm_node_pool.  Finally the pages that a regular file
 * accounted for are subtracted from tm_pages_used.
 *
 * The node must not have a vnode associated with it, nor be in the
 * middle of a vnode allocation (asserted under INVARIANTS).
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	/* Pages to give back to the mount; non-zero only for VREG. */
	size_t pages = 0;

#ifdef INVARIANTS
	TMPFS_NODE_LOCK(node);
	MPASS(node->tn_vnode == NULL);
	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
	TMPFS_NODE_UNLOCK(node);
#endif

	TMPFS_LOCK(tmp);
	LIST_REMOVE(node, tn_entries);
	tmp->tm_nodes_inuse--;
	TMPFS_UNLOCK(tmp);

	switch (node->tn_type) {
	case VNON:
		/* Do not do anything.  VNON is provided to let the
		 * allocation routine clean itself easily by avoiding
		 * duplicating code in it. */
		/* FALLTHROUGH */
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		free(node->tn_link, M_TMPFSNAME);
		break;

	case VREG:
		if (node->tn_reg.tn_aobj != NULL)
			vm_object_deallocate(node->tn_reg.tn_aobj);
		/* Remember the page count; 'node' is freed below. */
		pages = node->tn_reg.tn_aobj_pages;
		break;

	default:
		panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type);
	}

	free_unr(tmp->tm_ino_unr, node->tn_id);
	uma_zfree(tmp->tm_node_pool, node);

	TMPFS_LOCK(tmp);
	tmp->tm_pages_used -= pages;
	TMPFS_UNLOCK(tmp);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node 'node' with the name
 * 'name', which is 'len' bytes long.  The name is copied into the entry
 * without a terminating NUL.  The new directory entry is returned in *de.
 *
 * If 'node' is not NULL, its link count is increased by one to reflect
 * the new object referencing it.  A NULL 'node' is accepted and leaves
 * td_node NULL (tmpfs_dir_whiteout_add creates its entries this way).
 *
 * Returns zero; no failure path currently exists in this function.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, uint16_t len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = (struct tmpfs_dirent *)uma_zalloc(
				tmp->tm_dirent_pool, M_WAITOK);
	nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);

	nde->td_node = node;
	if (node != NULL)
		node->tn_links++;

	*de = nde;

	return 0;
}

/* --------------------------------------------------------------------- */

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.
This only happens if 'node_exists' is true;
 * otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 * An entry whose td_node is NULL has no link count to adjust.
 *
 * The entry's name buffer and the entry itself are always released.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    boolean_t node_exists)
{
	if (node_exists) {
		struct tmpfs_node *node;

		node = de->td_node;
		if (node != NULL) {
			MPASS(node->tn_links > 0);
			node->tn_links--;
		}
	}

	free(de->td_name, M_TMPFSNAME);
	uma_zfree(tmp->tm_dirent_pool, de);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new vnode for the node 'node' or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting vnode, locked according to 'lkflag', is returned in *vpp.
 *
 * Only one thread at a time creates the vnode for a node: the creator
 * sets TMPFS_VNODE_ALLOCATING in tn_vpstate, and any other thread sets
 * TMPFS_VNODE_WANT and sleeps on tn_vpstate until it is woken up.
 *
 * Returns zero on success or an appropriate error code on failure:
 * ENOENT if the node is doomed or is a directory without a parent, the
 * error from getnewvnode() or insmntque(), or the error from an
 * interrupted sleep.  In the interrupted-sleep case the function returns
 * immediately and *vpp is not written; in every other case *vpp is set
 * (to NULL on failure).
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
    struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		/*
		 * The node already has a vnode: take the vnode interlock
		 * before dropping the node lock, then get and lock it.
		 */
		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		VI_LOCK(vp);
		TMPFS_NODE_UNLOCK(node);
		vholdl(vp);
		(void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, curthread);
		vdrop(vp);

		/*
		 * Make sure the vnode is still there after
		 * getting the interlock to avoid racing a free.
		 */
		if (node->tn_vnode == NULL || node->tn_vnode != vp) {
			vput(vp);
			goto loop;
		}

		goto out;
	}

	/* Refuse to create a vnode for a node that is going away. */
	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		vp = NULL;
		goto out;
	}

	/*
	 * No vnode yet.  The node lock is dropped before calling
	 * getnewvnode(), so vnode creation is serialized with the
	 * TMPFS_VNODE_ALLOCATING flag instead: if another thread is already
	 * allocating, note that we are waiting and sleep until it is done,
	 * then start over.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		/* PDROP: the node mutex is not held when msleep() returns. */
		error = msleep((caddr_t) &node->tn_vpstate,
		    TMPFS_NODE_MTX(node), PDROP | PCATCH,
		    "tmpfs_alloc_vp", 0);
		if (error)
			return error;

		goto loop;
	} else
		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;

	TMPFS_NODE_UNLOCK(node);

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp);
	if (error != 0)
		goto unlock;
	MPASS(vp != NULL);

	(void) vn_lock(vp, lkflag | LK_RETRY);

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VFIFO:
		/* FIFOs use their own vnode operations vector. */
		vp->v_op = &tmpfs_fifoop_entries;
		break;
	case VDIR:
		MPASS(node->tn_dir.tn_parent != NULL);
		/* A directory that is its own parent is the root. */
		if (node->tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

	vnode_pager_setsize(vp, node->tn_size);
	error = insmntque(vp, mp);
	/*
	 * NOTE(review): presumably insmntque() disposes of the vnode itself
	 * on failure, so only the pointer is cleared here -- confirm.
	 */
	if (error)
		vp = NULL;

unlock:
	TMPFS_NODE_LOCK(node);

	/* Allocation finished (successfully or not): record the result. */
	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	/* Wake up any thread that went to sleep waiting for us. */
	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup((caddr_t) &node->tn_vpstate);
	} else
		TMPFS_NODE_UNLOCK(node);

out:
	*vpp = vp;

	MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
#ifdef INVARIANTS
	TMPFS_NODE_LOCK(node);
	MPASS(*vpp == node->tn_vnode);
	TMPFS_NODE_UNLOCK(node);
#endif

	return error;
}

/*
--------------------------------------------------------------------- */ 434 435 /* 436 * Destroys the association between the vnode vp and the node it 437 * references. 438 */ 439 void 440 tmpfs_free_vp(struct vnode *vp) 441 { 442 struct tmpfs_node *node; 443 444 node = VP_TO_TMPFS_NODE(vp); 445 446 mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); 447 node->tn_vnode = NULL; 448 vp->v_data = NULL; 449 } 450 451 /* --------------------------------------------------------------------- */ 452 453 /* 454 * Allocates a new file of type 'type' and adds it to the parent directory 455 * 'dvp'; this addition is done using the component name given in 'cnp'. 456 * The ownership of the new file is automatically assigned based on the 457 * credentials of the caller (through 'cnp'), the group is set based on 458 * the parent directory and the mode is determined from the 'vap' argument. 459 * If successful, *vpp holds a vnode to the newly created file and zero 460 * is returned. Otherwise *vpp is NULL and the function returns an 461 * appropriate error code. 462 */ 463 int 464 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 465 struct componentname *cnp, char *target) 466 { 467 int error; 468 struct tmpfs_dirent *de; 469 struct tmpfs_mount *tmp; 470 struct tmpfs_node *dnode; 471 struct tmpfs_node *node; 472 struct tmpfs_node *parent; 473 474 MPASS(VOP_ISLOCKED(dvp)); 475 MPASS(cnp->cn_flags & HASBUF); 476 477 tmp = VFS_TO_TMPFS(dvp->v_mount); 478 dnode = VP_TO_TMPFS_DIR(dvp); 479 *vpp = NULL; 480 481 /* If the entry we are creating is a directory, we cannot overflow 482 * the number of links of its parent, because it will get a new 483 * link. */ 484 if (vap->va_type == VDIR) { 485 /* Ensure that we do not overflow the maximum number of links 486 * imposed by the system. 
*/ 487 MPASS(dnode->tn_links <= LINK_MAX); 488 if (dnode->tn_links == LINK_MAX) { 489 error = EMLINK; 490 goto out; 491 } 492 493 parent = dnode; 494 MPASS(parent != NULL); 495 } else 496 parent = NULL; 497 498 /* Allocate a node that represents the new file. */ 499 error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, 500 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 501 if (error != 0) 502 goto out; 503 504 /* Allocate a directory entry that points to the new file. */ 505 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 506 &de); 507 if (error != 0) { 508 tmpfs_free_node(tmp, node); 509 goto out; 510 } 511 512 /* Allocate a vnode for the new file. */ 513 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 514 if (error != 0) { 515 tmpfs_free_dirent(tmp, de, TRUE); 516 tmpfs_free_node(tmp, node); 517 goto out; 518 } 519 520 /* Now that all required items are allocated, we can proceed to 521 * insert the new node into the directory, an operation that 522 * cannot fail. */ 523 if (cnp->cn_flags & ISWHITEOUT) 524 tmpfs_dir_whiteout_remove(dvp, cnp); 525 tmpfs_dir_attach(dvp, de); 526 527 out: 528 529 return error; 530 } 531 532 /* --------------------------------------------------------------------- */ 533 534 /* 535 * Attaches the directory entry de to the directory represented by vp. 536 * Note that this does not change the link count of the node pointed by 537 * the directory entry, as this is done by tmpfs_alloc_dirent. 
538 */ 539 void 540 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 541 { 542 struct tmpfs_node *dnode; 543 544 ASSERT_VOP_ELOCKED(vp, __func__); 545 dnode = VP_TO_TMPFS_DIR(vp); 546 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 547 dnode->tn_size += sizeof(struct tmpfs_dirent); 548 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 549 TMPFS_NODE_MODIFIED; 550 } 551 552 /* --------------------------------------------------------------------- */ 553 554 /* 555 * Detaches the directory entry de from the directory represented by vp. 556 * Note that this does not change the link count of the node pointed by 557 * the directory entry, as this is done by tmpfs_free_dirent. 558 */ 559 void 560 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 561 { 562 struct tmpfs_node *dnode; 563 564 ASSERT_VOP_ELOCKED(vp, __func__); 565 dnode = VP_TO_TMPFS_DIR(vp); 566 567 if (dnode->tn_dir.tn_readdir_lastp == de) { 568 dnode->tn_dir.tn_readdir_lastn = 0; 569 dnode->tn_dir.tn_readdir_lastp = NULL; 570 } 571 572 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 573 dnode->tn_size -= sizeof(struct tmpfs_dirent); 574 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 575 TMPFS_NODE_MODIFIED; 576 } 577 578 /* --------------------------------------------------------------------- */ 579 580 /* 581 * Looks for a directory entry in the directory represented by node. 582 * 'cnp' describes the name of the entry to look for. Note that the . 583 * and .. components are not allowed as they do not physically exist 584 * within directories. 585 * 586 * Returns a pointer to the entry when found, otherwise NULL. 
587 */ 588 struct tmpfs_dirent * 589 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 590 struct componentname *cnp) 591 { 592 boolean_t found; 593 struct tmpfs_dirent *de; 594 595 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 596 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 597 cnp->cn_nameptr[1] == '.'))); 598 TMPFS_VALIDATE_DIR(node); 599 600 found = 0; 601 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 602 if (f != NULL && de->td_node != f) 603 continue; 604 MPASS(cnp->cn_namelen < 0xffff); 605 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 606 bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 607 found = 1; 608 break; 609 } 610 } 611 node->tn_status |= TMPFS_NODE_ACCESSED; 612 613 return found ? de : NULL; 614 } 615 616 /* --------------------------------------------------------------------- */ 617 618 /* 619 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 620 * directory and returns it in the uio space. The function returns 0 621 * on success, -1 if there was not enough space in the uio structure to 622 * hold the directory entry or an appropriate error code if another 623 * error happens. 
624 */ 625 int 626 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 627 { 628 int error; 629 struct dirent dent; 630 631 TMPFS_VALIDATE_DIR(node); 632 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 633 634 dent.d_fileno = node->tn_id; 635 dent.d_type = DT_DIR; 636 dent.d_namlen = 1; 637 dent.d_name[0] = '.'; 638 dent.d_name[1] = '\0'; 639 dent.d_reclen = GENERIC_DIRSIZ(&dent); 640 641 if (dent.d_reclen > uio->uio_resid) 642 error = -1; 643 else { 644 error = uiomove(&dent, dent.d_reclen, uio); 645 if (error == 0) 646 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 647 } 648 649 node->tn_status |= TMPFS_NODE_ACCESSED; 650 651 return error; 652 } 653 654 /* --------------------------------------------------------------------- */ 655 656 /* 657 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 658 * directory and returns it in the uio space. The function returns 0 659 * on success, -1 if there was not enough space in the uio structure to 660 * hold the directory entry or an appropriate error code if another 661 * error happens. 662 */ 663 int 664 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 665 { 666 int error; 667 struct dirent dent; 668 669 TMPFS_VALIDATE_DIR(node); 670 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 671 672 /* 673 * Return ENOENT if the current node is already removed. 
674 */ 675 TMPFS_ASSERT_LOCKED(node); 676 if (node->tn_dir.tn_parent == NULL) { 677 return (ENOENT); 678 } 679 680 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 681 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 682 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 683 684 dent.d_type = DT_DIR; 685 dent.d_namlen = 2; 686 dent.d_name[0] = '.'; 687 dent.d_name[1] = '.'; 688 dent.d_name[2] = '\0'; 689 dent.d_reclen = GENERIC_DIRSIZ(&dent); 690 691 if (dent.d_reclen > uio->uio_resid) 692 error = -1; 693 else { 694 error = uiomove(&dent, dent.d_reclen, uio); 695 if (error == 0) { 696 struct tmpfs_dirent *de; 697 698 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 699 if (de == NULL) 700 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 701 else 702 uio->uio_offset = tmpfs_dircookie(de); 703 } 704 } 705 706 node->tn_status |= TMPFS_NODE_ACCESSED; 707 708 return error; 709 } 710 711 /* --------------------------------------------------------------------- */ 712 713 /* 714 * Lookup a directory entry by its associated cookie. 715 */ 716 struct tmpfs_dirent * 717 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 718 { 719 struct tmpfs_dirent *de; 720 721 if (cookie == node->tn_dir.tn_readdir_lastn && 722 node->tn_dir.tn_readdir_lastp != NULL) { 723 return node->tn_dir.tn_readdir_lastp; 724 } 725 726 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 727 if (tmpfs_dircookie(de) == cookie) { 728 break; 729 } 730 } 731 732 return de; 733 } 734 735 /* --------------------------------------------------------------------- */ 736 737 /* 738 * Helper function for tmpfs_readdir. Returns as much directory entries 739 * as can fit in the uio space. The read starts at uio->uio_offset. 740 * The function returns 0 on success, -1 if there was not enough space 741 * in the uio structure to hold the directory entry or an appropriate 742 * error code if another error happens. 
743 */ 744 int 745 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 746 { 747 int error; 748 off_t startcookie; 749 struct tmpfs_dirent *de; 750 751 TMPFS_VALIDATE_DIR(node); 752 753 /* Locate the first directory entry we have to return. We have cached 754 * the last readdir in the node, so use those values if appropriate. 755 * Otherwise do a linear scan to find the requested entry. */ 756 startcookie = uio->uio_offset; 757 MPASS(startcookie != TMPFS_DIRCOOKIE_DOT); 758 MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 759 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 760 return 0; 761 } else { 762 de = tmpfs_dir_lookupbycookie(node, startcookie); 763 } 764 if (de == NULL) { 765 return EINVAL; 766 } 767 768 /* Read as much entries as possible; i.e., until we reach the end of 769 * the directory or we exhaust uio space. */ 770 do { 771 struct dirent d; 772 773 /* Create a dirent structure representing the current 774 * tmpfs_node and fill it. */ 775 if (de->td_node == NULL) { 776 d.d_fileno = 1; 777 d.d_type = DT_WHT; 778 } else { 779 d.d_fileno = de->td_node->tn_id; 780 switch (de->td_node->tn_type) { 781 case VBLK: 782 d.d_type = DT_BLK; 783 break; 784 785 case VCHR: 786 d.d_type = DT_CHR; 787 break; 788 789 case VDIR: 790 d.d_type = DT_DIR; 791 break; 792 793 case VFIFO: 794 d.d_type = DT_FIFO; 795 break; 796 797 case VLNK: 798 d.d_type = DT_LNK; 799 break; 800 801 case VREG: 802 d.d_type = DT_REG; 803 break; 804 805 case VSOCK: 806 d.d_type = DT_SOCK; 807 break; 808 809 default: 810 panic("tmpfs_dir_getdents: type %p %d", 811 de->td_node, (int)de->td_node->tn_type); 812 } 813 } 814 d.d_namlen = de->td_namelen; 815 MPASS(de->td_namelen < sizeof(d.d_name)); 816 (void)memcpy(d.d_name, de->td_name, de->td_namelen); 817 d.d_name[de->td_namelen] = '\0'; 818 d.d_reclen = GENERIC_DIRSIZ(&d); 819 820 /* Stop reading if the directory entry we are treating is 821 * bigger than the amount of data that can be returned. 
*/ 822 if (d.d_reclen > uio->uio_resid) { 823 error = -1; 824 break; 825 } 826 827 /* Copy the new dirent structure into the output buffer and 828 * advance pointers. */ 829 error = uiomove(&d, d.d_reclen, uio); 830 if (error == 0) { 831 (*cntp)++; 832 de = TAILQ_NEXT(de, td_entries); 833 } 834 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 835 836 /* Update the offset and cache. */ 837 if (de == NULL) { 838 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 839 node->tn_dir.tn_readdir_lastn = 0; 840 node->tn_dir.tn_readdir_lastp = NULL; 841 } else { 842 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 843 node->tn_dir.tn_readdir_lastp = de; 844 } 845 846 node->tn_status |= TMPFS_NODE_ACCESSED; 847 return error; 848 } 849 850 int 851 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 852 { 853 struct tmpfs_dirent *de; 854 int error; 855 856 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 857 cnp->cn_nameptr, cnp->cn_namelen, &de); 858 if (error != 0) 859 return (error); 860 tmpfs_dir_attach(dvp, de); 861 return (0); 862 } 863 864 void 865 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 866 { 867 struct tmpfs_dirent *de; 868 869 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 870 MPASS(de != NULL && de->td_node == NULL); 871 tmpfs_dir_detach(dvp, de); 872 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE); 873 } 874 875 /* --------------------------------------------------------------------- */ 876 877 /* 878 * Resizes the aobj associated to the regular file pointed to by vp to 879 * the size newsize. 'vp' must point to a vnode that represents a regular 880 * file. 'newsize' must be positive. 881 * 882 * Returns zero on success or an appropriate error code on failure. 
883 */ 884 int 885 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 886 { 887 int error; 888 size_t newpages, oldpages; 889 struct tmpfs_mount *tmp; 890 struct tmpfs_node *node; 891 off_t oldsize; 892 893 MPASS(vp->v_type == VREG); 894 MPASS(newsize >= 0); 895 896 node = VP_TO_TMPFS_NODE(vp); 897 tmp = VFS_TO_TMPFS(vp->v_mount); 898 899 /* Convert the old and new sizes to the number of pages needed to 900 * store them. It may happen that we do not need to do anything 901 * because the last allocated page can accommodate the change on 902 * its own. */ 903 oldsize = node->tn_size; 904 oldpages = round_page(oldsize) / PAGE_SIZE; 905 MPASS(oldpages == node->tn_reg.tn_aobj_pages); 906 newpages = round_page(newsize) / PAGE_SIZE; 907 908 if (newpages > oldpages && 909 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { 910 error = ENOSPC; 911 goto out; 912 } 913 914 node->tn_reg.tn_aobj_pages = newpages; 915 916 TMPFS_LOCK(tmp); 917 tmp->tm_pages_used += (newpages - oldpages); 918 TMPFS_UNLOCK(tmp); 919 920 node->tn_size = newsize; 921 vnode_pager_setsize(vp, newsize); 922 if (newsize < oldsize) { 923 size_t zerolen = round_page(newsize) - newsize; 924 vm_object_t uobj = node->tn_reg.tn_aobj; 925 vm_page_t m; 926 927 /* 928 * free "backing store" 929 */ 930 VM_OBJECT_LOCK(uobj); 931 if (newpages < oldpages) { 932 swap_pager_freespace(uobj, 933 newpages, oldpages - newpages); 934 vm_object_page_remove(uobj, 935 OFF_TO_IDX(newsize + PAGE_MASK), 0, FALSE); 936 } 937 938 /* 939 * zero out the truncated part of the last page. 940 */ 941 942 if (zerolen > 0) { 943 m = vm_page_grab(uobj, OFF_TO_IDX(newsize), 944 VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 945 pmap_zero_page_area(m, PAGE_SIZE - zerolen, 946 zerolen); 947 } 948 VM_OBJECT_UNLOCK(uobj); 949 950 } 951 952 error = 0; 953 954 out: 955 return error; 956 } 957 958 /* --------------------------------------------------------------------- */ 959 960 /* 961 * Change flags of the given vnode. 
962 * Caller should execute tmpfs_update on vp after a successful execution. 963 * The vnode must be locked on entry and remain locked on exit. 964 */ 965 int 966 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct thread *p) 967 { 968 int error; 969 struct tmpfs_node *node; 970 971 MPASS(VOP_ISLOCKED(vp)); 972 973 node = VP_TO_TMPFS_NODE(vp); 974 975 /* Disallow this operation if the file system is mounted read-only. */ 976 if (vp->v_mount->mnt_flag & MNT_RDONLY) 977 return EROFS; 978 979 /* 980 * Callers may only modify the file flags on objects they 981 * have VADMIN rights for. 982 */ 983 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 984 return (error); 985 /* 986 * Unprivileged processes are not permitted to unset system 987 * flags, or modify flags if any system flags are set. 988 */ 989 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 990 if (node->tn_flags 991 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 992 error = securelevel_gt(cred, 0); 993 if (error) 994 return (error); 995 } 996 /* Snapshot flag cannot be set or cleared */ 997 if (((flags & SF_SNAPSHOT) != 0 && 998 (node->tn_flags & SF_SNAPSHOT) == 0) || 999 ((flags & SF_SNAPSHOT) == 0 && 1000 (node->tn_flags & SF_SNAPSHOT) != 0)) 1001 return (EPERM); 1002 node->tn_flags = flags; 1003 } else { 1004 if (node->tn_flags 1005 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 1006 (flags & UF_SETTABLE) != flags) 1007 return (EPERM); 1008 node->tn_flags &= SF_SETTABLE; 1009 node->tn_flags |= (flags & UF_SETTABLE); 1010 } 1011 node->tn_status |= TMPFS_NODE_CHANGED; 1012 1013 MPASS(VOP_ISLOCKED(vp)); 1014 1015 return 0; 1016 } 1017 1018 /* --------------------------------------------------------------------- */ 1019 1020 /* 1021 * Change access mode on the given vnode. 1022 * Caller should execute tmpfs_update on vp after a successful execution. 1023 * The vnode must be locked on entry and remain locked on exit. 
1024 */ 1025 int 1026 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1027 { 1028 int error; 1029 struct tmpfs_node *node; 1030 1031 MPASS(VOP_ISLOCKED(vp)); 1032 1033 node = VP_TO_TMPFS_NODE(vp); 1034 1035 /* Disallow this operation if the file system is mounted read-only. */ 1036 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1037 return EROFS; 1038 1039 /* Immutable or append-only files cannot be modified, either. */ 1040 if (node->tn_flags & (IMMUTABLE | APPEND)) 1041 return EPERM; 1042 1043 /* 1044 * To modify the permissions on a file, must possess VADMIN 1045 * for that file. 1046 */ 1047 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1048 return (error); 1049 1050 /* 1051 * Privileged processes may set the sticky bit on non-directories, 1052 * as well as set the setgid bit on a file with a group that the 1053 * process is not a member of. 1054 */ 1055 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1056 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1057 return (EFTYPE); 1058 } 1059 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1060 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1061 if (error) 1062 return (error); 1063 } 1064 1065 1066 node->tn_mode &= ~ALLPERMS; 1067 node->tn_mode |= mode & ALLPERMS; 1068 1069 node->tn_status |= TMPFS_NODE_CHANGED; 1070 1071 MPASS(VOP_ISLOCKED(vp)); 1072 1073 return 0; 1074 } 1075 1076 /* --------------------------------------------------------------------- */ 1077 1078 /* 1079 * Change ownership of the given vnode. At least one of uid or gid must 1080 * be different than VNOVAL. If one is set to that value, the attribute 1081 * is unchanged. 1082 * Caller should execute tmpfs_update on vp after a successful execution. 1083 * The vnode must be locked on entry and remain locked on exit. 
1084 */ 1085 int 1086 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1087 struct thread *p) 1088 { 1089 int error; 1090 struct tmpfs_node *node; 1091 uid_t ouid; 1092 gid_t ogid; 1093 1094 MPASS(VOP_ISLOCKED(vp)); 1095 1096 node = VP_TO_TMPFS_NODE(vp); 1097 1098 /* Assign default values if they are unknown. */ 1099 MPASS(uid != VNOVAL || gid != VNOVAL); 1100 if (uid == VNOVAL) 1101 uid = node->tn_uid; 1102 if (gid == VNOVAL) 1103 gid = node->tn_gid; 1104 MPASS(uid != VNOVAL && gid != VNOVAL); 1105 1106 /* Disallow this operation if the file system is mounted read-only. */ 1107 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1108 return EROFS; 1109 1110 /* Immutable or append-only files cannot be modified, either. */ 1111 if (node->tn_flags & (IMMUTABLE | APPEND)) 1112 return EPERM; 1113 1114 /* 1115 * To modify the ownership of a file, must possess VADMIN for that 1116 * file. 1117 */ 1118 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1119 return (error); 1120 1121 /* 1122 * To change the owner of a file, or change the group of a file to a 1123 * group of which we are not a member, the caller must have 1124 * privilege. 1125 */ 1126 if ((uid != node->tn_uid || 1127 (gid != node->tn_gid && !groupmember(gid, cred))) && 1128 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1129 return (error); 1130 1131 ogid = node->tn_gid; 1132 ouid = node->tn_uid; 1133 1134 node->tn_uid = uid; 1135 node->tn_gid = gid; 1136 1137 node->tn_status |= TMPFS_NODE_CHANGED; 1138 1139 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1140 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1141 node->tn_mode &= ~(S_ISUID | S_ISGID); 1142 } 1143 1144 MPASS(VOP_ISLOCKED(vp)); 1145 1146 return 0; 1147 } 1148 1149 /* --------------------------------------------------------------------- */ 1150 1151 /* 1152 * Change size of the given vnode. 1153 * Caller should execute tmpfs_update on vp after a successful execution. 
1154 * The vnode must be locked on entry and remain locked on exit. 1155 */ 1156 int 1157 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1158 struct thread *p) 1159 { 1160 int error; 1161 struct tmpfs_node *node; 1162 1163 MPASS(VOP_ISLOCKED(vp)); 1164 1165 node = VP_TO_TMPFS_NODE(vp); 1166 1167 /* Decide whether this is a valid operation based on the file type. */ 1168 error = 0; 1169 switch (vp->v_type) { 1170 case VDIR: 1171 return EISDIR; 1172 1173 case VREG: 1174 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1175 return EROFS; 1176 break; 1177 1178 case VBLK: 1179 /* FALLTHROUGH */ 1180 case VCHR: 1181 /* FALLTHROUGH */ 1182 case VFIFO: 1183 /* Allow modifications of special files even if in the file 1184 * system is mounted read-only (we are not modifying the 1185 * files themselves, but the objects they represent). */ 1186 return 0; 1187 1188 default: 1189 /* Anything else is unsupported. */ 1190 return EOPNOTSUPP; 1191 } 1192 1193 /* Immutable or append-only files cannot be modified, either. */ 1194 if (node->tn_flags & (IMMUTABLE | APPEND)) 1195 return EPERM; 1196 1197 error = tmpfs_truncate(vp, size); 1198 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1199 * for us, as will update tn_status; no need to do that here. */ 1200 1201 MPASS(VOP_ISLOCKED(vp)); 1202 1203 return error; 1204 } 1205 1206 /* --------------------------------------------------------------------- */ 1207 1208 /* 1209 * Change access and modification times of the given vnode. 1210 * Caller should execute tmpfs_update on vp after a successful execution. 1211 * The vnode must be locked on entry and remain locked on exit. 
1212 */ 1213 int 1214 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1215 struct timespec *birthtime, int vaflags, struct ucred *cred, struct thread *l) 1216 { 1217 int error; 1218 struct tmpfs_node *node; 1219 1220 MPASS(VOP_ISLOCKED(vp)); 1221 1222 node = VP_TO_TMPFS_NODE(vp); 1223 1224 /* Disallow this operation if the file system is mounted read-only. */ 1225 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1226 return EROFS; 1227 1228 /* Immutable or append-only files cannot be modified, either. */ 1229 if (node->tn_flags & (IMMUTABLE | APPEND)) 1230 return EPERM; 1231 1232 /* Determine if the user have proper privilege to update time. */ 1233 if (vaflags & VA_UTIMES_NULL) { 1234 error = VOP_ACCESS(vp, VADMIN, cred, l); 1235 if (error) 1236 error = VOP_ACCESS(vp, VWRITE, cred, l); 1237 } else 1238 error = VOP_ACCESS(vp, VADMIN, cred, l); 1239 if (error) 1240 return (error); 1241 1242 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1243 node->tn_status |= TMPFS_NODE_ACCESSED; 1244 1245 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1246 node->tn_status |= TMPFS_NODE_MODIFIED; 1247 1248 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1249 node->tn_status |= TMPFS_NODE_MODIFIED; 1250 1251 tmpfs_itimes(vp, atime, mtime); 1252 1253 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1254 node->tn_birthtime = *birthtime; 1255 MPASS(VOP_ISLOCKED(vp)); 1256 1257 return 0; 1258 } 1259 1260 /* --------------------------------------------------------------------- */ 1261 /* Sync timestamps */ 1262 void 1263 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1264 const struct timespec *mod) 1265 { 1266 struct tmpfs_node *node; 1267 struct timespec now; 1268 1269 node = VP_TO_TMPFS_NODE(vp); 1270 1271 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1272 TMPFS_NODE_CHANGED)) == 0) 1273 return; 1274 1275 vfs_timestamp(&now); 1276 if (node->tn_status & TMPFS_NODE_ACCESSED) { 
1277 if (acc == NULL) 1278 acc = &now; 1279 node->tn_atime = *acc; 1280 } 1281 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1282 if (mod == NULL) 1283 mod = &now; 1284 node->tn_mtime = *mod; 1285 } 1286 if (node->tn_status & TMPFS_NODE_CHANGED) { 1287 node->tn_ctime = now; 1288 } 1289 node->tn_status &= 1290 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1291 } 1292 1293 /* --------------------------------------------------------------------- */ 1294 1295 void 1296 tmpfs_update(struct vnode *vp) 1297 { 1298 1299 tmpfs_itimes(vp, NULL, NULL); 1300 } 1301 1302 /* --------------------------------------------------------------------- */ 1303 1304 int 1305 tmpfs_truncate(struct vnode *vp, off_t length) 1306 { 1307 int error; 1308 struct tmpfs_node *node; 1309 1310 node = VP_TO_TMPFS_NODE(vp); 1311 1312 if (length < 0) { 1313 error = EINVAL; 1314 goto out; 1315 } 1316 1317 if (node->tn_size == length) { 1318 error = 0; 1319 goto out; 1320 } 1321 1322 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1323 return (EFBIG); 1324 1325 error = tmpfs_reg_resize(vp, length); 1326 if (error == 0) { 1327 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1328 } 1329 1330 out: 1331 tmpfs_update(vp); 1332 1333 return error; 1334 } 1335