1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/namei.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/stat.h> 44 #include <sys/systm.h> 45 #include <sys/vnode.h> 46 #include <sys/vmmeter.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_object.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_pager.h> 52 #include <vm/vm_extern.h> 53 54 #include <fs/tmpfs/tmpfs.h> 55 #include <fs/tmpfs/tmpfs_fifoops.h> 56 #include <fs/tmpfs/tmpfs_vnops.h> 57 58 /* --------------------------------------------------------------------- */ 59 60 /* 61 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 62 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 63 * using the credentials of the process 'p'. 64 * 65 * If the node type is set to 'VDIR', then the parent parameter must point 66 * to the parent directory of the node being created. It may only be NULL 67 * while allocating the root node. 68 * 69 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 70 * specifies the device the node represents. 71 * 72 * If the node type is set to 'VLNK', then the parameter target specifies 73 * the file name of the target file for the symbolic link that is being 74 * created. 75 * 76 * Note that new nodes are retrieved from the available list if it has 77 * items or, if it is empty, from the node pool as long as there is enough 78 * space to create them. 79 * 80 * Returns zero on success or an appropriate error code on failure. 81 */ 82 int 83 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 84 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 85 char *target, dev_t rdev, struct tmpfs_node **node) 86 { 87 struct tmpfs_node *nnode; 88 89 /* If the root directory of the 'tmp' file system is not yet 90 * allocated, this must be the request to do it. */ 91 MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 92 93 MPASS(IFF(type == VLNK, target != NULL)); 94 MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); 95 96 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 97 return (ENOSPC); 98 99 nnode = (struct tmpfs_node *)uma_zalloc_arg( 100 tmp->tm_node_pool, tmp, M_WAITOK); 101 102 /* Generic initialization. */ 103 nnode->tn_type = type; 104 vfs_timestamp(&nnode->tn_atime); 105 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = 106 nnode->tn_atime; 107 nnode->tn_uid = uid; 108 nnode->tn_gid = gid; 109 nnode->tn_mode = mode; 110 nnode->tn_id = alloc_unr(tmp->tm_ino_unr); 111 112 /* Type-specific initialization. */ 113 switch (nnode->tn_type) { 114 case VBLK: 115 case VCHR: 116 nnode->tn_rdev = rdev; 117 break; 118 119 case VDIR: 120 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 121 MPASS(parent != nnode); 122 MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 123 nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; 124 nnode->tn_dir.tn_readdir_lastn = 0; 125 nnode->tn_dir.tn_readdir_lastp = NULL; 126 nnode->tn_links++; 127 TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); 128 nnode->tn_dir.tn_parent->tn_links++; 129 TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); 130 break; 131 132 case VFIFO: 133 /* FALLTHROUGH */ 134 case VSOCK: 135 break; 136 137 case VLNK: 138 MPASS(strlen(target) < MAXPATHLEN); 139 nnode->tn_size = strlen(target); 140 nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, 141 M_WAITOK); 142 memcpy(nnode->tn_link, target, nnode->tn_size); 143 break; 144 145 case VREG: 146 nnode->tn_reg.tn_aobj = 147 vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0, 148 NULL /* XXXKIB - tmpfs needs swap reservation */); 149 break; 150 151 default: 152 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 153 } 154 155 TMPFS_LOCK(tmp); 156 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 157 tmp->tm_nodes_inuse++; 158 TMPFS_UNLOCK(tmp); 159 160 *node = nnode; 161 return 0; 162 } 163 164 /* --------------------------------------------------------------------- */ 165 166 /* 167 * Destroys the node pointed to by node from the file system 'tmp'. 168 * If the node does not belong to the given mount point, the results are 169 * unpredicted. 170 * 171 * If the node references a directory; no entries are allowed because 172 * their removal could need a recursive algorithm, something forbidden in 173 * kernel space. Furthermore, there is not need to provide such 174 * functionality (recursive removal) because the only primitives offered 175 * to the user are the removal of empty directories and the deletion of 176 * individual files. 177 * 178 * Note that nodes are not really deleted; in fact, when a node has been 179 * allocated, it cannot be deleted during the whole life of the file 180 * system. Instead, they are moved to the available list and remain there 181 * until reused. 182 */ 183 void 184 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 185 { 186 vm_object_t uobj; 187 188 #ifdef INVARIANTS 189 TMPFS_NODE_LOCK(node); 190 MPASS(node->tn_vnode == NULL); 191 MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 192 TMPFS_NODE_UNLOCK(node); 193 #endif 194 195 TMPFS_LOCK(tmp); 196 LIST_REMOVE(node, tn_entries); 197 tmp->tm_nodes_inuse--; 198 TMPFS_UNLOCK(tmp); 199 200 switch (node->tn_type) { 201 case VNON: 202 /* Do not do anything. VNON is provided to let the 203 * allocation routine clean itself easily by avoiding 204 * duplicating code in it. */ 205 /* FALLTHROUGH */ 206 case VBLK: 207 /* FALLTHROUGH */ 208 case VCHR: 209 /* FALLTHROUGH */ 210 case VDIR: 211 /* FALLTHROUGH */ 212 case VFIFO: 213 /* FALLTHROUGH */ 214 case VSOCK: 215 break; 216 217 case VLNK: 218 free(node->tn_link, M_TMPFSNAME); 219 break; 220 221 case VREG: 222 uobj = node->tn_reg.tn_aobj; 223 if (uobj != NULL) { 224 TMPFS_LOCK(tmp); 225 tmp->tm_pages_used -= uobj->size; 226 TMPFS_UNLOCK(tmp); 227 vm_object_deallocate(uobj); 228 } 229 break; 230 231 default: 232 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 233 } 234 235 free_unr(tmp->tm_ino_unr, node->tn_id); 236 uma_zfree(tmp->tm_node_pool, node); 237 } 238 239 /* --------------------------------------------------------------------- */ 240 241 /* 242 * Allocates a new directory entry for the node node with a name of name. 243 * The new directory entry is returned in *de. 244 * 245 * The link count of node is increased by one to reflect the new object 246 * referencing it. 247 * 248 * Returns zero on success or an appropriate error code on failure. 249 */ 250 int 251 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 252 const char *name, uint16_t len, struct tmpfs_dirent **de) 253 { 254 struct tmpfs_dirent *nde; 255 256 nde = (struct tmpfs_dirent *)uma_zalloc( 257 tmp->tm_dirent_pool, M_WAITOK); 258 nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK); 259 nde->td_namelen = len; 260 memcpy(nde->td_name, name, len); 261 262 nde->td_node = node; 263 if (node != NULL) 264 node->tn_links++; 265 266 *de = nde; 267 268 return 0; 269 } 270 271 /* --------------------------------------------------------------------- */ 272 273 /* 274 * Frees a directory entry. It is the caller's responsibility to destroy 275 * the node referenced by it if needed. 276 * 277 * The link count of node is decreased by one to reflect the removal of an 278 * object that referenced it. This only happens if 'node_exists' is true; 279 * otherwise the function will not access the node referred to by the 280 * directory entry, as it may already have been released from the outside. 281 */ 282 void 283 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de, 284 boolean_t node_exists) 285 { 286 if (node_exists) { 287 struct tmpfs_node *node; 288 289 node = de->td_node; 290 if (node != NULL) { 291 MPASS(node->tn_links > 0); 292 node->tn_links--; 293 } 294 } 295 296 free(de->td_name, M_TMPFSNAME); 297 uma_zfree(tmp->tm_dirent_pool, de); 298 } 299 300 /* --------------------------------------------------------------------- */ 301 302 /* 303 * Allocates a new vnode for the node node or returns a new reference to 304 * an existing one if the node had already a vnode referencing it. The 305 * resulting locked vnode is returned in *vpp. 306 * 307 * Returns zero on success or an appropriate error code on failure. 308 */ 309 int 310 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 311 struct vnode **vpp) 312 { 313 int error = 0; 314 struct vnode *vp; 315 316 loop: 317 TMPFS_NODE_LOCK(node); 318 if ((vp = node->tn_vnode) != NULL) { 319 MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 320 VI_LOCK(vp); 321 TMPFS_NODE_UNLOCK(node); 322 vholdl(vp); 323 (void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, curthread); 324 vdrop(vp); 325 326 /* 327 * Make sure the vnode is still there after 328 * getting the interlock to avoid racing a free. 329 */ 330 if (node->tn_vnode == NULL || node->tn_vnode != vp) { 331 vput(vp); 332 goto loop; 333 } 334 335 goto out; 336 } 337 338 if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || 339 (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { 340 TMPFS_NODE_UNLOCK(node); 341 error = ENOENT; 342 vp = NULL; 343 goto out; 344 } 345 346 /* 347 * otherwise lock the vp list while we call getnewvnode 348 * since that can block. 349 */ 350 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 351 node->tn_vpstate |= TMPFS_VNODE_WANT; 352 error = msleep((caddr_t) &node->tn_vpstate, 353 TMPFS_NODE_MTX(node), PDROP | PCATCH, 354 "tmpfs_alloc_vp", 0); 355 if (error) 356 return error; 357 358 goto loop; 359 } else 360 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 361 362 TMPFS_NODE_UNLOCK(node); 363 364 /* Get a new vnode and associate it with our node. */ 365 error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp); 366 if (error != 0) 367 goto unlock; 368 MPASS(vp != NULL); 369 370 (void) vn_lock(vp, lkflag | LK_RETRY); 371 372 vp->v_data = node; 373 vp->v_type = node->tn_type; 374 375 /* Type-specific initialization. */ 376 switch (node->tn_type) { 377 case VBLK: 378 /* FALLTHROUGH */ 379 case VCHR: 380 /* FALLTHROUGH */ 381 case VLNK: 382 /* FALLTHROUGH */ 383 case VREG: 384 /* FALLTHROUGH */ 385 case VSOCK: 386 break; 387 case VFIFO: 388 vp->v_op = &tmpfs_fifoop_entries; 389 break; 390 case VDIR: 391 MPASS(node->tn_dir.tn_parent != NULL); 392 if (node->tn_dir.tn_parent == node) 393 vp->v_vflag |= VV_ROOT; 394 break; 395 396 default: 397 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 398 } 399 400 vnode_pager_setsize(vp, node->tn_size); 401 error = insmntque(vp, mp); 402 if (error) 403 vp = NULL; 404 405 unlock: 406 TMPFS_NODE_LOCK(node); 407 408 MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 409 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 410 node->tn_vnode = vp; 411 412 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 413 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 414 TMPFS_NODE_UNLOCK(node); 415 wakeup((caddr_t) &node->tn_vpstate); 416 } else 417 TMPFS_NODE_UNLOCK(node); 418 419 out: 420 *vpp = vp; 421 422 MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); 423 #ifdef INVARIANTS 424 TMPFS_NODE_LOCK(node); 425 MPASS(*vpp == node->tn_vnode); 426 TMPFS_NODE_UNLOCK(node); 427 #endif 428 429 return error; 430 } 431 432 /* --------------------------------------------------------------------- */ 433 434 /* 435 * Destroys the association between the vnode vp and the node it 436 * references. 437 */ 438 void 439 tmpfs_free_vp(struct vnode *vp) 440 { 441 struct tmpfs_node *node; 442 443 node = VP_TO_TMPFS_NODE(vp); 444 445 mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); 446 node->tn_vnode = NULL; 447 vp->v_data = NULL; 448 } 449 450 /* --------------------------------------------------------------------- */ 451 452 /* 453 * Allocates a new file of type 'type' and adds it to the parent directory 454 * 'dvp'; this addition is done using the component name given in 'cnp'. 455 * The ownership of the new file is automatically assigned based on the 456 * credentials of the caller (through 'cnp'), the group is set based on 457 * the parent directory and the mode is determined from the 'vap' argument. 458 * If successful, *vpp holds a vnode to the newly created file and zero 459 * is returned. Otherwise *vpp is NULL and the function returns an 460 * appropriate error code. 461 */ 462 int 463 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 464 struct componentname *cnp, char *target) 465 { 466 int error; 467 struct tmpfs_dirent *de; 468 struct tmpfs_mount *tmp; 469 struct tmpfs_node *dnode; 470 struct tmpfs_node *node; 471 struct tmpfs_node *parent; 472 473 MPASS(VOP_ISLOCKED(dvp)); 474 MPASS(cnp->cn_flags & HASBUF); 475 476 tmp = VFS_TO_TMPFS(dvp->v_mount); 477 dnode = VP_TO_TMPFS_DIR(dvp); 478 *vpp = NULL; 479 480 /* If the entry we are creating is a directory, we cannot overflow 481 * the number of links of its parent, because it will get a new 482 * link. */ 483 if (vap->va_type == VDIR) { 484 /* Ensure that we do not overflow the maximum number of links 485 * imposed by the system. */ 486 MPASS(dnode->tn_links <= LINK_MAX); 487 if (dnode->tn_links == LINK_MAX) { 488 error = EMLINK; 489 goto out; 490 } 491 492 parent = dnode; 493 MPASS(parent != NULL); 494 } else 495 parent = NULL; 496 497 /* Allocate a node that represents the new file. */ 498 error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, 499 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); 500 if (error != 0) 501 goto out; 502 503 /* Allocate a directory entry that points to the new file. */ 504 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, 505 &de); 506 if (error != 0) { 507 tmpfs_free_node(tmp, node); 508 goto out; 509 } 510 511 /* Allocate a vnode for the new file. */ 512 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 513 if (error != 0) { 514 tmpfs_free_dirent(tmp, de, TRUE); 515 tmpfs_free_node(tmp, node); 516 goto out; 517 } 518 519 /* Now that all required items are allocated, we can proceed to 520 * insert the new node into the directory, an operation that 521 * cannot fail. */ 522 if (cnp->cn_flags & ISWHITEOUT) 523 tmpfs_dir_whiteout_remove(dvp, cnp); 524 tmpfs_dir_attach(dvp, de); 525 526 out: 527 528 return error; 529 } 530 531 /* --------------------------------------------------------------------- */ 532 533 /* 534 * Attaches the directory entry de to the directory represented by vp. 535 * Note that this does not change the link count of the node pointed by 536 * the directory entry, as this is done by tmpfs_alloc_dirent. 537 */ 538 void 539 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) 540 { 541 struct tmpfs_node *dnode; 542 543 ASSERT_VOP_ELOCKED(vp, __func__); 544 dnode = VP_TO_TMPFS_DIR(vp); 545 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 546 dnode->tn_size += sizeof(struct tmpfs_dirent); 547 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 548 TMPFS_NODE_MODIFIED; 549 } 550 551 /* --------------------------------------------------------------------- */ 552 553 /* 554 * Detaches the directory entry de from the directory represented by vp. 555 * Note that this does not change the link count of the node pointed by 556 * the directory entry, as this is done by tmpfs_free_dirent. 557 */ 558 void 559 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) 560 { 561 struct tmpfs_node *dnode; 562 563 ASSERT_VOP_ELOCKED(vp, __func__); 564 dnode = VP_TO_TMPFS_DIR(vp); 565 566 if (dnode->tn_dir.tn_readdir_lastp == de) { 567 dnode->tn_dir.tn_readdir_lastn = 0; 568 dnode->tn_dir.tn_readdir_lastp = NULL; 569 } 570 571 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 572 dnode->tn_size -= sizeof(struct tmpfs_dirent); 573 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ 574 TMPFS_NODE_MODIFIED; 575 } 576 577 /* --------------------------------------------------------------------- */ 578 579 /* 580 * Looks for a directory entry in the directory represented by node. 581 * 'cnp' describes the name of the entry to look for. Note that the . 582 * and .. components are not allowed as they do not physically exist 583 * within directories. 584 * 585 * Returns a pointer to the entry when found, otherwise NULL. 586 */ 587 struct tmpfs_dirent * 588 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 589 struct componentname *cnp) 590 { 591 boolean_t found; 592 struct tmpfs_dirent *de; 593 594 MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); 595 MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && 596 cnp->cn_nameptr[1] == '.'))); 597 TMPFS_VALIDATE_DIR(node); 598 599 found = 0; 600 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 601 if (f != NULL && de->td_node != f) 602 continue; 603 MPASS(cnp->cn_namelen < 0xffff); 604 if (de->td_namelen == (uint16_t)cnp->cn_namelen && 605 bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { 606 found = 1; 607 break; 608 } 609 } 610 node->tn_status |= TMPFS_NODE_ACCESSED; 611 612 return found ? de : NULL; 613 } 614 615 /* --------------------------------------------------------------------- */ 616 617 /* 618 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 619 * directory and returns it in the uio space. The function returns 0 620 * on success, -1 if there was not enough space in the uio structure to 621 * hold the directory entry or an appropriate error code if another 622 * error happens. 623 */ 624 int 625 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 626 { 627 int error; 628 struct dirent dent; 629 630 TMPFS_VALIDATE_DIR(node); 631 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 632 633 dent.d_fileno = node->tn_id; 634 dent.d_type = DT_DIR; 635 dent.d_namlen = 1; 636 dent.d_name[0] = '.'; 637 dent.d_name[1] = '\0'; 638 dent.d_reclen = GENERIC_DIRSIZ(&dent); 639 640 if (dent.d_reclen > uio->uio_resid) 641 error = -1; 642 else { 643 error = uiomove(&dent, dent.d_reclen, uio); 644 if (error == 0) 645 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 646 } 647 648 node->tn_status |= TMPFS_NODE_ACCESSED; 649 650 return error; 651 } 652 653 /* --------------------------------------------------------------------- */ 654 655 /* 656 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 657 * directory and returns it in the uio space. The function returns 0 658 * on success, -1 if there was not enough space in the uio structure to 659 * hold the directory entry or an appropriate error code if another 660 * error happens. 661 */ 662 int 663 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) 664 { 665 int error; 666 struct dirent dent; 667 668 TMPFS_VALIDATE_DIR(node); 669 MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 670 671 /* 672 * Return ENOENT if the current node is already removed. 673 */ 674 TMPFS_ASSERT_LOCKED(node); 675 if (node->tn_dir.tn_parent == NULL) { 676 return (ENOENT); 677 } 678 679 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 680 dent.d_fileno = node->tn_dir.tn_parent->tn_id; 681 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 682 683 dent.d_type = DT_DIR; 684 dent.d_namlen = 2; 685 dent.d_name[0] = '.'; 686 dent.d_name[1] = '.'; 687 dent.d_name[2] = '\0'; 688 dent.d_reclen = GENERIC_DIRSIZ(&dent); 689 690 if (dent.d_reclen > uio->uio_resid) 691 error = -1; 692 else { 693 error = uiomove(&dent, dent.d_reclen, uio); 694 if (error == 0) { 695 struct tmpfs_dirent *de; 696 697 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 698 if (de == NULL) 699 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 700 else 701 uio->uio_offset = tmpfs_dircookie(de); 702 } 703 } 704 705 node->tn_status |= TMPFS_NODE_ACCESSED; 706 707 return error; 708 } 709 710 /* --------------------------------------------------------------------- */ 711 712 /* 713 * Lookup a directory entry by its associated cookie. 714 */ 715 struct tmpfs_dirent * 716 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 717 { 718 struct tmpfs_dirent *de; 719 720 if (cookie == node->tn_dir.tn_readdir_lastn && 721 node->tn_dir.tn_readdir_lastp != NULL) { 722 return node->tn_dir.tn_readdir_lastp; 723 } 724 725 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 726 if (tmpfs_dircookie(de) == cookie) { 727 break; 728 } 729 } 730 731 return de; 732 } 733 734 /* --------------------------------------------------------------------- */ 735 736 /* 737 * Helper function for tmpfs_readdir. Returns as much directory entries 738 * as can fit in the uio space. The read starts at uio->uio_offset. 739 * The function returns 0 on success, -1 if there was not enough space 740 * in the uio structure to hold the directory entry or an appropriate 741 * error code if another error happens. 742 */ 743 int 744 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 745 { 746 int error; 747 off_t startcookie; 748 struct tmpfs_dirent *de; 749 750 TMPFS_VALIDATE_DIR(node); 751 752 /* Locate the first directory entry we have to return. We have cached 753 * the last readdir in the node, so use those values if appropriate. 754 * Otherwise do a linear scan to find the requested entry. */ 755 startcookie = uio->uio_offset; 756 MPASS(startcookie != TMPFS_DIRCOOKIE_DOT); 757 MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 758 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 759 return 0; 760 } else { 761 de = tmpfs_dir_lookupbycookie(node, startcookie); 762 } 763 if (de == NULL) { 764 return EINVAL; 765 } 766 767 /* Read as much entries as possible; i.e., until we reach the end of 768 * the directory or we exhaust uio space. */ 769 do { 770 struct dirent d; 771 772 /* Create a dirent structure representing the current 773 * tmpfs_node and fill it. */ 774 if (de->td_node == NULL) { 775 d.d_fileno = 1; 776 d.d_type = DT_WHT; 777 } else { 778 d.d_fileno = de->td_node->tn_id; 779 switch (de->td_node->tn_type) { 780 case VBLK: 781 d.d_type = DT_BLK; 782 break; 783 784 case VCHR: 785 d.d_type = DT_CHR; 786 break; 787 788 case VDIR: 789 d.d_type = DT_DIR; 790 break; 791 792 case VFIFO: 793 d.d_type = DT_FIFO; 794 break; 795 796 case VLNK: 797 d.d_type = DT_LNK; 798 break; 799 800 case VREG: 801 d.d_type = DT_REG; 802 break; 803 804 case VSOCK: 805 d.d_type = DT_SOCK; 806 break; 807 808 default: 809 panic("tmpfs_dir_getdents: type %p %d", 810 de->td_node, (int)de->td_node->tn_type); 811 } 812 } 813 d.d_namlen = de->td_namelen; 814 MPASS(de->td_namelen < sizeof(d.d_name)); 815 (void)memcpy(d.d_name, de->td_name, de->td_namelen); 816 d.d_name[de->td_namelen] = '\0'; 817 d.d_reclen = GENERIC_DIRSIZ(&d); 818 819 /* Stop reading if the directory entry we are treating is 820 * bigger than the amount of data that can be returned. */ 821 if (d.d_reclen > uio->uio_resid) { 822 error = -1; 823 break; 824 } 825 826 /* Copy the new dirent structure into the output buffer and 827 * advance pointers. */ 828 error = uiomove(&d, d.d_reclen, uio); 829 if (error == 0) { 830 (*cntp)++; 831 de = TAILQ_NEXT(de, td_entries); 832 } 833 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 834 835 /* Update the offset and cache. */ 836 if (de == NULL) { 837 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 838 node->tn_dir.tn_readdir_lastn = 0; 839 node->tn_dir.tn_readdir_lastp = NULL; 840 } else { 841 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 842 node->tn_dir.tn_readdir_lastp = de; 843 } 844 845 node->tn_status |= TMPFS_NODE_ACCESSED; 846 return error; 847 } 848 849 int 850 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) 851 { 852 struct tmpfs_dirent *de; 853 int error; 854 855 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, 856 cnp->cn_nameptr, cnp->cn_namelen, &de); 857 if (error != 0) 858 return (error); 859 tmpfs_dir_attach(dvp, de); 860 return (0); 861 } 862 863 void 864 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) 865 { 866 struct tmpfs_dirent *de; 867 868 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); 869 MPASS(de != NULL && de->td_node == NULL); 870 tmpfs_dir_detach(dvp, de); 871 tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE); 872 } 873 874 /* --------------------------------------------------------------------- */ 875 876 /* 877 * Resizes the aobj associated with the regular file pointed to by 'vp' to the 878 * size 'newsize'. 'vp' must point to a vnode that represents a regular file. 879 * 'newsize' must be positive. 880 * 881 * Returns zero on success or an appropriate error code on failure. 882 */ 883 int 884 tmpfs_reg_resize(struct vnode *vp, off_t newsize) 885 { 886 struct tmpfs_mount *tmp; 887 struct tmpfs_node *node; 888 vm_object_t uobj; 889 vm_page_t m; 890 vm_pindex_t newpages, oldpages; 891 off_t oldsize; 892 size_t zerolen; 893 894 MPASS(vp->v_type == VREG); 895 MPASS(newsize >= 0); 896 897 node = VP_TO_TMPFS_NODE(vp); 898 uobj = node->tn_reg.tn_aobj; 899 tmp = VFS_TO_TMPFS(vp->v_mount); 900 901 /* 902 * Convert the old and new sizes to the number of pages needed to 903 * store them. It may happen that we do not need to do anything 904 * because the last allocated page can accommodate the change on 905 * its own. 906 */ 907 oldsize = node->tn_size; 908 oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); 909 MPASS(oldpages == uobj->size); 910 newpages = OFF_TO_IDX(newsize + PAGE_MASK); 911 if (newpages > oldpages && 912 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) 913 return (ENOSPC); 914 915 TMPFS_LOCK(tmp); 916 tmp->tm_pages_used += (newpages - oldpages); 917 TMPFS_UNLOCK(tmp); 918 919 node->tn_size = newsize; 920 vnode_pager_setsize(vp, newsize); 921 VM_OBJECT_LOCK(uobj); 922 if (newsize < oldsize) { 923 /* 924 * Release any swap space and free any whole pages. 925 */ 926 if (newpages < oldpages) { 927 swap_pager_freespace(uobj, newpages, oldpages - 928 newpages); 929 vm_object_page_remove(uobj, newpages, 0, 0); 930 } 931 932 /* 933 * Zero the truncated part of the last page. 934 */ 935 zerolen = round_page(newsize) - newsize; 936 if (zerolen > 0) { 937 m = vm_page_grab(uobj, OFF_TO_IDX(newsize), 938 VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 939 pmap_zero_page_area(m, PAGE_SIZE - zerolen, zerolen); 940 } 941 } 942 uobj->size = newpages; 943 VM_OBJECT_UNLOCK(uobj); 944 return (0); 945 } 946 947 /* --------------------------------------------------------------------- */ 948 949 /* 950 * Change flags of the given vnode. 951 * Caller should execute tmpfs_update on vp after a successful execution. 952 * The vnode must be locked on entry and remain locked on exit. 953 */ 954 int 955 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct thread *p) 956 { 957 int error; 958 struct tmpfs_node *node; 959 960 MPASS(VOP_ISLOCKED(vp)); 961 962 node = VP_TO_TMPFS_NODE(vp); 963 964 /* Disallow this operation if the file system is mounted read-only. */ 965 if (vp->v_mount->mnt_flag & MNT_RDONLY) 966 return EROFS; 967 968 /* 969 * Callers may only modify the file flags on objects they 970 * have VADMIN rights for. 971 */ 972 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 973 return (error); 974 /* 975 * Unprivileged processes are not permitted to unset system 976 * flags, or modify flags if any system flags are set. 977 */ 978 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 979 if (node->tn_flags 980 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 981 error = securelevel_gt(cred, 0); 982 if (error) 983 return (error); 984 } 985 /* Snapshot flag cannot be set or cleared */ 986 if (((flags & SF_SNAPSHOT) != 0 && 987 (node->tn_flags & SF_SNAPSHOT) == 0) || 988 ((flags & SF_SNAPSHOT) == 0 && 989 (node->tn_flags & SF_SNAPSHOT) != 0)) 990 return (EPERM); 991 node->tn_flags = flags; 992 } else { 993 if (node->tn_flags 994 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 995 (flags & UF_SETTABLE) != flags) 996 return (EPERM); 997 node->tn_flags &= SF_SETTABLE; 998 node->tn_flags |= (flags & UF_SETTABLE); 999 } 1000 node->tn_status |= TMPFS_NODE_CHANGED; 1001 1002 MPASS(VOP_ISLOCKED(vp)); 1003 1004 return 0; 1005 } 1006 1007 /* --------------------------------------------------------------------- */ 1008 1009 /* 1010 * Change access mode on the given vnode. 1011 * Caller should execute tmpfs_update on vp after a successful execution. 1012 * The vnode must be locked on entry and remain locked on exit. 1013 */ 1014 int 1015 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) 1016 { 1017 int error; 1018 struct tmpfs_node *node; 1019 1020 MPASS(VOP_ISLOCKED(vp)); 1021 1022 node = VP_TO_TMPFS_NODE(vp); 1023 1024 /* Disallow this operation if the file system is mounted read-only. */ 1025 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1026 return EROFS; 1027 1028 /* Immutable or append-only files cannot be modified, either. */ 1029 if (node->tn_flags & (IMMUTABLE | APPEND)) 1030 return EPERM; 1031 1032 /* 1033 * To modify the permissions on a file, must possess VADMIN 1034 * for that file. 1035 */ 1036 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1037 return (error); 1038 1039 /* 1040 * Privileged processes may set the sticky bit on non-directories, 1041 * as well as set the setgid bit on a file with a group that the 1042 * process is not a member of. 1043 */ 1044 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 1045 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 1046 return (EFTYPE); 1047 } 1048 if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { 1049 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 1050 if (error) 1051 return (error); 1052 } 1053 1054 1055 node->tn_mode &= ~ALLPERMS; 1056 node->tn_mode |= mode & ALLPERMS; 1057 1058 node->tn_status |= TMPFS_NODE_CHANGED; 1059 1060 MPASS(VOP_ISLOCKED(vp)); 1061 1062 return 0; 1063 } 1064 1065 /* --------------------------------------------------------------------- */ 1066 1067 /* 1068 * Change ownership of the given vnode. At least one of uid or gid must 1069 * be different than VNOVAL. If one is set to that value, the attribute 1070 * is unchanged. 1071 * Caller should execute tmpfs_update on vp after a successful execution. 1072 * The vnode must be locked on entry and remain locked on exit. 1073 */ 1074 int 1075 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 1076 struct thread *p) 1077 { 1078 int error; 1079 struct tmpfs_node *node; 1080 uid_t ouid; 1081 gid_t ogid; 1082 1083 MPASS(VOP_ISLOCKED(vp)); 1084 1085 node = VP_TO_TMPFS_NODE(vp); 1086 1087 /* Assign default values if they are unknown. */ 1088 MPASS(uid != VNOVAL || gid != VNOVAL); 1089 if (uid == VNOVAL) 1090 uid = node->tn_uid; 1091 if (gid == VNOVAL) 1092 gid = node->tn_gid; 1093 MPASS(uid != VNOVAL && gid != VNOVAL); 1094 1095 /* Disallow this operation if the file system is mounted read-only. */ 1096 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1097 return EROFS; 1098 1099 /* Immutable or append-only files cannot be modified, either. */ 1100 if (node->tn_flags & (IMMUTABLE | APPEND)) 1101 return EPERM; 1102 1103 /* 1104 * To modify the ownership of a file, must possess VADMIN for that 1105 * file. 1106 */ 1107 if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) 1108 return (error); 1109 1110 /* 1111 * To change the owner of a file, or change the group of a file to a 1112 * group of which we are not a member, the caller must have 1113 * privilege. 1114 */ 1115 if ((uid != node->tn_uid || 1116 (gid != node->tn_gid && !groupmember(gid, cred))) && 1117 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 1118 return (error); 1119 1120 ogid = node->tn_gid; 1121 ouid = node->tn_uid; 1122 1123 node->tn_uid = uid; 1124 node->tn_gid = gid; 1125 1126 node->tn_status |= TMPFS_NODE_CHANGED; 1127 1128 if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { 1129 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) 1130 node->tn_mode &= ~(S_ISUID | S_ISGID); 1131 } 1132 1133 MPASS(VOP_ISLOCKED(vp)); 1134 1135 return 0; 1136 } 1137 1138 /* --------------------------------------------------------------------- */ 1139 1140 /* 1141 * Change size of the given vnode. 1142 * Caller should execute tmpfs_update on vp after a successful execution. 1143 * The vnode must be locked on entry and remain locked on exit. 1144 */ 1145 int 1146 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, 1147 struct thread *p) 1148 { 1149 int error; 1150 struct tmpfs_node *node; 1151 1152 MPASS(VOP_ISLOCKED(vp)); 1153 1154 node = VP_TO_TMPFS_NODE(vp); 1155 1156 /* Decide whether this is a valid operation based on the file type. */ 1157 error = 0; 1158 switch (vp->v_type) { 1159 case VDIR: 1160 return EISDIR; 1161 1162 case VREG: 1163 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1164 return EROFS; 1165 break; 1166 1167 case VBLK: 1168 /* FALLTHROUGH */ 1169 case VCHR: 1170 /* FALLTHROUGH */ 1171 case VFIFO: 1172 /* Allow modifications of special files even if in the file 1173 * system is mounted read-only (we are not modifying the 1174 * files themselves, but the objects they represent). */ 1175 return 0; 1176 1177 default: 1178 /* Anything else is unsupported. */ 1179 return EOPNOTSUPP; 1180 } 1181 1182 /* Immutable or append-only files cannot be modified, either. */ 1183 if (node->tn_flags & (IMMUTABLE | APPEND)) 1184 return EPERM; 1185 1186 error = tmpfs_truncate(vp, size); 1187 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1188 * for us, as will update tn_status; no need to do that here. */ 1189 1190 MPASS(VOP_ISLOCKED(vp)); 1191 1192 return error; 1193 } 1194 1195 /* --------------------------------------------------------------------- */ 1196 1197 /* 1198 * Change access and modification times of the given vnode. 1199 * Caller should execute tmpfs_update on vp after a successful execution. 1200 * The vnode must be locked on entry and remain locked on exit. 1201 */ 1202 int 1203 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1204 struct timespec *birthtime, int vaflags, struct ucred *cred, struct thread *l) 1205 { 1206 int error; 1207 struct tmpfs_node *node; 1208 1209 MPASS(VOP_ISLOCKED(vp)); 1210 1211 node = VP_TO_TMPFS_NODE(vp); 1212 1213 /* Disallow this operation if the file system is mounted read-only. */ 1214 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1215 return EROFS; 1216 1217 /* Immutable or append-only files cannot be modified, either. */ 1218 if (node->tn_flags & (IMMUTABLE | APPEND)) 1219 return EPERM; 1220 1221 /* Determine if the user have proper privilege to update time. */ 1222 if (vaflags & VA_UTIMES_NULL) { 1223 error = VOP_ACCESS(vp, VADMIN, cred, l); 1224 if (error) 1225 error = VOP_ACCESS(vp, VWRITE, cred, l); 1226 } else 1227 error = VOP_ACCESS(vp, VADMIN, cred, l); 1228 if (error) 1229 return (error); 1230 1231 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1232 node->tn_status |= TMPFS_NODE_ACCESSED; 1233 1234 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1235 node->tn_status |= TMPFS_NODE_MODIFIED; 1236 1237 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1238 node->tn_status |= TMPFS_NODE_MODIFIED; 1239 1240 tmpfs_itimes(vp, atime, mtime); 1241 1242 if (birthtime->tv_nsec != VNOVAL && birthtime->tv_nsec != VNOVAL) 1243 node->tn_birthtime = *birthtime; 1244 MPASS(VOP_ISLOCKED(vp)); 1245 1246 return 0; 1247 } 1248 1249 /* --------------------------------------------------------------------- */ 1250 /* Sync timestamps */ 1251 void 1252 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1253 const struct timespec *mod) 1254 { 1255 struct tmpfs_node *node; 1256 struct timespec now; 1257 1258 node = VP_TO_TMPFS_NODE(vp); 1259 1260 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1261 TMPFS_NODE_CHANGED)) == 0) 1262 return; 1263 1264 vfs_timestamp(&now); 1265 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1266 if (acc == NULL) 1267 acc = &now; 1268 node->tn_atime = *acc; 1269 } 1270 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1271 if (mod == NULL) 1272 mod = &now; 1273 node->tn_mtime = *mod; 1274 } 1275 if (node->tn_status & TMPFS_NODE_CHANGED) { 1276 node->tn_ctime = now; 1277 } 1278 node->tn_status &= 1279 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1280 } 1281 1282 /* --------------------------------------------------------------------- */ 1283 1284 void 1285 tmpfs_update(struct vnode *vp) 1286 { 1287 1288 tmpfs_itimes(vp, NULL, NULL); 1289 } 1290 1291 /* --------------------------------------------------------------------- */ 1292 1293 int 1294 tmpfs_truncate(struct vnode *vp, off_t length) 1295 { 1296 int error; 1297 struct tmpfs_node *node; 1298 1299 node = VP_TO_TMPFS_NODE(vp); 1300 1301 if (length < 0) { 1302 error = EINVAL; 1303 goto out; 1304 } 1305 1306 if (node->tn_size == length) { 1307 error = 0; 1308 goto out; 1309 } 1310 1311 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1312 return (EFBIG); 1313 1314 error = tmpfs_reg_resize(vp, length); 1315 if (error == 0) { 1316 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1317 } 1318 1319 out: 1320 tmpfs_update(vp); 1321 1322 return error; 1323 } 1324