1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 41 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 42 * $FreeBSD$ 43 */ 44 45 #include "opt_suiddir.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/fcntl.h> 51 #include <sys/stat.h> 52 #include <sys/bio.h> 53 #include <sys/buf.h> 54 #include <sys/endian.h> 55 #include <sys/priv.h> 56 #include <sys/mount.h> 57 #include <sys/unistd.h> 58 #include <sys/time.h> 59 #include <sys/vnode.h> 60 #include <sys/namei.h> 61 #include <sys/lockf.h> 62 #include <sys/event.h> 63 #include <sys/conf.h> 64 #include <sys/file.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vnode_pager.h> 71 72 #include "opt_directio.h" 73 74 #include <ufs/ufs/dir.h> 75 76 #include <fs/ext2fs/fs.h> 77 #include <fs/ext2fs/inode.h> 78 #include <fs/ext2fs/ext2_extern.h> 79 #include <fs/ext2fs/ext2fs.h> 80 #include <fs/ext2fs/ext2_dinode.h> 81 #include <fs/ext2fs/ext2_dir.h> 82 #include <fs/ext2fs/ext2_mount.h> 83 84 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 85 static void ext2_itimes_locked(struct vnode *); 86 87 static vop_access_t ext2_access; 88 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 89 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 90 struct thread *); 91 static vop_close_t ext2_close; 92 static vop_create_t ext2_create; 93 static vop_fsync_t ext2_fsync; 94 static vop_getattr_t ext2_getattr; 95 static vop_link_t ext2_link; 96 static vop_mkdir_t ext2_mkdir; 97 static vop_mknod_t ext2_mknod; 98 static vop_open_t ext2_open; 99 static vop_pathconf_t ext2_pathconf; 100 static vop_print_t ext2_print; 101 static vop_read_t ext2_read; 102 static vop_readlink_t ext2_readlink; 103 static vop_remove_t ext2_remove; 104 static vop_rename_t ext2_rename; 105 static vop_rmdir_t ext2_rmdir; 106 static vop_setattr_t ext2_setattr; 107 static vop_strategy_t ext2_strategy; 108 static vop_symlink_t ext2_symlink; 109 static vop_write_t ext2_write; 110 static vop_vptofh_t ext2_vptofh; 111 static vop_close_t ext2fifo_close; 112 static vop_kqfilter_t ext2fifo_kqfilter; 113 114 /* Global vfs data structures for ext2. */ 115 struct vop_vector ext2_vnodeops = { 116 .vop_default = &default_vnodeops, 117 .vop_access = ext2_access, 118 .vop_bmap = ext2_bmap, 119 .vop_cachedlookup = ext2_lookup, 120 .vop_close = ext2_close, 121 .vop_create = ext2_create, 122 .vop_fsync = ext2_fsync, 123 .vop_getattr = ext2_getattr, 124 .vop_inactive = ext2_inactive, 125 .vop_link = ext2_link, 126 .vop_lookup = vfs_cache_lookup, 127 .vop_mkdir = ext2_mkdir, 128 .vop_mknod = ext2_mknod, 129 .vop_open = ext2_open, 130 .vop_pathconf = ext2_pathconf, 131 .vop_poll = vop_stdpoll, 132 .vop_print = ext2_print, 133 .vop_read = ext2_read, 134 .vop_readdir = ext2_readdir, 135 .vop_readlink = ext2_readlink, 136 .vop_reallocblks = ext2_reallocblks, 137 .vop_reclaim = ext2_reclaim, 138 .vop_remove = ext2_remove, 139 .vop_rename = ext2_rename, 140 .vop_rmdir = ext2_rmdir, 141 .vop_setattr = ext2_setattr, 142 .vop_strategy = ext2_strategy, 143 .vop_symlink = ext2_symlink, 144 .vop_write = ext2_write, 145 .vop_vptofh = ext2_vptofh, 146 }; 147 148 struct vop_vector ext2_fifoops = { 149 .vop_default = &fifo_specops, 150 .vop_access = ext2_access, 151 .vop_close = ext2fifo_close, 152 .vop_fsync = ext2_fsync, 153 .vop_getattr = ext2_getattr, 154 .vop_inactive = ext2_inactive, 155 .vop_kqfilter = ext2fifo_kqfilter, 156 .vop_print = ext2_print, 157 .vop_read = VOP_PANIC, 158 .vop_reclaim = ext2_reclaim, 159 .vop_setattr = ext2_setattr, 160 .vop_write = VOP_PANIC, 161 .vop_vptofh = ext2_vptofh, 162 }; 163 164 /* 165 * A virgin directory (no blushing please). 166 * Note that the type and namlen fields are reversed relative to ext2. 167 * Also, we don't use `struct odirtemplate', since it would just cause 168 * endianness problems. 169 */ 170 static struct dirtemplate mastertemplate = { 171 0, 12, 1, EXT2_FT_DIR, ".", 172 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 173 }; 174 static struct dirtemplate omastertemplate = { 175 0, 12, 1, EXT2_FT_UNKNOWN, ".", 176 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 177 }; 178 179 static void 180 ext2_itimes_locked(struct vnode *vp) 181 { 182 struct inode *ip; 183 struct timespec ts; 184 185 ASSERT_VI_LOCKED(vp, __func__); 186 187 ip = VTOI(vp); 188 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 189 return; 190 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 191 ip->i_flag |= IN_LAZYMOD; 192 else 193 ip->i_flag |= IN_MODIFIED; 194 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 195 vfs_timestamp(&ts); 196 if (ip->i_flag & IN_ACCESS) { 197 ip->i_atime = ts.tv_sec; 198 ip->i_atimensec = ts.tv_nsec; 199 } 200 if (ip->i_flag & IN_UPDATE) { 201 ip->i_mtime = ts.tv_sec; 202 ip->i_mtimensec = ts.tv_nsec; 203 ip->i_modrev++; 204 } 205 if (ip->i_flag & IN_CHANGE) { 206 ip->i_ctime = ts.tv_sec; 207 ip->i_ctimensec = ts.tv_nsec; 208 } 209 } 210 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 211 } 212 213 void 214 ext2_itimes(struct vnode *vp) 215 { 216 217 VI_LOCK(vp); 218 ext2_itimes_locked(vp); 219 VI_UNLOCK(vp); 220 } 221 222 /* 223 * Create a regular file 224 */ 225 static int 226 ext2_create(struct vop_create_args *ap) 227 { 228 int error; 229 230 error = 231 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 232 ap->a_dvp, ap->a_vpp, ap->a_cnp); 233 if (error) 234 return (error); 235 return (0); 236 } 237 238 static int 239 ext2_open(struct vop_open_args *ap) 240 { 241 242 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 243 return (EOPNOTSUPP); 244 245 /* 246 * Files marked append-only must be opened for appending. 247 */ 248 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 249 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 250 return (EPERM); 251 252 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 253 254 return (0); 255 } 256 257 /* 258 * Close called. 259 * 260 * Update the times on the inode. 261 */ 262 static int 263 ext2_close(struct vop_close_args *ap) 264 { 265 struct vnode *vp = ap->a_vp; 266 267 VI_LOCK(vp); 268 if (vp->v_usecount > 1) 269 ext2_itimes_locked(vp); 270 VI_UNLOCK(vp); 271 return (0); 272 } 273 274 static int 275 ext2_access(struct vop_access_args *ap) 276 { 277 struct vnode *vp = ap->a_vp; 278 struct inode *ip = VTOI(vp); 279 accmode_t accmode = ap->a_accmode; 280 int error; 281 282 if (vp->v_type == VBLK || vp->v_type == VCHR) 283 return (EOPNOTSUPP); 284 285 /* 286 * Disallow write attempts on read-only file systems; 287 * unless the file is a socket, fifo, or a block or 288 * character device resident on the file system. 289 */ 290 if (accmode & VWRITE) { 291 switch (vp->v_type) { 292 case VDIR: 293 case VLNK: 294 case VREG: 295 if (vp->v_mount->mnt_flag & MNT_RDONLY) 296 return (EROFS); 297 break; 298 default: 299 break; 300 } 301 } 302 303 /* If immutable bit set, nobody gets to write it. */ 304 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 305 return (EPERM); 306 307 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 308 ap->a_accmode, ap->a_cred, NULL); 309 return (error); 310 } 311 312 static int 313 ext2_getattr(struct vop_getattr_args *ap) 314 { 315 struct vnode *vp = ap->a_vp; 316 struct inode *ip = VTOI(vp); 317 struct vattr *vap = ap->a_vap; 318 319 ext2_itimes(vp); 320 /* 321 * Copy from inode table 322 */ 323 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 324 vap->va_fileid = ip->i_number; 325 vap->va_mode = ip->i_mode & ~IFMT; 326 vap->va_nlink = ip->i_nlink; 327 vap->va_uid = ip->i_uid; 328 vap->va_gid = ip->i_gid; 329 vap->va_rdev = ip->i_rdev; 330 vap->va_size = ip->i_size; 331 vap->va_atime.tv_sec = ip->i_atime; 332 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 333 vap->va_mtime.tv_sec = ip->i_mtime; 334 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 335 vap->va_ctime.tv_sec = ip->i_ctime; 336 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 337 if E2DI_HAS_XTIME(ip) { 338 vap->va_birthtime.tv_sec = ip->i_birthtime; 339 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 340 } 341 vap->va_flags = ip->i_flags; 342 vap->va_gen = ip->i_gen; 343 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 344 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 345 vap->va_type = IFTOVT(ip->i_mode); 346 vap->va_filerev = ip->i_modrev; 347 return (0); 348 } 349 350 /* 351 * Set attribute vnode op. called from several syscalls 352 */ 353 static int 354 ext2_setattr(struct vop_setattr_args *ap) 355 { 356 struct vattr *vap = ap->a_vap; 357 struct vnode *vp = ap->a_vp; 358 struct inode *ip = VTOI(vp); 359 struct ucred *cred = ap->a_cred; 360 struct thread *td = curthread; 361 int error; 362 363 /* 364 * Check for unsettable attributes. 365 */ 366 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 367 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 368 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 369 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 370 return (EINVAL); 371 } 372 if (vap->va_flags != VNOVAL) { 373 /* Disallow flags not supported by ext2fs. */ 374 if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 375 return (EOPNOTSUPP); 376 377 if (vp->v_mount->mnt_flag & MNT_RDONLY) 378 return (EROFS); 379 /* 380 * Callers may only modify the file flags on objects they 381 * have VADMIN rights for. 382 */ 383 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 384 return (error); 385 /* 386 * Unprivileged processes and privileged processes in 387 * jail() are not permitted to unset system flags, or 388 * modify flags if any system flags are set. 389 * Privileged non-jail processes may not modify system flags 390 * if securelevel > 0 and any existing system flags are set. 391 */ 392 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 393 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 394 error = securelevel_gt(cred, 0); 395 if (error) 396 return (error); 397 } 398 } else { 399 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 400 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 401 return (EPERM); 402 } 403 ip->i_flags = vap->va_flags; 404 ip->i_flag |= IN_CHANGE; 405 if (ip->i_flags & (IMMUTABLE | APPEND)) 406 return (0); 407 } 408 if (ip->i_flags & (IMMUTABLE | APPEND)) 409 return (EPERM); 410 /* 411 * Go through the fields and update iff not VNOVAL. 412 */ 413 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 414 if (vp->v_mount->mnt_flag & MNT_RDONLY) 415 return (EROFS); 416 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 417 td)) != 0) 418 return (error); 419 } 420 if (vap->va_size != VNOVAL) { 421 /* 422 * Disallow write attempts on read-only file systems; 423 * unless the file is a socket, fifo, or a block or 424 * character device resident on the file system. 425 */ 426 switch (vp->v_type) { 427 case VDIR: 428 return (EISDIR); 429 case VLNK: 430 case VREG: 431 if (vp->v_mount->mnt_flag & MNT_RDONLY) 432 return (EROFS); 433 break; 434 default: 435 break; 436 } 437 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 438 return (error); 439 } 440 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 441 if (vp->v_mount->mnt_flag & MNT_RDONLY) 442 return (EROFS); 443 /* 444 * From utimes(2): 445 * If times is NULL, ... The caller must be the owner of 446 * the file, have permission to write the file, or be the 447 * super-user. 448 * If times is non-NULL, ... The caller must be the owner of 449 * the file or be the super-user. 450 */ 451 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 452 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 453 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 454 return (error); 455 if (vap->va_atime.tv_sec != VNOVAL) 456 ip->i_flag |= IN_ACCESS; 457 if (vap->va_mtime.tv_sec != VNOVAL) 458 ip->i_flag |= IN_CHANGE | IN_UPDATE; 459 ext2_itimes(vp); 460 if (vap->va_atime.tv_sec != VNOVAL) { 461 ip->i_atime = vap->va_atime.tv_sec; 462 ip->i_atimensec = vap->va_atime.tv_nsec; 463 } 464 if (vap->va_mtime.tv_sec != VNOVAL) { 465 ip->i_mtime = vap->va_mtime.tv_sec; 466 ip->i_mtimensec = vap->va_mtime.tv_nsec; 467 } 468 ip->i_birthtime = vap->va_birthtime.tv_sec; 469 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 470 error = ext2_update(vp, 0); 471 if (error) 472 return (error); 473 } 474 error = 0; 475 if (vap->va_mode != (mode_t)VNOVAL) { 476 if (vp->v_mount->mnt_flag & MNT_RDONLY) 477 return (EROFS); 478 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 479 } 480 return (error); 481 } 482 483 /* 484 * Change the mode on a file. 485 * Inode must be locked before calling. 486 */ 487 static int 488 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 489 { 490 struct inode *ip = VTOI(vp); 491 int error; 492 493 /* 494 * To modify the permissions on a file, must possess VADMIN 495 * for that file. 496 */ 497 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 498 return (error); 499 /* 500 * Privileged processes may set the sticky bit on non-directories, 501 * as well as set the setgid bit on a file with a group that the 502 * process is not a member of. 503 */ 504 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 505 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 506 if (error) 507 return (EFTYPE); 508 } 509 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 510 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 511 if (error) 512 return (error); 513 } 514 ip->i_mode &= ~ALLPERMS; 515 ip->i_mode |= (mode & ALLPERMS); 516 ip->i_flag |= IN_CHANGE; 517 return (0); 518 } 519 520 /* 521 * Perform chown operation on inode ip; 522 * inode must be locked prior to call. 523 */ 524 static int 525 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 526 struct thread *td) 527 { 528 struct inode *ip = VTOI(vp); 529 uid_t ouid; 530 gid_t ogid; 531 int error = 0; 532 533 if (uid == (uid_t)VNOVAL) 534 uid = ip->i_uid; 535 if (gid == (gid_t)VNOVAL) 536 gid = ip->i_gid; 537 /* 538 * To modify the ownership of a file, must possess VADMIN 539 * for that file. 540 */ 541 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 542 return (error); 543 /* 544 * To change the owner of a file, or change the group of a file 545 * to a group of which we are not a member, the caller must 546 * have privilege. 547 */ 548 if (uid != ip->i_uid || (gid != ip->i_gid && 549 !groupmember(gid, cred))) { 550 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 551 if (error) 552 return (error); 553 } 554 ogid = ip->i_gid; 555 ouid = ip->i_uid; 556 ip->i_gid = gid; 557 ip->i_uid = uid; 558 ip->i_flag |= IN_CHANGE; 559 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 560 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 561 ip->i_mode &= ~(ISUID | ISGID); 562 } 563 return (0); 564 } 565 566 /* 567 * Synch an open file. 568 */ 569 /* ARGSUSED */ 570 static int 571 ext2_fsync(struct vop_fsync_args *ap) 572 { 573 /* 574 * Flush all dirty buffers associated with a vnode. 575 */ 576 577 vop_stdfsync(ap); 578 579 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 580 } 581 582 /* 583 * Mknod vnode call 584 */ 585 /* ARGSUSED */ 586 static int 587 ext2_mknod(struct vop_mknod_args *ap) 588 { 589 struct vattr *vap = ap->a_vap; 590 struct vnode **vpp = ap->a_vpp; 591 struct inode *ip; 592 ino_t ino; 593 int error; 594 595 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 596 ap->a_dvp, vpp, ap->a_cnp); 597 if (error) 598 return (error); 599 ip = VTOI(*vpp); 600 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 601 if (vap->va_rdev != VNOVAL) { 602 /* 603 * Want to be able to use this to make badblock 604 * inodes, so don't truncate the dev number. 605 */ 606 ip->i_rdev = vap->va_rdev; 607 } 608 /* 609 * Remove inode, then reload it through VFS_VGET so it is 610 * checked to see if it is an alias of an existing entry in 611 * the inode cache. XXX I don't believe this is necessary now. 612 */ 613 (*vpp)->v_type = VNON; 614 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 615 vgone(*vpp); 616 vput(*vpp); 617 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 618 if (error) { 619 *vpp = NULL; 620 return (error); 621 } 622 return (0); 623 } 624 625 static int 626 ext2_remove(struct vop_remove_args *ap) 627 { 628 struct inode *ip; 629 struct vnode *vp = ap->a_vp; 630 struct vnode *dvp = ap->a_dvp; 631 int error; 632 633 ip = VTOI(vp); 634 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 635 (VTOI(dvp)->i_flags & APPEND)) { 636 error = EPERM; 637 goto out; 638 } 639 error = ext2_dirremove(dvp, ap->a_cnp); 640 if (error == 0) { 641 ip->i_nlink--; 642 ip->i_flag |= IN_CHANGE; 643 } 644 out: 645 return (error); 646 } 647 648 /* 649 * link vnode call 650 */ 651 static int 652 ext2_link(struct vop_link_args *ap) 653 { 654 struct vnode *vp = ap->a_vp; 655 struct vnode *tdvp = ap->a_tdvp; 656 struct componentname *cnp = ap->a_cnp; 657 struct inode *ip; 658 int error; 659 660 #ifdef DIAGNOSTIC 661 if ((cnp->cn_flags & HASBUF) == 0) 662 panic("ext2_link: no name"); 663 #endif 664 if (tdvp->v_mount != vp->v_mount) { 665 error = EXDEV; 666 goto out; 667 } 668 ip = VTOI(vp); 669 if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) { 670 error = EMLINK; 671 goto out; 672 } 673 if (ip->i_flags & (IMMUTABLE | APPEND)) { 674 error = EPERM; 675 goto out; 676 } 677 ip->i_nlink++; 678 ip->i_flag |= IN_CHANGE; 679 error = ext2_update(vp, !DOINGASYNC(vp)); 680 if (!error) 681 error = ext2_direnter(ip, tdvp, cnp); 682 if (error) { 683 ip->i_nlink--; 684 ip->i_flag |= IN_CHANGE; 685 } 686 out: 687 return (error); 688 } 689 690 /* 691 * Rename system call. 692 * rename("foo", "bar"); 693 * is essentially 694 * unlink("bar"); 695 * link("foo", "bar"); 696 * unlink("foo"); 697 * but ``atomically''. Can't do full commit without saving state in the 698 * inode on disk which isn't feasible at this time. Best we can do is 699 * always guarantee the target exists. 700 * 701 * Basic algorithm is: 702 * 703 * 1) Bump link count on source while we're linking it to the 704 * target. This also ensure the inode won't be deleted out 705 * from underneath us while we work (it may be truncated by 706 * a concurrent `trunc' or `open' for creation). 707 * 2) Link source to destination. If destination already exists, 708 * delete it first. 709 * 3) Unlink source reference to inode if still around. If a 710 * directory was moved and the parent of the destination 711 * is different from the source, patch the ".." entry in the 712 * directory. 713 */ 714 static int 715 ext2_rename(struct vop_rename_args *ap) 716 { 717 struct vnode *tvp = ap->a_tvp; 718 struct vnode *tdvp = ap->a_tdvp; 719 struct vnode *fvp = ap->a_fvp; 720 struct vnode *fdvp = ap->a_fdvp; 721 struct componentname *tcnp = ap->a_tcnp; 722 struct componentname *fcnp = ap->a_fcnp; 723 struct inode *ip, *xp, *dp; 724 struct dirtemplate dirbuf; 725 int doingdirectory = 0, oldparent = 0, newparent = 0; 726 int error = 0; 727 u_char namlen; 728 729 #ifdef DIAGNOSTIC 730 if ((tcnp->cn_flags & HASBUF) == 0 || 731 (fcnp->cn_flags & HASBUF) == 0) 732 panic("ext2_rename: no name"); 733 #endif 734 /* 735 * Check for cross-device rename. 736 */ 737 if ((fvp->v_mount != tdvp->v_mount) || 738 (tvp && (fvp->v_mount != tvp->v_mount))) { 739 error = EXDEV; 740 abortit: 741 if (tdvp == tvp) 742 vrele(tdvp); 743 else 744 vput(tdvp); 745 if (tvp) 746 vput(tvp); 747 vrele(fdvp); 748 vrele(fvp); 749 return (error); 750 } 751 752 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 753 (VTOI(tdvp)->i_flags & APPEND))) { 754 error = EPERM; 755 goto abortit; 756 } 757 758 /* 759 * Renaming a file to itself has no effect. The upper layers should 760 * not call us in that case. Temporarily just warn if they do. 761 */ 762 if (fvp == tvp) { 763 printf("ext2_rename: fvp == tvp (can't happen)\n"); 764 error = 0; 765 goto abortit; 766 } 767 768 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 769 goto abortit; 770 dp = VTOI(fdvp); 771 ip = VTOI(fvp); 772 if (ip->i_nlink >= EXT2_LINK_MAX) { 773 VOP_UNLOCK(fvp, 0); 774 error = EMLINK; 775 goto abortit; 776 } 777 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 778 || (dp->i_flags & APPEND)) { 779 VOP_UNLOCK(fvp, 0); 780 error = EPERM; 781 goto abortit; 782 } 783 if ((ip->i_mode & IFMT) == IFDIR) { 784 /* 785 * Avoid ".", "..", and aliases of "." for obvious reasons. 786 */ 787 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 788 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 789 (ip->i_flag & IN_RENAME)) { 790 VOP_UNLOCK(fvp, 0); 791 error = EINVAL; 792 goto abortit; 793 } 794 ip->i_flag |= IN_RENAME; 795 oldparent = dp->i_number; 796 doingdirectory++; 797 } 798 vrele(fdvp); 799 800 /* 801 * When the target exists, both the directory 802 * and target vnodes are returned locked. 803 */ 804 dp = VTOI(tdvp); 805 xp = NULL; 806 if (tvp) 807 xp = VTOI(tvp); 808 809 /* 810 * 1) Bump link count while we're moving stuff 811 * around. If we crash somewhere before 812 * completing our work, the link count 813 * may be wrong, but correctable. 814 */ 815 ip->i_nlink++; 816 ip->i_flag |= IN_CHANGE; 817 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 818 VOP_UNLOCK(fvp, 0); 819 goto bad; 820 } 821 822 /* 823 * If ".." must be changed (ie the directory gets a new 824 * parent) then the source directory must not be in the 825 * directory hierarchy above the target, as this would 826 * orphan everything below the source directory. Also 827 * the user must have write permission in the source so 828 * as to be able to change "..". We must repeat the call 829 * to namei, as the parent directory is unlocked by the 830 * call to checkpath(). 831 */ 832 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 833 VOP_UNLOCK(fvp, 0); 834 if (oldparent != dp->i_number) 835 newparent = dp->i_number; 836 if (doingdirectory && newparent) { 837 if (error) /* write access check above */ 838 goto bad; 839 if (xp != NULL) 840 vput(tvp); 841 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 842 if (error) 843 goto out; 844 VREF(tdvp); 845 error = relookup(tdvp, &tvp, tcnp); 846 if (error) 847 goto out; 848 vrele(tdvp); 849 dp = VTOI(tdvp); 850 xp = NULL; 851 if (tvp) 852 xp = VTOI(tvp); 853 } 854 /* 855 * 2) If target doesn't exist, link the target 856 * to the source and unlink the source. 857 * Otherwise, rewrite the target directory 858 * entry to reference the source inode and 859 * expunge the original entry's existence. 860 */ 861 if (xp == NULL) { 862 if (dp->i_devvp != ip->i_devvp) 863 panic("ext2_rename: EXDEV"); 864 /* 865 * Account for ".." in new directory. 866 * When source and destination have the same 867 * parent we don't fool with the link count. 868 */ 869 if (doingdirectory && newparent) { 870 if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) { 871 error = EMLINK; 872 goto bad; 873 } 874 dp->i_nlink++; 875 dp->i_flag |= IN_CHANGE; 876 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 877 if (error) 878 goto bad; 879 } 880 error = ext2_direnter(ip, tdvp, tcnp); 881 if (error) { 882 if (doingdirectory && newparent) { 883 dp->i_nlink--; 884 dp->i_flag |= IN_CHANGE; 885 (void)ext2_update(tdvp, 1); 886 } 887 goto bad; 888 } 889 vput(tdvp); 890 } else { 891 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 892 panic("ext2_rename: EXDEV"); 893 /* 894 * Short circuit rename(foo, foo). 895 */ 896 if (xp->i_number == ip->i_number) 897 panic("ext2_rename: same file"); 898 /* 899 * If the parent directory is "sticky", then the user must 900 * own the parent directory, or the destination of the rename, 901 * otherwise the destination may not be changed (except by 902 * root). This implements append-only directories. 903 */ 904 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 905 tcnp->cn_cred->cr_uid != dp->i_uid && 906 xp->i_uid != tcnp->cn_cred->cr_uid) { 907 error = EPERM; 908 goto bad; 909 } 910 /* 911 * Target must be empty if a directory and have no links 912 * to it. Also, ensure source and target are compatible 913 * (both directories, or both not directories). 914 */ 915 if ((xp->i_mode&IFMT) == IFDIR) { 916 if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || 917 xp->i_nlink > 2) { 918 error = ENOTEMPTY; 919 goto bad; 920 } 921 if (!doingdirectory) { 922 error = ENOTDIR; 923 goto bad; 924 } 925 cache_purge(tdvp); 926 } else if (doingdirectory) { 927 error = EISDIR; 928 goto bad; 929 } 930 error = ext2_dirrewrite(dp, ip, tcnp); 931 if (error) 932 goto bad; 933 /* 934 * If the target directory is in the same 935 * directory as the source directory, 936 * decrement the link count on the parent 937 * of the target directory. 938 */ 939 if (doingdirectory && !newparent) { 940 dp->i_nlink--; 941 dp->i_flag |= IN_CHANGE; 942 } 943 vput(tdvp); 944 /* 945 * Adjust the link count of the target to 946 * reflect the dirrewrite above. If this is 947 * a directory it is empty and there are 948 * no links to it, so we can squash the inode and 949 * any space associated with it. We disallowed 950 * renaming over top of a directory with links to 951 * it above, as the remaining link would point to 952 * a directory without "." or ".." entries. 953 */ 954 xp->i_nlink--; 955 if (doingdirectory) { 956 if (--xp->i_nlink != 0) 957 panic("ext2_rename: linked directory"); 958 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 959 tcnp->cn_cred, tcnp->cn_thread); 960 } 961 xp->i_flag |= IN_CHANGE; 962 vput(tvp); 963 xp = NULL; 964 } 965 966 /* 967 * 3) Unlink the source. 968 */ 969 fcnp->cn_flags &= ~MODMASK; 970 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 971 VREF(fdvp); 972 error = relookup(fdvp, &fvp, fcnp); 973 if (error == 0) 974 vrele(fdvp); 975 if (fvp != NULL) { 976 xp = VTOI(fvp); 977 dp = VTOI(fdvp); 978 } else { 979 /* 980 * From name has disappeared. 981 */ 982 if (doingdirectory) 983 panic("ext2_rename: lost dir entry"); 984 vrele(ap->a_fvp); 985 return (0); 986 } 987 /* 988 * Ensure that the directory entry still exists and has not 989 * changed while the new name has been entered. If the source is 990 * a file then the entry may have been unlinked or renamed. In 991 * either case there is no further work to be done. If the source 992 * is a directory then it cannot have been rmdir'ed; its link 993 * count of three would cause a rmdir to fail with ENOTEMPTY. 994 * The IN_RENAME flag ensures that it cannot be moved by another 995 * rename. 996 */ 997 if (xp != ip) { 998 if (doingdirectory) 999 panic("ext2_rename: lost dir entry"); 1000 } else { 1001 /* 1002 * If the source is a directory with a 1003 * new parent, the link count of the old 1004 * parent directory must be decremented 1005 * and ".." set to point to the new parent. 1006 */ 1007 if (doingdirectory && newparent) { 1008 dp->i_nlink--; 1009 dp->i_flag |= IN_CHANGE; 1010 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1011 sizeof(struct dirtemplate), (off_t)0, 1012 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1013 tcnp->cn_cred, NOCRED, NULL, NULL); 1014 if (error == 0) { 1015 /* Like ufs little-endian: */ 1016 namlen = dirbuf.dotdot_type; 1017 if (namlen != 2 || 1018 dirbuf.dotdot_name[0] != '.' || 1019 dirbuf.dotdot_name[1] != '.') { 1020 ext2_dirbad(xp, (doff_t)12, 1021 "rename: mangled dir"); 1022 } else { 1023 dirbuf.dotdot_ino = newparent; 1024 (void) vn_rdwr(UIO_WRITE, fvp, 1025 (caddr_t)&dirbuf, 1026 sizeof(struct dirtemplate), 1027 (off_t)0, UIO_SYSSPACE, 1028 IO_NODELOCKED | IO_SYNC | 1029 IO_NOMACCHECK, tcnp->cn_cred, 1030 NOCRED, NULL, NULL); 1031 cache_purge(fdvp); 1032 } 1033 } 1034 } 1035 error = ext2_dirremove(fdvp, fcnp); 1036 if (!error) { 1037 xp->i_nlink--; 1038 xp->i_flag |= IN_CHANGE; 1039 } 1040 xp->i_flag &= ~IN_RENAME; 1041 } 1042 if (dp) 1043 vput(fdvp); 1044 if (xp) 1045 vput(fvp); 1046 vrele(ap->a_fvp); 1047 return (error); 1048 1049 bad: 1050 if (xp) 1051 vput(ITOV(xp)); 1052 vput(ITOV(dp)); 1053 out: 1054 if (doingdirectory) 1055 ip->i_flag &= ~IN_RENAME; 1056 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1057 ip->i_nlink--; 1058 ip->i_flag |= IN_CHANGE; 1059 ip->i_flag &= ~IN_RENAME; 1060 vput(fvp); 1061 } else 1062 vrele(fvp); 1063 return (error); 1064 } 1065 1066 /* 1067 * Mkdir system call 1068 */ 1069 static int 1070 ext2_mkdir(struct vop_mkdir_args *ap) 1071 { 1072 struct vnode *dvp = ap->a_dvp; 1073 struct vattr *vap = ap->a_vap; 1074 struct componentname *cnp = ap->a_cnp; 1075 struct inode *ip, *dp; 1076 struct vnode *tvp; 1077 struct dirtemplate dirtemplate, *dtp; 1078 int error, dmode; 1079 1080 #ifdef DIAGNOSTIC 1081 if ((cnp->cn_flags & HASBUF) == 0) 1082 panic("ext2_mkdir: no name"); 1083 #endif 1084 dp = VTOI(dvp); 1085 if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) { 1086 error = EMLINK; 1087 goto out; 1088 } 1089 dmode = vap->va_mode & 0777; 1090 dmode |= IFDIR; 1091 /* 1092 * Must simulate part of ext2_makeinode here to acquire the inode, 1093 * but not have it entered in the parent directory. The entry is 1094 * made later after writing "." and ".." entries. 1095 */ 1096 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1097 if (error) 1098 goto out; 1099 ip = VTOI(tvp); 1100 ip->i_gid = dp->i_gid; 1101 #ifdef SUIDDIR 1102 { 1103 /* 1104 * if we are hacking owners here, (only do this where told to) 1105 * and we are not giving it TOO root, (would subvert quotas) 1106 * then go ahead and give it to the other user. 1107 * The new directory also inherits the SUID bit. 1108 * If user's UID and dir UID are the same, 1109 * 'give it away' so that the SUID is still forced on. 1110 */ 1111 if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1112 (dp->i_mode & ISUID) && dp->i_uid) { 1113 dmode |= ISUID; 1114 ip->i_uid = dp->i_uid; 1115 } else { 1116 ip->i_uid = cnp->cn_cred->cr_uid; 1117 } 1118 } 1119 #else 1120 ip->i_uid = cnp->cn_cred->cr_uid; 1121 #endif 1122 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1123 ip->i_mode = dmode; 1124 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1125 ip->i_nlink = 2; 1126 if (cnp->cn_flags & ISWHITEOUT) 1127 ip->i_flags |= UF_OPAQUE; 1128 error = ext2_update(tvp, 1); 1129 1130 /* 1131 * Bump link count in parent directory 1132 * to reflect work done below. Should 1133 * be done before reference is created 1134 * so reparation is possible if we crash. 1135 */ 1136 dp->i_nlink++; 1137 dp->i_flag |= IN_CHANGE; 1138 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1139 if (error) 1140 goto bad; 1141 1142 /* Initialize directory with "." and ".." from static template. */ 1143 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1144 EXT2F_INCOMPAT_FTYPE)) 1145 dtp = &mastertemplate; 1146 else 1147 dtp = &omastertemplate; 1148 dirtemplate = *dtp; 1149 dirtemplate.dot_ino = ip->i_number; 1150 dirtemplate.dotdot_ino = dp->i_number; 1151 /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE 1152 * so let's just redefine it - for this function only 1153 */ 1154 #undef DIRBLKSIZ 1155 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1156 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1157 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1158 sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, 1159 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1160 NULL, NULL); 1161 if (error) { 1162 dp->i_nlink--; 1163 dp->i_flag |= IN_CHANGE; 1164 goto bad; 1165 } 1166 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1167 /* XXX should grow with balloc() */ 1168 panic("ext2_mkdir: blksize"); 1169 else { 1170 ip->i_size = DIRBLKSIZ; 1171 ip->i_flag |= IN_CHANGE; 1172 } 1173 1174 /* Directory set up, now install its entry in the parent directory. */ 1175 error = ext2_direnter(ip, dvp, cnp); 1176 if (error) { 1177 dp->i_nlink--; 1178 dp->i_flag |= IN_CHANGE; 1179 } 1180 bad: 1181 /* 1182 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1183 * for us because we set the link count to 0. 1184 */ 1185 if (error) { 1186 ip->i_nlink = 0; 1187 ip->i_flag |= IN_CHANGE; 1188 vput(tvp); 1189 } else 1190 *ap->a_vpp = tvp; 1191 out: 1192 return (error); 1193 #undef DIRBLKSIZ 1194 #define DIRBLKSIZ DEV_BSIZE 1195 } 1196 1197 /* 1198 * Rmdir system call. 1199 */ 1200 static int 1201 ext2_rmdir(struct vop_rmdir_args *ap) 1202 { 1203 struct vnode *vp = ap->a_vp; 1204 struct vnode *dvp = ap->a_dvp; 1205 struct componentname *cnp = ap->a_cnp; 1206 struct inode *ip, *dp; 1207 int error; 1208 1209 ip = VTOI(vp); 1210 dp = VTOI(dvp); 1211 1212 /* 1213 * Verify the directory is empty (and valid). 1214 * (Rmdir ".." won't be valid since 1215 * ".." will contain a reference to 1216 * the current directory and thus be 1217 * non-empty.) 1218 */ 1219 error = 0; 1220 if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1221 error = ENOTEMPTY; 1222 goto out; 1223 } 1224 if ((dp->i_flags & APPEND) 1225 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1226 error = EPERM; 1227 goto out; 1228 } 1229 /* 1230 * Delete reference to directory before purging 1231 * inode. If we crash in between, the directory 1232 * will be reattached to lost+found, 1233 */ 1234 error = ext2_dirremove(dvp, cnp); 1235 if (error) 1236 goto out; 1237 dp->i_nlink--; 1238 dp->i_flag |= IN_CHANGE; 1239 cache_purge(dvp); 1240 VOP_UNLOCK(dvp, 0); 1241 /* 1242 * Truncate inode. The only stuff left 1243 * in the directory is "." and "..". The 1244 * "." reference is inconsequential since 1245 * we're quashing it. The ".." reference 1246 * has already been adjusted above. We've 1247 * removed the "." reference and the reference 1248 * in the parent directory, but there may be 1249 * other hard links so decrement by 2 and 1250 * worry about them later. 1251 */ 1252 ip->i_nlink -= 2; 1253 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1254 cnp->cn_thread); 1255 cache_purge(ITOV(ip)); 1256 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1257 VOP_UNLOCK(vp, 0); 1258 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1259 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1260 } 1261 out: 1262 return (error); 1263 } 1264 1265 /* 1266 * symlink -- make a symbolic link 1267 */ 1268 static int 1269 ext2_symlink(struct vop_symlink_args *ap) 1270 { 1271 struct vnode *vp, **vpp = ap->a_vpp; 1272 struct inode *ip; 1273 int len, error; 1274 1275 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1276 vpp, ap->a_cnp); 1277 if (error) 1278 return (error); 1279 vp = *vpp; 1280 len = strlen(ap->a_target); 1281 if (len < vp->v_mount->mnt_maxsymlinklen) { 1282 ip = VTOI(vp); 1283 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1284 ip->i_size = len; 1285 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1286 } else 1287 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1288 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1289 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1290 if (error) 1291 vput(vp); 1292 return (error); 1293 } 1294 1295 /* 1296 * Return target name of a symbolic link 1297 */ 1298 static int 1299 ext2_readlink(struct vop_readlink_args *ap) 1300 { 1301 struct vnode *vp = ap->a_vp; 1302 struct inode *ip = VTOI(vp); 1303 int isize; 1304 1305 isize = ip->i_size; 1306 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1307 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1308 return (0); 1309 } 1310 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1311 } 1312 1313 /* 1314 * Calculate the logical to physical mapping if not done already, 1315 * then call the device strategy routine. 1316 * 1317 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1318 * deadlock on memory. See ext2_bmap() for details. 1319 */ 1320 static int 1321 ext2_strategy(struct vop_strategy_args *ap) 1322 { 1323 struct buf *bp = ap->a_bp; 1324 struct vnode *vp = ap->a_vp; 1325 struct inode *ip; 1326 struct bufobj *bo; 1327 int32_t blkno; 1328 int error; 1329 1330 ip = VTOI(vp); 1331 if (vp->v_type == VBLK || vp->v_type == VCHR) 1332 panic("ext2_strategy: spec"); 1333 if (bp->b_blkno == bp->b_lblkno) { 1334 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1335 bp->b_blkno = blkno; 1336 if (error) { 1337 bp->b_error = error; 1338 bp->b_ioflags |= BIO_ERROR; 1339 bufdone(bp); 1340 return (0); 1341 } 1342 if ((long)bp->b_blkno == -1) 1343 vfs_bio_clrbuf(bp); 1344 } 1345 if ((long)bp->b_blkno == -1) { 1346 bufdone(bp); 1347 return (0); 1348 } 1349 bp->b_iooffset = dbtob(bp->b_blkno); 1350 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1351 BO_STRATEGY(bo, bp); 1352 return (0); 1353 } 1354 1355 /* 1356 * Print out the contents of an inode. 1357 */ 1358 static int 1359 ext2_print(struct vop_print_args *ap) 1360 { 1361 struct vnode *vp = ap->a_vp; 1362 struct inode *ip = VTOI(vp); 1363 1364 vn_printf(ip->i_devvp, "\tino %lu", (u_long)ip->i_number); 1365 if (vp->v_type == VFIFO) 1366 fifo_printinfo(vp); 1367 printf("\n"); 1368 return (0); 1369 } 1370 1371 /* 1372 * Close wrapper for fifos. 1373 * 1374 * Update the times on the inode then do device close. 1375 */ 1376 static int 1377 ext2fifo_close(struct vop_close_args *ap) 1378 { 1379 struct vnode *vp = ap->a_vp; 1380 1381 VI_LOCK(vp); 1382 if (vp->v_usecount > 1) 1383 ext2_itimes_locked(vp); 1384 VI_UNLOCK(vp); 1385 return (fifo_specops.vop_close(ap)); 1386 } 1387 1388 /* 1389 * Kqfilter wrapper for fifos. 1390 * 1391 * Fall through to ext2 kqfilter routines if needed 1392 */ 1393 static int 1394 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1395 { 1396 int error; 1397 1398 error = fifo_specops.vop_kqfilter(ap); 1399 if (error) 1400 error = vfs_kqfilter(ap); 1401 return (error); 1402 } 1403 1404 /* 1405 * Return POSIX pathconf information applicable to ext2 filesystems. 1406 */ 1407 static int 1408 ext2_pathconf(struct vop_pathconf_args *ap) 1409 { 1410 1411 switch (ap->a_name) { 1412 case _PC_LINK_MAX: 1413 *ap->a_retval = EXT2_LINK_MAX; 1414 return (0); 1415 case _PC_NAME_MAX: 1416 *ap->a_retval = NAME_MAX; 1417 return (0); 1418 case _PC_PATH_MAX: 1419 *ap->a_retval = PATH_MAX; 1420 return (0); 1421 case _PC_PIPE_BUF: 1422 *ap->a_retval = PIPE_BUF; 1423 return (0); 1424 case _PC_CHOWN_RESTRICTED: 1425 *ap->a_retval = 1; 1426 return (0); 1427 case _PC_NO_TRUNC: 1428 *ap->a_retval = 1; 1429 return (0); 1430 default: 1431 return (EINVAL); 1432 } 1433 /* NOTREACHED */ 1434 } 1435 1436 /* 1437 * Vnode pointer to File handle 1438 */ 1439 /* ARGSUSED */ 1440 static int 1441 ext2_vptofh(struct vop_vptofh_args *ap) 1442 { 1443 struct inode *ip; 1444 struct ufid *ufhp; 1445 1446 ip = VTOI(ap->a_vp); 1447 ufhp = (struct ufid *)ap->a_fhp; 1448 ufhp->ufid_len = sizeof(struct ufid); 1449 ufhp->ufid_ino = ip->i_number; 1450 ufhp->ufid_gen = ip->i_gen; 1451 return (0); 1452 } 1453 1454 /* 1455 * Initialize the vnode associated with a new inode, handle aliased 1456 * vnodes. 1457 */ 1458 int 1459 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1460 { 1461 struct inode *ip; 1462 struct vnode *vp; 1463 1464 vp = *vpp; 1465 ip = VTOI(vp); 1466 vp->v_type = IFTOVT(ip->i_mode); 1467 if (vp->v_type == VFIFO) 1468 vp->v_op = fifoops; 1469 1470 if (ip->i_number == EXT2_ROOTINO) 1471 vp->v_vflag |= VV_ROOT; 1472 ip->i_modrev = init_va_filerev(); 1473 *vpp = vp; 1474 return (0); 1475 } 1476 1477 /* 1478 * Allocate a new inode. 1479 */ 1480 static int 1481 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1482 struct componentname *cnp) 1483 { 1484 struct inode *ip, *pdir; 1485 struct vnode *tvp; 1486 int error; 1487 1488 pdir = VTOI(dvp); 1489 #ifdef DIAGNOSTIC 1490 if ((cnp->cn_flags & HASBUF) == 0) 1491 panic("ext2_makeinode: no name"); 1492 #endif 1493 *vpp = NULL; 1494 if ((mode & IFMT) == 0) 1495 mode |= IFREG; 1496 1497 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1498 if (error) { 1499 return (error); 1500 } 1501 ip = VTOI(tvp); 1502 ip->i_gid = pdir->i_gid; 1503 #ifdef SUIDDIR 1504 { 1505 /* 1506 * if we are 1507 * not the owner of the directory, 1508 * and we are hacking owners here, (only do this where told to) 1509 * and we are not giving it TOO root, (would subvert quotas) 1510 * then go ahead and give it to the other user. 1511 * Note that this drops off the execute bits for security. 1512 */ 1513 if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1514 (pdir->i_mode & ISUID) && 1515 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1516 ip->i_uid = pdir->i_uid; 1517 mode &= ~07111; 1518 } else { 1519 ip->i_uid = cnp->cn_cred->cr_uid; 1520 } 1521 } 1522 #else 1523 ip->i_uid = cnp->cn_cred->cr_uid; 1524 #endif 1525 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1526 ip->i_mode = mode; 1527 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1528 ip->i_nlink = 1; 1529 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1530 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1531 ip->i_mode &= ~ISGID; 1532 } 1533 1534 if (cnp->cn_flags & ISWHITEOUT) 1535 ip->i_flags |= UF_OPAQUE; 1536 1537 /* 1538 * Make sure inode goes to disk before directory entry. 1539 */ 1540 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1541 if (error) 1542 goto bad; 1543 error = ext2_direnter(ip, dvp, cnp); 1544 if (error) 1545 goto bad; 1546 1547 *vpp = tvp; 1548 return (0); 1549 1550 bad: 1551 /* 1552 * Write error occurred trying to update the inode 1553 * or the directory so must deallocate the inode. 1554 */ 1555 ip->i_nlink = 0; 1556 ip->i_flag |= IN_CHANGE; 1557 vput(tvp); 1558 return (error); 1559 } 1560 1561 /* 1562 * Vnode op for reading. 1563 */ 1564 static int 1565 ext2_read(struct vop_read_args *ap) 1566 { 1567 struct vnode *vp; 1568 struct inode *ip; 1569 struct uio *uio; 1570 struct m_ext2fs *fs; 1571 struct buf *bp; 1572 daddr_t lbn, nextlbn; 1573 off_t bytesinfile; 1574 long size, xfersize, blkoffset; 1575 int error, orig_resid, seqcount; 1576 int ioflag; 1577 1578 vp = ap->a_vp; 1579 uio = ap->a_uio; 1580 ioflag = ap->a_ioflag; 1581 1582 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 1583 ip = VTOI(vp); 1584 1585 #ifdef INVARIANTS 1586 if (uio->uio_rw != UIO_READ) 1587 panic("%s: mode", "ext2_read"); 1588 1589 if (vp->v_type == VLNK) { 1590 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 1591 panic("%s: short symlink", "ext2_read"); 1592 } else if (vp->v_type != VREG && vp->v_type != VDIR) 1593 panic("%s: type %d", "ext2_read", vp->v_type); 1594 #endif 1595 orig_resid = uio->uio_resid; 1596 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 1597 if (orig_resid == 0) 1598 return (0); 1599 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 1600 fs = ip->i_e2fs; 1601 if (uio->uio_offset < ip->i_size && 1602 uio->uio_offset >= fs->e2fs_maxfilesize) 1603 return (EOVERFLOW); 1604 1605 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 1606 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 1607 break; 1608 lbn = lblkno(fs, uio->uio_offset); 1609 nextlbn = lbn + 1; 1610 size = blksize(fs, ip, lbn); 1611 blkoffset = blkoff(fs, uio->uio_offset); 1612 1613 xfersize = fs->e2fs_fsize - blkoffset; 1614 if (uio->uio_resid < xfersize) 1615 xfersize = uio->uio_resid; 1616 if (bytesinfile < xfersize) 1617 xfersize = bytesinfile; 1618 1619 if (lblktosize(fs, nextlbn) >= ip->i_size) 1620 error = bread(vp, lbn, size, NOCRED, &bp); 1621 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 1622 error = cluster_read(vp, ip->i_size, lbn, size, 1623 NOCRED, blkoffset + uio->uio_resid, seqcount, 1624 0, &bp); 1625 } else if (seqcount > 1) { 1626 int nextsize = blksize(fs, ip, nextlbn); 1627 error = breadn(vp, lbn, 1628 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 1629 } else 1630 error = bread(vp, lbn, size, NOCRED, &bp); 1631 if (error) { 1632 brelse(bp); 1633 bp = NULL; 1634 break; 1635 } 1636 1637 /* 1638 * If IO_DIRECT then set B_DIRECT for the buffer. This 1639 * will cause us to attempt to release the buffer later on 1640 * and will cause the buffer cache to attempt to free the 1641 * underlying pages. 1642 */ 1643 if (ioflag & IO_DIRECT) 1644 bp->b_flags |= B_DIRECT; 1645 1646 /* 1647 * We should only get non-zero b_resid when an I/O error 1648 * has occurred, which should cause us to break above. 1649 * However, if the short read did not cause an error, 1650 * then we want to ensure that we do not uiomove bad 1651 * or uninitialized data. 1652 */ 1653 size -= bp->b_resid; 1654 if (size < xfersize) { 1655 if (size == 0) 1656 break; 1657 xfersize = size; 1658 } 1659 error = uiomove((char *)bp->b_data + blkoffset, 1660 (int)xfersize, uio); 1661 if (error) 1662 break; 1663 1664 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1665 /* 1666 * If it's VMIO or direct I/O, then we don't 1667 * need the buf, mark it available for 1668 * freeing. If it's non-direct VMIO, the VM has 1669 * the data. 1670 */ 1671 bp->b_flags |= B_RELBUF; 1672 brelse(bp); 1673 } else { 1674 /* 1675 * Otherwise let whoever 1676 * made the request take care of 1677 * freeing it. We just queue 1678 * it onto another list. 1679 */ 1680 bqrelse(bp); 1681 } 1682 } 1683 1684 /* 1685 * This can only happen in the case of an error 1686 * because the loop above resets bp to NULL on each iteration 1687 * and on normal completion has not set a new value into it. 1688 * so it must have come from a 'break' statement 1689 */ 1690 if (bp != NULL) { 1691 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1692 bp->b_flags |= B_RELBUF; 1693 brelse(bp); 1694 } else { 1695 bqrelse(bp); 1696 } 1697 } 1698 1699 if ((error == 0 || uio->uio_resid != orig_resid) && 1700 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 1701 ip->i_flag |= IN_ACCESS; 1702 return (error); 1703 } 1704 1705 /* 1706 * Vnode op for writing. 1707 */ 1708 static int 1709 ext2_write(struct vop_write_args *ap) 1710 { 1711 struct vnode *vp; 1712 struct uio *uio; 1713 struct inode *ip; 1714 struct m_ext2fs *fs; 1715 struct buf *bp; 1716 daddr_t lbn; 1717 off_t osize; 1718 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 1719 1720 ioflag = ap->a_ioflag; 1721 uio = ap->a_uio; 1722 vp = ap->a_vp; 1723 1724 seqcount = ioflag >> IO_SEQSHIFT; 1725 ip = VTOI(vp); 1726 1727 #ifdef INVARIANTS 1728 if (uio->uio_rw != UIO_WRITE) 1729 panic("%s: mode", "ext2_write"); 1730 #endif 1731 1732 switch (vp->v_type) { 1733 case VREG: 1734 if (ioflag & IO_APPEND) 1735 uio->uio_offset = ip->i_size; 1736 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 1737 return (EPERM); 1738 /* FALLTHROUGH */ 1739 case VLNK: 1740 break; 1741 case VDIR: 1742 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 1743 if ((ioflag & IO_SYNC) == 0) 1744 panic("ext2_write: nonsync dir write"); 1745 break; 1746 default: 1747 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 1748 vp->v_type, (intmax_t)uio->uio_offset, 1749 (intmax_t)uio->uio_resid); 1750 } 1751 1752 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 1753 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 1754 fs = ip->i_e2fs; 1755 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 1756 return (EFBIG); 1757 /* 1758 * Maybe this should be above the vnode op call, but so long as 1759 * file servers have no limits, I don't think it matters. 1760 */ 1761 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 1762 return (EFBIG); 1763 1764 resid = uio->uio_resid; 1765 osize = ip->i_size; 1766 if (seqcount > BA_SEQMAX) 1767 flags = BA_SEQMAX << BA_SEQSHIFT; 1768 else 1769 flags = seqcount << BA_SEQSHIFT; 1770 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 1771 flags |= IO_SYNC; 1772 1773 for (error = 0; uio->uio_resid > 0;) { 1774 lbn = lblkno(fs, uio->uio_offset); 1775 blkoffset = blkoff(fs, uio->uio_offset); 1776 xfersize = fs->e2fs_fsize - blkoffset; 1777 if (uio->uio_resid < xfersize) 1778 xfersize = uio->uio_resid; 1779 if (uio->uio_offset + xfersize > ip->i_size) 1780 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 1781 1782 /* 1783 * We must perform a read-before-write if the transfer size 1784 * does not cover the entire buffer. 1785 */ 1786 if (fs->e2fs_bsize > xfersize) 1787 flags |= BA_CLRBUF; 1788 else 1789 flags &= ~BA_CLRBUF; 1790 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 1791 ap->a_cred, &bp, flags); 1792 if (error != 0) 1793 break; 1794 1795 /* 1796 * If the buffer is not valid and we did not clear garbage 1797 * out above, we have to do so here even though the write 1798 * covers the entire buffer in order to avoid a mmap()/write 1799 * race where another process may see the garbage prior to 1800 * the uiomove() for a write replacing it. 1801 */ 1802 if ((bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize <= xfersize) 1803 vfs_bio_clrbuf(bp); 1804 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 1805 bp->b_flags |= B_NOCACHE; 1806 if (uio->uio_offset + xfersize > ip->i_size) 1807 ip->i_size = uio->uio_offset + xfersize; 1808 size = blksize(fs, ip, lbn) - bp->b_resid; 1809 if (size < xfersize) 1810 xfersize = size; 1811 1812 error = 1813 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 1814 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1815 bp->b_flags |= B_RELBUF; 1816 } 1817 1818 /* 1819 * If IO_SYNC each buffer is written synchronously. Otherwise 1820 * if we have a severe page deficiency write the buffer 1821 * asynchronously. Otherwise try to cluster, and if that 1822 * doesn't do it then either do an async write (if O_DIRECT), 1823 * or a delayed write (if not). 1824 */ 1825 if (ioflag & IO_SYNC) { 1826 (void)bwrite(bp); 1827 } else if (vm_page_count_severe() || 1828 buf_dirty_count_severe() || 1829 (ioflag & IO_ASYNC)) { 1830 bp->b_flags |= B_CLUSTEROK; 1831 bawrite(bp); 1832 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 1833 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 1834 bp->b_flags |= B_CLUSTEROK; 1835 cluster_write(vp, bp, ip->i_size, seqcount, 0); 1836 } else { 1837 bawrite(bp); 1838 } 1839 } else if (ioflag & IO_DIRECT) { 1840 bp->b_flags |= B_CLUSTEROK; 1841 bawrite(bp); 1842 } else { 1843 bp->b_flags |= B_CLUSTEROK; 1844 bdwrite(bp); 1845 } 1846 if (error || xfersize == 0) 1847 break; 1848 } 1849 /* 1850 * If we successfully wrote any data, and we are not the superuser 1851 * we clear the setuid and setgid bits as a precaution against 1852 * tampering. 1853 */ 1854 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 1855 ap->a_cred) { 1856 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 1857 ip->i_mode &= ~(ISUID | ISGID); 1858 } 1859 if (error) { 1860 if (ioflag & IO_UNIT) { 1861 (void)ext2_truncate(vp, osize, 1862 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 1863 uio->uio_offset -= resid - uio->uio_resid; 1864 uio->uio_resid = resid; 1865 } 1866 } 1867 if (uio->uio_resid != resid) { 1868 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1869 if (ioflag & IO_SYNC) 1870 error = ext2_update(vp, 1); 1871 } 1872 return (error); 1873 } 1874