1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 41 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 42 * $FreeBSD$ 43 */ 44 45 #include "opt_suiddir.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/fcntl.h> 51 #include <sys/stat.h> 52 #include <sys/bio.h> 53 #include <sys/buf.h> 54 #include <sys/endian.h> 55 #include <sys/priv.h> 56 #include <sys/mount.h> 57 #include <sys/unistd.h> 58 #include <sys/time.h> 59 #include <sys/vnode.h> 60 #include <sys/namei.h> 61 #include <sys/lockf.h> 62 #include <sys/event.h> 63 #include <sys/conf.h> 64 #include <sys/file.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vnode_pager.h> 71 72 #include "opt_directio.h" 73 74 #include <fs/fifofs/fifo.h> 75 76 #include <ufs/ufs/dir.h> 77 78 #include <fs/ext2fs/fs.h> 79 #include <fs/ext2fs/inode.h> 80 #include <fs/ext2fs/ext2_extern.h> 81 #include <fs/ext2fs/ext2fs.h> 82 #include <fs/ext2fs/ext2_dinode.h> 83 #include <fs/ext2fs/ext2_dir.h> 84 #include <fs/ext2fs/ext2_mount.h> 85 86 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 87 static void ext2_itimes_locked(struct vnode *); 88 89 static vop_access_t ext2_access; 90 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 91 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 92 struct thread *); 93 static vop_close_t ext2_close; 94 static vop_create_t ext2_create; 95 static vop_fsync_t ext2_fsync; 96 static vop_getattr_t ext2_getattr; 97 static vop_link_t ext2_link; 98 static vop_mkdir_t ext2_mkdir; 99 static vop_mknod_t ext2_mknod; 100 static vop_open_t ext2_open; 101 static vop_pathconf_t ext2_pathconf; 102 static vop_print_t ext2_print; 103 static vop_read_t ext2_read; 104 static vop_readlink_t ext2_readlink; 105 static vop_remove_t ext2_remove; 106 static vop_rename_t ext2_rename; 107 static vop_rmdir_t ext2_rmdir; 108 static vop_setattr_t ext2_setattr; 109 static vop_strategy_t ext2_strategy; 110 static vop_symlink_t ext2_symlink; 111 static vop_write_t ext2_write; 112 static vop_vptofh_t ext2_vptofh; 113 static vop_close_t ext2fifo_close; 114 static vop_kqfilter_t ext2fifo_kqfilter; 115 116 /* Global vfs data structures for ext2. */ 117 struct vop_vector ext2_vnodeops = { 118 .vop_default = &default_vnodeops, 119 .vop_access = ext2_access, 120 .vop_bmap = ext2_bmap, 121 .vop_cachedlookup = ext2_lookup, 122 .vop_close = ext2_close, 123 .vop_create = ext2_create, 124 .vop_fsync = ext2_fsync, 125 .vop_getattr = ext2_getattr, 126 .vop_inactive = ext2_inactive, 127 .vop_link = ext2_link, 128 .vop_lookup = vfs_cache_lookup, 129 .vop_mkdir = ext2_mkdir, 130 .vop_mknod = ext2_mknod, 131 .vop_open = ext2_open, 132 .vop_pathconf = ext2_pathconf, 133 .vop_poll = vop_stdpoll, 134 .vop_print = ext2_print, 135 .vop_read = ext2_read, 136 .vop_readdir = ext2_readdir, 137 .vop_readlink = ext2_readlink, 138 .vop_reallocblks = ext2_reallocblks, 139 .vop_reclaim = ext2_reclaim, 140 .vop_remove = ext2_remove, 141 .vop_rename = ext2_rename, 142 .vop_rmdir = ext2_rmdir, 143 .vop_setattr = ext2_setattr, 144 .vop_strategy = ext2_strategy, 145 .vop_symlink = ext2_symlink, 146 .vop_write = ext2_write, 147 .vop_vptofh = ext2_vptofh, 148 }; 149 150 struct vop_vector ext2_fifoops = { 151 .vop_default = &fifo_specops, 152 .vop_access = ext2_access, 153 .vop_close = ext2fifo_close, 154 .vop_fsync = ext2_fsync, 155 .vop_getattr = ext2_getattr, 156 .vop_inactive = ext2_inactive, 157 .vop_kqfilter = ext2fifo_kqfilter, 158 .vop_print = ext2_print, 159 .vop_read = VOP_PANIC, 160 .vop_reclaim = ext2_reclaim, 161 .vop_setattr = ext2_setattr, 162 .vop_write = VOP_PANIC, 163 .vop_vptofh = ext2_vptofh, 164 }; 165 166 /* 167 * A virgin directory (no blushing please). 168 * Note that the type and namlen fields are reversed relative to ext2. 169 * Also, we don't use `struct odirtemplate', since it would just cause 170 * endianness problems. 171 */ 172 static struct dirtemplate mastertemplate = { 173 0, 12, 1, EXT2_FT_DIR, ".", 174 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 175 }; 176 static struct dirtemplate omastertemplate = { 177 0, 12, 1, EXT2_FT_UNKNOWN, ".", 178 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 179 }; 180 181 static void 182 ext2_itimes_locked(struct vnode *vp) 183 { 184 struct inode *ip; 185 struct timespec ts; 186 187 ASSERT_VI_LOCKED(vp, __func__); 188 189 ip = VTOI(vp); 190 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 191 return; 192 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 193 ip->i_flag |= IN_LAZYMOD; 194 else 195 ip->i_flag |= IN_MODIFIED; 196 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 197 vfs_timestamp(&ts); 198 if (ip->i_flag & IN_ACCESS) { 199 ip->i_atime = ts.tv_sec; 200 ip->i_atimensec = ts.tv_nsec; 201 } 202 if (ip->i_flag & IN_UPDATE) { 203 ip->i_mtime = ts.tv_sec; 204 ip->i_mtimensec = ts.tv_nsec; 205 ip->i_modrev++; 206 } 207 if (ip->i_flag & IN_CHANGE) { 208 ip->i_ctime = ts.tv_sec; 209 ip->i_ctimensec = ts.tv_nsec; 210 } 211 } 212 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 213 } 214 215 void 216 ext2_itimes(struct vnode *vp) 217 { 218 219 VI_LOCK(vp); 220 ext2_itimes_locked(vp); 221 VI_UNLOCK(vp); 222 } 223 224 /* 225 * Create a regular file 226 */ 227 static int 228 ext2_create(ap) 229 struct vop_create_args /* { 230 struct vnode *a_dvp; 231 struct vnode **a_vpp; 232 struct componentname *a_cnp; 233 struct vattr *a_vap; 234 } */ *ap; 235 { 236 int error; 237 238 error = 239 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 240 ap->a_dvp, ap->a_vpp, ap->a_cnp); 241 if (error) 242 return (error); 243 return (0); 244 } 245 246 static int 247 ext2_open(ap) 248 struct vop_open_args /* { 249 struct vnode *a_vp; 250 int a_mode; 251 struct ucred *a_cred; 252 struct thread *a_td; 253 } */ *ap; 254 { 255 256 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 257 return (EOPNOTSUPP); 258 259 /* 260 * Files marked append-only must be opened for appending. 261 */ 262 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 263 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 264 return (EPERM); 265 266 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 267 268 return (0); 269 } 270 271 /* 272 * Close called. 273 * 274 * Update the times on the inode. 275 */ 276 static int 277 ext2_close(ap) 278 struct vop_close_args /* { 279 struct vnode *a_vp; 280 int a_fflag; 281 struct ucred *a_cred; 282 struct thread *a_td; 283 } */ *ap; 284 { 285 struct vnode *vp = ap->a_vp; 286 287 VI_LOCK(vp); 288 if (vp->v_usecount > 1) 289 ext2_itimes_locked(vp); 290 VI_UNLOCK(vp); 291 return (0); 292 } 293 294 static int 295 ext2_access(ap) 296 struct vop_access_args /* { 297 struct vnode *a_vp; 298 accmode_t a_accmode; 299 struct ucred *a_cred; 300 struct thread *a_td; 301 } */ *ap; 302 { 303 struct vnode *vp = ap->a_vp; 304 struct inode *ip = VTOI(vp); 305 accmode_t accmode = ap->a_accmode; 306 int error; 307 308 if (vp->v_type == VBLK || vp->v_type == VCHR) 309 return (EOPNOTSUPP); 310 311 /* 312 * Disallow write attempts on read-only file systems; 313 * unless the file is a socket, fifo, or a block or 314 * character device resident on the file system. 315 */ 316 if (accmode & VWRITE) { 317 switch (vp->v_type) { 318 case VDIR: 319 case VLNK: 320 case VREG: 321 if (vp->v_mount->mnt_flag & MNT_RDONLY) 322 return (EROFS); 323 break; 324 default: 325 break; 326 } 327 } 328 329 /* If immutable bit set, nobody gets to write it. */ 330 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 331 return (EPERM); 332 333 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 334 ap->a_accmode, ap->a_cred, NULL); 335 return (error); 336 } 337 338 static int 339 ext2_getattr(ap) 340 struct vop_getattr_args /* { 341 struct vnode *a_vp; 342 struct vattr *a_vap; 343 struct ucred *a_cred; 344 } */ *ap; 345 { 346 struct vnode *vp = ap->a_vp; 347 struct inode *ip = VTOI(vp); 348 struct vattr *vap = ap->a_vap; 349 350 ext2_itimes(vp); 351 /* 352 * Copy from inode table 353 */ 354 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 355 vap->va_fileid = ip->i_number; 356 vap->va_mode = ip->i_mode & ~IFMT; 357 vap->va_nlink = ip->i_nlink; 358 vap->va_uid = ip->i_uid; 359 vap->va_gid = ip->i_gid; 360 vap->va_rdev = ip->i_rdev; 361 vap->va_size = ip->i_size; 362 vap->va_atime.tv_sec = ip->i_atime; 363 vap->va_atime.tv_nsec = ip->i_atimensec; 364 vap->va_mtime.tv_sec = ip->i_mtime; 365 vap->va_mtime.tv_nsec = ip->i_mtimensec; 366 vap->va_ctime.tv_sec = ip->i_ctime; 367 vap->va_ctime.tv_nsec = ip->i_ctimensec; 368 vap->va_flags = ip->i_flags; 369 vap->va_gen = ip->i_gen; 370 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 371 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 372 vap->va_type = IFTOVT(ip->i_mode); 373 vap->va_filerev = ip->i_modrev; 374 return (0); 375 } 376 377 /* 378 * Set attribute vnode op. called from several syscalls 379 */ 380 static int 381 ext2_setattr(ap) 382 struct vop_setattr_args /* { 383 struct vnode *a_vp; 384 struct vattr *a_vap; 385 struct ucred *a_cred; 386 } */ *ap; 387 { 388 struct vattr *vap = ap->a_vap; 389 struct vnode *vp = ap->a_vp; 390 struct inode *ip = VTOI(vp); 391 struct ucred *cred = ap->a_cred; 392 struct thread *td = curthread; 393 int error; 394 395 /* 396 * Check for unsettable attributes. 397 */ 398 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 399 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 400 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 401 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 402 return (EINVAL); 403 } 404 if (vap->va_flags != VNOVAL) { 405 /* Disallow flags not supported by ext2fs. */ 406 if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 407 return (EOPNOTSUPP); 408 409 if (vp->v_mount->mnt_flag & MNT_RDONLY) 410 return (EROFS); 411 /* 412 * Callers may only modify the file flags on objects they 413 * have VADMIN rights for. 414 */ 415 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 416 return (error); 417 /* 418 * Unprivileged processes and privileged processes in 419 * jail() are not permitted to unset system flags, or 420 * modify flags if any system flags are set. 421 * Privileged non-jail processes may not modify system flags 422 * if securelevel > 0 and any existing system flags are set. 423 */ 424 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 425 if (ip->i_flags 426 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 427 error = securelevel_gt(cred, 0); 428 if (error) 429 return (error); 430 } 431 ip->i_flags = vap->va_flags; 432 } else { 433 if (ip->i_flags 434 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 435 (vap->va_flags & UF_SETTABLE) != vap->va_flags) 436 return (EPERM); 437 ip->i_flags &= SF_SETTABLE; 438 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 439 } 440 ip->i_flag |= IN_CHANGE; 441 if (vap->va_flags & (IMMUTABLE | APPEND)) 442 return (0); 443 } 444 if (ip->i_flags & (IMMUTABLE | APPEND)) 445 return (EPERM); 446 /* 447 * Go through the fields and update iff not VNOVAL. 448 */ 449 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 450 if (vp->v_mount->mnt_flag & MNT_RDONLY) 451 return (EROFS); 452 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 453 td)) != 0) 454 return (error); 455 } 456 if (vap->va_size != VNOVAL) { 457 /* 458 * Disallow write attempts on read-only file systems; 459 * unless the file is a socket, fifo, or a block or 460 * character device resident on the file system. 461 */ 462 switch (vp->v_type) { 463 case VDIR: 464 return (EISDIR); 465 case VLNK: 466 case VREG: 467 if (vp->v_mount->mnt_flag & MNT_RDONLY) 468 return (EROFS); 469 break; 470 default: 471 break; 472 } 473 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 474 return (error); 475 } 476 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 477 if (vp->v_mount->mnt_flag & MNT_RDONLY) 478 return (EROFS); 479 /* 480 * From utimes(2): 481 * If times is NULL, ... The caller must be the owner of 482 * the file, have permission to write the file, or be the 483 * super-user. 484 * If times is non-NULL, ... The caller must be the owner of 485 * the file or be the super-user. 486 */ 487 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 488 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 489 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 490 return (error); 491 if (vap->va_atime.tv_sec != VNOVAL) 492 ip->i_flag |= IN_ACCESS; 493 if (vap->va_mtime.tv_sec != VNOVAL) 494 ip->i_flag |= IN_CHANGE | IN_UPDATE; 495 ext2_itimes(vp); 496 if (vap->va_atime.tv_sec != VNOVAL) { 497 ip->i_atime = vap->va_atime.tv_sec; 498 ip->i_atimensec = vap->va_atime.tv_nsec; 499 } 500 if (vap->va_mtime.tv_sec != VNOVAL) { 501 ip->i_mtime = vap->va_mtime.tv_sec; 502 ip->i_mtimensec = vap->va_mtime.tv_nsec; 503 } 504 error = ext2_update(vp, 0); 505 if (error) 506 return (error); 507 } 508 error = 0; 509 if (vap->va_mode != (mode_t)VNOVAL) { 510 if (vp->v_mount->mnt_flag & MNT_RDONLY) 511 return (EROFS); 512 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 513 } 514 return (error); 515 } 516 517 /* 518 * Change the mode on a file. 519 * Inode must be locked before calling. 520 */ 521 static int 522 ext2_chmod(vp, mode, cred, td) 523 struct vnode *vp; 524 int mode; 525 struct ucred *cred; 526 struct thread *td; 527 { 528 struct inode *ip = VTOI(vp); 529 int error; 530 531 /* 532 * To modify the permissions on a file, must possess VADMIN 533 * for that file. 534 */ 535 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 536 return (error); 537 /* 538 * Privileged processes may set the sticky bit on non-directories, 539 * as well as set the setgid bit on a file with a group that the 540 * process is not a member of. 541 */ 542 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 543 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 544 if (error) 545 return (EFTYPE); 546 } 547 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 548 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 549 if (error) 550 return (error); 551 } 552 ip->i_mode &= ~ALLPERMS; 553 ip->i_mode |= (mode & ALLPERMS); 554 ip->i_flag |= IN_CHANGE; 555 return (0); 556 } 557 558 /* 559 * Perform chown operation on inode ip; 560 * inode must be locked prior to call. 561 */ 562 static int 563 ext2_chown(vp, uid, gid, cred, td) 564 struct vnode *vp; 565 uid_t uid; 566 gid_t gid; 567 struct ucred *cred; 568 struct thread *td; 569 { 570 struct inode *ip = VTOI(vp); 571 uid_t ouid; 572 gid_t ogid; 573 int error = 0; 574 575 if (uid == (uid_t)VNOVAL) 576 uid = ip->i_uid; 577 if (gid == (gid_t)VNOVAL) 578 gid = ip->i_gid; 579 /* 580 * To modify the ownership of a file, must possess VADMIN 581 * for that file. 582 */ 583 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 584 return (error); 585 /* 586 * To change the owner of a file, or change the group of a file 587 * to a group of which we are not a member, the caller must 588 * have privilege. 589 */ 590 if (uid != ip->i_uid || (gid != ip->i_gid && 591 !groupmember(gid, cred))) { 592 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 593 if (error) 594 return (error); 595 } 596 ogid = ip->i_gid; 597 ouid = ip->i_uid; 598 ip->i_gid = gid; 599 ip->i_uid = uid; 600 ip->i_flag |= IN_CHANGE; 601 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 602 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 603 ip->i_mode &= ~(ISUID | ISGID); 604 } 605 return (0); 606 } 607 608 /* 609 * Synch an open file. 610 */ 611 /* ARGSUSED */ 612 static int 613 ext2_fsync(ap) 614 struct vop_fsync_args /* { 615 struct vnode *a_vp; 616 struct ucred *a_cred; 617 int a_waitfor; 618 struct thread *a_td; 619 } */ *ap; 620 { 621 /* 622 * Flush all dirty buffers associated with a vnode. 623 */ 624 625 vop_stdfsync(ap); 626 627 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 628 } 629 630 /* 631 * Mknod vnode call 632 */ 633 /* ARGSUSED */ 634 static int 635 ext2_mknod(ap) 636 struct vop_mknod_args /* { 637 struct vnode *a_dvp; 638 struct vnode **a_vpp; 639 struct componentname *a_cnp; 640 struct vattr *a_vap; 641 } */ *ap; 642 { 643 struct vattr *vap = ap->a_vap; 644 struct vnode **vpp = ap->a_vpp; 645 struct inode *ip; 646 ino_t ino; 647 int error; 648 649 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 650 ap->a_dvp, vpp, ap->a_cnp); 651 if (error) 652 return (error); 653 ip = VTOI(*vpp); 654 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 655 if (vap->va_rdev != VNOVAL) { 656 /* 657 * Want to be able to use this to make badblock 658 * inodes, so don't truncate the dev number. 659 */ 660 ip->i_rdev = vap->va_rdev; 661 } 662 /* 663 * Remove inode, then reload it through VFS_VGET so it is 664 * checked to see if it is an alias of an existing entry in 665 * the inode cache. XXX I don't believe this is necessary now. 666 */ 667 (*vpp)->v_type = VNON; 668 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 669 vgone(*vpp); 670 vput(*vpp); 671 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 672 if (error) { 673 *vpp = NULL; 674 return (error); 675 } 676 return (0); 677 } 678 679 static int 680 ext2_remove(ap) 681 struct vop_remove_args /* { 682 struct vnode *a_dvp; 683 struct vnode *a_vp; 684 struct componentname *a_cnp; 685 } */ *ap; 686 { 687 struct inode *ip; 688 struct vnode *vp = ap->a_vp; 689 struct vnode *dvp = ap->a_dvp; 690 int error; 691 692 ip = VTOI(vp); 693 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 694 (VTOI(dvp)->i_flags & APPEND)) { 695 error = EPERM; 696 goto out; 697 } 698 error = ext2_dirremove(dvp, ap->a_cnp); 699 if (error == 0) { 700 ip->i_nlink--; 701 ip->i_flag |= IN_CHANGE; 702 } 703 out: 704 return (error); 705 } 706 707 /* 708 * link vnode call 709 */ 710 static int 711 ext2_link(ap) 712 struct vop_link_args /* { 713 struct vnode *a_tdvp; 714 struct vnode *a_vp; 715 struct componentname *a_cnp; 716 } */ *ap; 717 { 718 struct vnode *vp = ap->a_vp; 719 struct vnode *tdvp = ap->a_tdvp; 720 struct componentname *cnp = ap->a_cnp; 721 struct inode *ip; 722 int error; 723 724 #ifdef DIAGNOSTIC 725 if ((cnp->cn_flags & HASBUF) == 0) 726 panic("ext2_link: no name"); 727 #endif 728 if (tdvp->v_mount != vp->v_mount) { 729 error = EXDEV; 730 goto out; 731 } 732 ip = VTOI(vp); 733 if ((nlink_t)ip->i_nlink >= LINK_MAX) { 734 error = EMLINK; 735 goto out; 736 } 737 if (ip->i_flags & (IMMUTABLE | APPEND)) { 738 error = EPERM; 739 goto out; 740 } 741 ip->i_nlink++; 742 ip->i_flag |= IN_CHANGE; 743 error = ext2_update(vp, !DOINGASYNC(vp)); 744 if (!error) 745 error = ext2_direnter(ip, tdvp, cnp); 746 if (error) { 747 ip->i_nlink--; 748 ip->i_flag |= IN_CHANGE; 749 } 750 out: 751 return (error); 752 } 753 754 /* 755 * Rename system call. 756 * rename("foo", "bar"); 757 * is essentially 758 * unlink("bar"); 759 * link("foo", "bar"); 760 * unlink("foo"); 761 * but ``atomically''. Can't do full commit without saving state in the 762 * inode on disk which isn't feasible at this time. Best we can do is 763 * always guarantee the target exists. 764 * 765 * Basic algorithm is: 766 * 767 * 1) Bump link count on source while we're linking it to the 768 * target. This also ensure the inode won't be deleted out 769 * from underneath us while we work (it may be truncated by 770 * a concurrent `trunc' or `open' for creation). 771 * 2) Link source to destination. If destination already exists, 772 * delete it first. 773 * 3) Unlink source reference to inode if still around. If a 774 * directory was moved and the parent of the destination 775 * is different from the source, patch the ".." entry in the 776 * directory. 777 */ 778 static int 779 ext2_rename(ap) 780 struct vop_rename_args /* { 781 struct vnode *a_fdvp; 782 struct vnode *a_fvp; 783 struct componentname *a_fcnp; 784 struct vnode *a_tdvp; 785 struct vnode *a_tvp; 786 struct componentname *a_tcnp; 787 } */ *ap; 788 { 789 struct vnode *tvp = ap->a_tvp; 790 struct vnode *tdvp = ap->a_tdvp; 791 struct vnode *fvp = ap->a_fvp; 792 struct vnode *fdvp = ap->a_fdvp; 793 struct componentname *tcnp = ap->a_tcnp; 794 struct componentname *fcnp = ap->a_fcnp; 795 struct inode *ip, *xp, *dp; 796 struct dirtemplate dirbuf; 797 int doingdirectory = 0, oldparent = 0, newparent = 0; 798 int error = 0; 799 u_char namlen; 800 801 #ifdef DIAGNOSTIC 802 if ((tcnp->cn_flags & HASBUF) == 0 || 803 (fcnp->cn_flags & HASBUF) == 0) 804 panic("ext2_rename: no name"); 805 #endif 806 /* 807 * Check for cross-device rename. 808 */ 809 if ((fvp->v_mount != tdvp->v_mount) || 810 (tvp && (fvp->v_mount != tvp->v_mount))) { 811 error = EXDEV; 812 abortit: 813 if (tdvp == tvp) 814 vrele(tdvp); 815 else 816 vput(tdvp); 817 if (tvp) 818 vput(tvp); 819 vrele(fdvp); 820 vrele(fvp); 821 return (error); 822 } 823 824 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 825 (VTOI(tdvp)->i_flags & APPEND))) { 826 error = EPERM; 827 goto abortit; 828 } 829 830 /* 831 * Renaming a file to itself has no effect. The upper layers should 832 * not call us in that case. Temporarily just warn if they do. 833 */ 834 if (fvp == tvp) { 835 printf("ext2_rename: fvp == tvp (can't happen)\n"); 836 error = 0; 837 goto abortit; 838 } 839 840 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 841 goto abortit; 842 dp = VTOI(fdvp); 843 ip = VTOI(fvp); 844 if (ip->i_nlink >= LINK_MAX) { 845 VOP_UNLOCK(fvp, 0); 846 error = EMLINK; 847 goto abortit; 848 } 849 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 850 || (dp->i_flags & APPEND)) { 851 VOP_UNLOCK(fvp, 0); 852 error = EPERM; 853 goto abortit; 854 } 855 if ((ip->i_mode & IFMT) == IFDIR) { 856 /* 857 * Avoid ".", "..", and aliases of "." for obvious reasons. 858 */ 859 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 860 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 861 (ip->i_flag & IN_RENAME)) { 862 VOP_UNLOCK(fvp, 0); 863 error = EINVAL; 864 goto abortit; 865 } 866 ip->i_flag |= IN_RENAME; 867 oldparent = dp->i_number; 868 doingdirectory++; 869 } 870 vrele(fdvp); 871 872 /* 873 * When the target exists, both the directory 874 * and target vnodes are returned locked. 875 */ 876 dp = VTOI(tdvp); 877 xp = NULL; 878 if (tvp) 879 xp = VTOI(tvp); 880 881 /* 882 * 1) Bump link count while we're moving stuff 883 * around. If we crash somewhere before 884 * completing our work, the link count 885 * may be wrong, but correctable. 886 */ 887 ip->i_nlink++; 888 ip->i_flag |= IN_CHANGE; 889 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 890 VOP_UNLOCK(fvp, 0); 891 goto bad; 892 } 893 894 /* 895 * If ".." must be changed (ie the directory gets a new 896 * parent) then the source directory must not be in the 897 * directory hierarchy above the target, as this would 898 * orphan everything below the source directory. Also 899 * the user must have write permission in the source so 900 * as to be able to change "..". We must repeat the call 901 * to namei, as the parent directory is unlocked by the 902 * call to checkpath(). 903 */ 904 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 905 VOP_UNLOCK(fvp, 0); 906 if (oldparent != dp->i_number) 907 newparent = dp->i_number; 908 if (doingdirectory && newparent) { 909 if (error) /* write access check above */ 910 goto bad; 911 if (xp != NULL) 912 vput(tvp); 913 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 914 if (error) 915 goto out; 916 VREF(tdvp); 917 error = relookup(tdvp, &tvp, tcnp); 918 if (error) 919 goto out; 920 vrele(tdvp); 921 dp = VTOI(tdvp); 922 xp = NULL; 923 if (tvp) 924 xp = VTOI(tvp); 925 } 926 /* 927 * 2) If target doesn't exist, link the target 928 * to the source and unlink the source. 929 * Otherwise, rewrite the target directory 930 * entry to reference the source inode and 931 * expunge the original entry's existence. 932 */ 933 if (xp == NULL) { 934 if (dp->i_devvp != ip->i_devvp) 935 panic("ext2_rename: EXDEV"); 936 /* 937 * Account for ".." in new directory. 938 * When source and destination have the same 939 * parent we don't fool with the link count. 940 */ 941 if (doingdirectory && newparent) { 942 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 943 error = EMLINK; 944 goto bad; 945 } 946 dp->i_nlink++; 947 dp->i_flag |= IN_CHANGE; 948 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 949 if (error) 950 goto bad; 951 } 952 error = ext2_direnter(ip, tdvp, tcnp); 953 if (error) { 954 if (doingdirectory && newparent) { 955 dp->i_nlink--; 956 dp->i_flag |= IN_CHANGE; 957 (void)ext2_update(tdvp, 1); 958 } 959 goto bad; 960 } 961 vput(tdvp); 962 } else { 963 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 964 panic("ext2_rename: EXDEV"); 965 /* 966 * Short circuit rename(foo, foo). 967 */ 968 if (xp->i_number == ip->i_number) 969 panic("ext2_rename: same file"); 970 /* 971 * If the parent directory is "sticky", then the user must 972 * own the parent directory, or the destination of the rename, 973 * otherwise the destination may not be changed (except by 974 * root). This implements append-only directories. 975 */ 976 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 977 tcnp->cn_cred->cr_uid != dp->i_uid && 978 xp->i_uid != tcnp->cn_cred->cr_uid) { 979 error = EPERM; 980 goto bad; 981 } 982 /* 983 * Target must be empty if a directory and have no links 984 * to it. Also, ensure source and target are compatible 985 * (both directories, or both not directories). 986 */ 987 if ((xp->i_mode&IFMT) == IFDIR) { 988 if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || 989 xp->i_nlink > 2) { 990 error = ENOTEMPTY; 991 goto bad; 992 } 993 if (!doingdirectory) { 994 error = ENOTDIR; 995 goto bad; 996 } 997 cache_purge(tdvp); 998 } else if (doingdirectory) { 999 error = EISDIR; 1000 goto bad; 1001 } 1002 error = ext2_dirrewrite(dp, ip, tcnp); 1003 if (error) 1004 goto bad; 1005 /* 1006 * If the target directory is in the same 1007 * directory as the source directory, 1008 * decrement the link count on the parent 1009 * of the target directory. 1010 */ 1011 if (doingdirectory && !newparent) { 1012 dp->i_nlink--; 1013 dp->i_flag |= IN_CHANGE; 1014 } 1015 vput(tdvp); 1016 /* 1017 * Adjust the link count of the target to 1018 * reflect the dirrewrite above. If this is 1019 * a directory it is empty and there are 1020 * no links to it, so we can squash the inode and 1021 * any space associated with it. We disallowed 1022 * renaming over top of a directory with links to 1023 * it above, as the remaining link would point to 1024 * a directory without "." or ".." entries. 1025 */ 1026 xp->i_nlink--; 1027 if (doingdirectory) { 1028 if (--xp->i_nlink != 0) 1029 panic("ext2_rename: linked directory"); 1030 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 1031 tcnp->cn_cred, tcnp->cn_thread); 1032 } 1033 xp->i_flag |= IN_CHANGE; 1034 vput(tvp); 1035 xp = NULL; 1036 } 1037 1038 /* 1039 * 3) Unlink the source. 1040 */ 1041 fcnp->cn_flags &= ~MODMASK; 1042 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1043 VREF(fdvp); 1044 error = relookup(fdvp, &fvp, fcnp); 1045 if (error == 0) 1046 vrele(fdvp); 1047 if (fvp != NULL) { 1048 xp = VTOI(fvp); 1049 dp = VTOI(fdvp); 1050 } else { 1051 /* 1052 * From name has disappeared. 1053 */ 1054 if (doingdirectory) 1055 panic("ext2_rename: lost dir entry"); 1056 vrele(ap->a_fvp); 1057 return (0); 1058 } 1059 /* 1060 * Ensure that the directory entry still exists and has not 1061 * changed while the new name has been entered. If the source is 1062 * a file then the entry may have been unlinked or renamed. In 1063 * either case there is no further work to be done. If the source 1064 * is a directory then it cannot have been rmdir'ed; its link 1065 * count of three would cause a rmdir to fail with ENOTEMPTY. 1066 * The IN_RENAME flag ensures that it cannot be moved by another 1067 * rename. 1068 */ 1069 if (xp != ip) { 1070 if (doingdirectory) 1071 panic("ext2_rename: lost dir entry"); 1072 } else { 1073 /* 1074 * If the source is a directory with a 1075 * new parent, the link count of the old 1076 * parent directory must be decremented 1077 * and ".." set to point to the new parent. 1078 */ 1079 if (doingdirectory && newparent) { 1080 dp->i_nlink--; 1081 dp->i_flag |= IN_CHANGE; 1082 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1083 sizeof (struct dirtemplate), (off_t)0, 1084 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1085 tcnp->cn_cred, NOCRED, NULL, NULL); 1086 if (error == 0) { 1087 /* Like ufs little-endian: */ 1088 namlen = dirbuf.dotdot_type; 1089 if (namlen != 2 || 1090 dirbuf.dotdot_name[0] != '.' || 1091 dirbuf.dotdot_name[1] != '.') { 1092 ext2_dirbad(xp, (doff_t)12, 1093 "rename: mangled dir"); 1094 } else { 1095 dirbuf.dotdot_ino = newparent; 1096 (void) vn_rdwr(UIO_WRITE, fvp, 1097 (caddr_t)&dirbuf, 1098 sizeof (struct dirtemplate), 1099 (off_t)0, UIO_SYSSPACE, 1100 IO_NODELOCKED | IO_SYNC | 1101 IO_NOMACCHECK, tcnp->cn_cred, 1102 NOCRED, NULL, NULL); 1103 cache_purge(fdvp); 1104 } 1105 } 1106 } 1107 error = ext2_dirremove(fdvp, fcnp); 1108 if (!error) { 1109 xp->i_nlink--; 1110 xp->i_flag |= IN_CHANGE; 1111 } 1112 xp->i_flag &= ~IN_RENAME; 1113 } 1114 if (dp) 1115 vput(fdvp); 1116 if (xp) 1117 vput(fvp); 1118 vrele(ap->a_fvp); 1119 return (error); 1120 1121 bad: 1122 if (xp) 1123 vput(ITOV(xp)); 1124 vput(ITOV(dp)); 1125 out: 1126 if (doingdirectory) 1127 ip->i_flag &= ~IN_RENAME; 1128 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1129 ip->i_nlink--; 1130 ip->i_flag |= IN_CHANGE; 1131 ip->i_flag &= ~IN_RENAME; 1132 vput(fvp); 1133 } else 1134 vrele(fvp); 1135 return (error); 1136 } 1137 1138 /* 1139 * Mkdir system call 1140 */ 1141 static int 1142 ext2_mkdir(ap) 1143 struct vop_mkdir_args /* { 1144 struct vnode *a_dvp; 1145 struct vnode **a_vpp; 1146 struct componentname *a_cnp; 1147 struct vattr *a_vap; 1148 } */ *ap; 1149 { 1150 struct vnode *dvp = ap->a_dvp; 1151 struct vattr *vap = ap->a_vap; 1152 struct componentname *cnp = ap->a_cnp; 1153 struct inode *ip, *dp; 1154 struct vnode *tvp; 1155 struct dirtemplate dirtemplate, *dtp; 1156 int error, dmode; 1157 1158 #ifdef DIAGNOSTIC 1159 if ((cnp->cn_flags & HASBUF) == 0) 1160 panic("ext2_mkdir: no name"); 1161 #endif 1162 dp = VTOI(dvp); 1163 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 1164 error = EMLINK; 1165 goto out; 1166 } 1167 dmode = vap->va_mode & 0777; 1168 dmode |= IFDIR; 1169 /* 1170 * Must simulate part of ext2_makeinode here to acquire the inode, 1171 * but not have it entered in the parent directory. The entry is 1172 * made later after writing "." and ".." entries. 1173 */ 1174 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1175 if (error) 1176 goto out; 1177 ip = VTOI(tvp); 1178 ip->i_gid = dp->i_gid; 1179 #ifdef SUIDDIR 1180 { 1181 /* 1182 * if we are hacking owners here, (only do this where told to) 1183 * and we are not giving it TOO root, (would subvert quotas) 1184 * then go ahead and give it to the other user. 1185 * The new directory also inherits the SUID bit. 1186 * If user's UID and dir UID are the same, 1187 * 'give it away' so that the SUID is still forced on. 1188 */ 1189 if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1190 (dp->i_mode & ISUID) && dp->i_uid) { 1191 dmode |= ISUID; 1192 ip->i_uid = dp->i_uid; 1193 } else { 1194 ip->i_uid = cnp->cn_cred->cr_uid; 1195 } 1196 } 1197 #else 1198 ip->i_uid = cnp->cn_cred->cr_uid; 1199 #endif 1200 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1201 ip->i_mode = dmode; 1202 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1203 ip->i_nlink = 2; 1204 if (cnp->cn_flags & ISWHITEOUT) 1205 ip->i_flags |= UF_OPAQUE; 1206 error = ext2_update(tvp, 1); 1207 1208 /* 1209 * Bump link count in parent directory 1210 * to reflect work done below. Should 1211 * be done before reference is created 1212 * so reparation is possible if we crash. 1213 */ 1214 dp->i_nlink++; 1215 dp->i_flag |= IN_CHANGE; 1216 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1217 if (error) 1218 goto bad; 1219 1220 /* Initialize directory with "." and ".." from static template. */ 1221 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1222 EXT2F_INCOMPAT_FTYPE)) 1223 dtp = &mastertemplate; 1224 else 1225 dtp = &omastertemplate; 1226 dirtemplate = *dtp; 1227 dirtemplate.dot_ino = ip->i_number; 1228 dirtemplate.dotdot_ino = dp->i_number; 1229 /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE 1230 * so let's just redefine it - for this function only 1231 */ 1232 #undef DIRBLKSIZ 1233 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1234 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1235 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1236 sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, 1237 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1238 NULL, NULL); 1239 if (error) { 1240 dp->i_nlink--; 1241 dp->i_flag |= IN_CHANGE; 1242 goto bad; 1243 } 1244 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1245 /* XXX should grow with balloc() */ 1246 panic("ext2_mkdir: blksize"); 1247 else { 1248 ip->i_size = DIRBLKSIZ; 1249 ip->i_flag |= IN_CHANGE; 1250 } 1251 1252 /* Directory set up, now install its entry in the parent directory. */ 1253 error = ext2_direnter(ip, dvp, cnp); 1254 if (error) { 1255 dp->i_nlink--; 1256 dp->i_flag |= IN_CHANGE; 1257 } 1258 bad: 1259 /* 1260 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1261 * for us because we set the link count to 0. 1262 */ 1263 if (error) { 1264 ip->i_nlink = 0; 1265 ip->i_flag |= IN_CHANGE; 1266 vput(tvp); 1267 } else 1268 *ap->a_vpp = tvp; 1269 out: 1270 return (error); 1271 #undef DIRBLKSIZ 1272 #define DIRBLKSIZ DEV_BSIZE 1273 } 1274 1275 /* 1276 * Rmdir system call. 1277 */ 1278 static int 1279 ext2_rmdir(ap) 1280 struct vop_rmdir_args /* { 1281 struct vnode *a_dvp; 1282 struct vnode *a_vp; 1283 struct componentname *a_cnp; 1284 } */ *ap; 1285 { 1286 struct vnode *vp = ap->a_vp; 1287 struct vnode *dvp = ap->a_dvp; 1288 struct componentname *cnp = ap->a_cnp; 1289 struct inode *ip, *dp; 1290 int error; 1291 1292 ip = VTOI(vp); 1293 dp = VTOI(dvp); 1294 1295 /* 1296 * Verify the directory is empty (and valid). 1297 * (Rmdir ".." won't be valid since 1298 * ".." will contain a reference to 1299 * the current directory and thus be 1300 * non-empty.) 1301 */ 1302 error = 0; 1303 if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1304 error = ENOTEMPTY; 1305 goto out; 1306 } 1307 if ((dp->i_flags & APPEND) 1308 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1309 error = EPERM; 1310 goto out; 1311 } 1312 /* 1313 * Delete reference to directory before purging 1314 * inode. If we crash in between, the directory 1315 * will be reattached to lost+found, 1316 */ 1317 error = ext2_dirremove(dvp, cnp); 1318 if (error) 1319 goto out; 1320 dp->i_nlink--; 1321 dp->i_flag |= IN_CHANGE; 1322 cache_purge(dvp); 1323 VOP_UNLOCK(dvp, 0); 1324 /* 1325 * Truncate inode. The only stuff left 1326 * in the directory is "." and "..". The 1327 * "." reference is inconsequential since 1328 * we're quashing it. The ".." reference 1329 * has already been adjusted above. We've 1330 * removed the "." reference and the reference 1331 * in the parent directory, but there may be 1332 * other hard links so decrement by 2 and 1333 * worry about them later. 1334 */ 1335 ip->i_nlink -= 2; 1336 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1337 cnp->cn_thread); 1338 cache_purge(ITOV(ip)); 1339 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1340 out: 1341 return (error); 1342 } 1343 1344 /* 1345 * symlink -- make a symbolic link 1346 */ 1347 static int 1348 ext2_symlink(ap) 1349 struct vop_symlink_args /* { 1350 struct vnode *a_dvp; 1351 struct vnode **a_vpp; 1352 struct componentname *a_cnp; 1353 struct vattr *a_vap; 1354 char *a_target; 1355 } */ *ap; 1356 { 1357 struct vnode *vp, **vpp = ap->a_vpp; 1358 struct inode *ip; 1359 int len, error; 1360 1361 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1362 vpp, ap->a_cnp); 1363 if (error) 1364 return (error); 1365 vp = *vpp; 1366 len = strlen(ap->a_target); 1367 if (len < vp->v_mount->mnt_maxsymlinklen) { 1368 ip = VTOI(vp); 1369 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1370 ip->i_size = len; 1371 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1372 } else 1373 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1374 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1375 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1376 if (error) 1377 vput(vp); 1378 return (error); 1379 } 1380 1381 /* 1382 * Return target name of a symbolic link 1383 */ 1384 static int 1385 ext2_readlink(ap) 1386 struct vop_readlink_args /* { 1387 struct vnode *a_vp; 1388 struct uio *a_uio; 1389 struct ucred *a_cred; 1390 } */ *ap; 1391 { 1392 struct vnode *vp = ap->a_vp; 1393 struct inode *ip = VTOI(vp); 1394 int isize; 1395 1396 isize = ip->i_size; 1397 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1398 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1399 return (0); 1400 } 1401 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1402 } 1403 1404 /* 1405 * Calculate the logical to physical mapping if not done already, 1406 * then call the device strategy routine. 1407 * 1408 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1409 * deadlock on memory. See ext2_bmap() for details. 1410 */ 1411 static int 1412 ext2_strategy(ap) 1413 struct vop_strategy_args /* { 1414 struct vnode *a_vp; 1415 struct buf *a_bp; 1416 } */ *ap; 1417 { 1418 struct buf *bp = ap->a_bp; 1419 struct vnode *vp = ap->a_vp; 1420 struct inode *ip; 1421 struct bufobj *bo; 1422 int32_t blkno; 1423 int error; 1424 1425 ip = VTOI(vp); 1426 if (vp->v_type == VBLK || vp->v_type == VCHR) 1427 panic("ext2_strategy: spec"); 1428 if (bp->b_blkno == bp->b_lblkno) { 1429 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1430 bp->b_blkno = blkno; 1431 if (error) { 1432 bp->b_error = error; 1433 bp->b_ioflags |= BIO_ERROR; 1434 bufdone(bp); 1435 return (0); 1436 } 1437 if ((long)bp->b_blkno == -1) 1438 vfs_bio_clrbuf(bp); 1439 } 1440 if ((long)bp->b_blkno == -1) { 1441 bufdone(bp); 1442 return (0); 1443 } 1444 bp->b_iooffset = dbtob(bp->b_blkno); 1445 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1446 BO_STRATEGY(bo, bp); 1447 return (0); 1448 } 1449 1450 /* 1451 * Print out the contents of an inode. 1452 */ 1453 static int 1454 ext2_print(ap) 1455 struct vop_print_args /* { 1456 struct vnode *a_vp; 1457 } */ *ap; 1458 { 1459 struct vnode *vp = ap->a_vp; 1460 struct inode *ip = VTOI(vp); 1461 1462 vn_printf(ip->i_devvp, "\tino %lu", (u_long)ip->i_number); 1463 if (vp->v_type == VFIFO) 1464 fifo_printinfo(vp); 1465 printf("\n"); 1466 return (0); 1467 } 1468 1469 /* 1470 * Close wrapper for fifos. 1471 * 1472 * Update the times on the inode then do device close. 1473 */ 1474 static int 1475 ext2fifo_close(ap) 1476 struct vop_close_args /* { 1477 struct vnode *a_vp; 1478 int a_fflag; 1479 struct ucred *a_cred; 1480 struct thread *a_td; 1481 } */ *ap; 1482 { 1483 struct vnode *vp = ap->a_vp; 1484 1485 VI_LOCK(vp); 1486 if (vp->v_usecount > 1) 1487 ext2_itimes_locked(vp); 1488 VI_UNLOCK(vp); 1489 return (fifo_specops.vop_close(ap)); 1490 } 1491 1492 /* 1493 * Kqfilter wrapper for fifos. 1494 * 1495 * Fall through to ext2 kqfilter routines if needed 1496 */ 1497 static int 1498 ext2fifo_kqfilter(ap) 1499 struct vop_kqfilter_args *ap; 1500 { 1501 int error; 1502 1503 error = fifo_specops.vop_kqfilter(ap); 1504 if (error) 1505 error = vfs_kqfilter(ap); 1506 return (error); 1507 } 1508 1509 /* 1510 * Return POSIX pathconf information applicable to ext2 filesystems. 1511 */ 1512 static int 1513 ext2_pathconf(ap) 1514 struct vop_pathconf_args /* { 1515 struct vnode *a_vp; 1516 int a_name; 1517 int *a_retval; 1518 } */ *ap; 1519 { 1520 1521 switch (ap->a_name) { 1522 case _PC_LINK_MAX: 1523 *ap->a_retval = LINK_MAX; 1524 return (0); 1525 case _PC_NAME_MAX: 1526 *ap->a_retval = NAME_MAX; 1527 return (0); 1528 case _PC_PATH_MAX: 1529 *ap->a_retval = PATH_MAX; 1530 return (0); 1531 case _PC_PIPE_BUF: 1532 *ap->a_retval = PIPE_BUF; 1533 return (0); 1534 case _PC_CHOWN_RESTRICTED: 1535 *ap->a_retval = 1; 1536 return (0); 1537 case _PC_NO_TRUNC: 1538 *ap->a_retval = 1; 1539 return (0); 1540 default: 1541 return (EINVAL); 1542 } 1543 /* NOTREACHED */ 1544 } 1545 1546 /* 1547 * Vnode pointer to File handle 1548 */ 1549 /* ARGSUSED */ 1550 static int 1551 ext2_vptofh(ap) 1552 struct vop_vptofh_args /* { 1553 struct vnode *a_vp; 1554 struct fid *a_fhp; 1555 } */ *ap; 1556 { 1557 struct inode *ip; 1558 struct ufid *ufhp; 1559 1560 ip = VTOI(ap->a_vp); 1561 ufhp = (struct ufid *)ap->a_fhp; 1562 ufhp->ufid_len = sizeof(struct ufid); 1563 ufhp->ufid_ino = ip->i_number; 1564 ufhp->ufid_gen = ip->i_gen; 1565 return (0); 1566 } 1567 1568 /* 1569 * Initialize the vnode associated with a new inode, handle aliased 1570 * vnodes. 1571 */ 1572 int 1573 ext2_vinit(mntp, fifoops, vpp) 1574 struct mount *mntp; 1575 struct vop_vector *fifoops; 1576 struct vnode **vpp; 1577 { 1578 struct inode *ip; 1579 struct vnode *vp; 1580 1581 vp = *vpp; 1582 ip = VTOI(vp); 1583 vp->v_type = IFTOVT(ip->i_mode); 1584 if (vp->v_type == VFIFO) 1585 vp->v_op = fifoops; 1586 1587 if (ip->i_number == EXT2_ROOTINO) 1588 vp->v_vflag |= VV_ROOT; 1589 ip->i_modrev = init_va_filerev(); 1590 *vpp = vp; 1591 return (0); 1592 } 1593 1594 /* 1595 * Allocate a new inode. 1596 */ 1597 static int 1598 ext2_makeinode(mode, dvp, vpp, cnp) 1599 int mode; 1600 struct vnode *dvp; 1601 struct vnode **vpp; 1602 struct componentname *cnp; 1603 { 1604 struct inode *ip, *pdir; 1605 struct vnode *tvp; 1606 int error; 1607 1608 pdir = VTOI(dvp); 1609 #ifdef DIAGNOSTIC 1610 if ((cnp->cn_flags & HASBUF) == 0) 1611 panic("ext2_makeinode: no name"); 1612 #endif 1613 *vpp = NULL; 1614 if ((mode & IFMT) == 0) 1615 mode |= IFREG; 1616 1617 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1618 if (error) { 1619 return (error); 1620 } 1621 ip = VTOI(tvp); 1622 ip->i_gid = pdir->i_gid; 1623 #ifdef SUIDDIR 1624 { 1625 /* 1626 * if we are 1627 * not the owner of the directory, 1628 * and we are hacking owners here, (only do this where told to) 1629 * and we are not giving it TOO root, (would subvert quotas) 1630 * then go ahead and give it to the other user. 1631 * Note that this drops off the execute bits for security. 1632 */ 1633 if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1634 (pdir->i_mode & ISUID) && 1635 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1636 ip->i_uid = pdir->i_uid; 1637 mode &= ~07111; 1638 } else { 1639 ip->i_uid = cnp->cn_cred->cr_uid; 1640 } 1641 } 1642 #else 1643 ip->i_uid = cnp->cn_cred->cr_uid; 1644 #endif 1645 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1646 ip->i_mode = mode; 1647 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1648 ip->i_nlink = 1; 1649 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1650 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1651 ip->i_mode &= ~ISGID; 1652 } 1653 1654 if (cnp->cn_flags & ISWHITEOUT) 1655 ip->i_flags |= UF_OPAQUE; 1656 1657 /* 1658 * Make sure inode goes to disk before directory entry. 1659 */ 1660 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1661 if (error) 1662 goto bad; 1663 error = ext2_direnter(ip, dvp, cnp); 1664 if (error) 1665 goto bad; 1666 1667 *vpp = tvp; 1668 return (0); 1669 1670 bad: 1671 /* 1672 * Write error occurred trying to update the inode 1673 * or the directory so must deallocate the inode. 1674 */ 1675 ip->i_nlink = 0; 1676 ip->i_flag |= IN_CHANGE; 1677 vput(tvp); 1678 return (error); 1679 } 1680 1681 /* 1682 * Vnode op for reading. 1683 */ 1684 static int 1685 ext2_read(ap) 1686 struct vop_read_args /* { 1687 struct vnode *a_vp; 1688 struct uio *a_uio; 1689 int a_ioflag; 1690 struct ucred *a_cred; 1691 } */ *ap; 1692 { 1693 struct vnode *vp; 1694 struct inode *ip; 1695 struct uio *uio; 1696 struct m_ext2fs *fs; 1697 struct buf *bp; 1698 daddr_t lbn, nextlbn; 1699 off_t bytesinfile; 1700 long size, xfersize, blkoffset; 1701 int error, orig_resid, seqcount; 1702 int ioflag; 1703 1704 vp = ap->a_vp; 1705 uio = ap->a_uio; 1706 ioflag = ap->a_ioflag; 1707 1708 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 1709 ip = VTOI(vp); 1710 1711 #ifdef INVARIANTS 1712 if (uio->uio_rw != UIO_READ) 1713 panic("%s: mode", "ext2_read"); 1714 1715 if (vp->v_type == VLNK) { 1716 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 1717 panic("%s: short symlink", "ext2_read"); 1718 } else if (vp->v_type != VREG && vp->v_type != VDIR) 1719 panic("%s: type %d", "ext2_read", vp->v_type); 1720 #endif 1721 orig_resid = uio->uio_resid; 1722 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 1723 if (orig_resid == 0) 1724 return (0); 1725 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 1726 fs = ip->i_e2fs; 1727 if (uio->uio_offset < ip->i_size && 1728 uio->uio_offset >= fs->e2fs_maxfilesize) 1729 return (EOVERFLOW); 1730 1731 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 1732 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 1733 break; 1734 lbn = lblkno(fs, uio->uio_offset); 1735 nextlbn = lbn + 1; 1736 size = blksize(fs, ip, lbn); 1737 blkoffset = blkoff(fs, uio->uio_offset); 1738 1739 xfersize = fs->e2fs_fsize - blkoffset; 1740 if (uio->uio_resid < xfersize) 1741 xfersize = uio->uio_resid; 1742 if (bytesinfile < xfersize) 1743 xfersize = bytesinfile; 1744 1745 if (lblktosize(fs, nextlbn) >= ip->i_size) 1746 error = bread(vp, lbn, size, NOCRED, &bp); 1747 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) 1748 error = cluster_read(vp, ip->i_size, lbn, size, 1749 NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); 1750 else if (seqcount > 1) { 1751 int nextsize = blksize(fs, ip, nextlbn); 1752 error = breadn(vp, lbn, 1753 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 1754 } else 1755 error = bread(vp, lbn, size, NOCRED, &bp); 1756 if (error) { 1757 brelse(bp); 1758 bp = NULL; 1759 break; 1760 } 1761 1762 /* 1763 * If IO_DIRECT then set B_DIRECT for the buffer. This 1764 * will cause us to attempt to release the buffer later on 1765 * and will cause the buffer cache to attempt to free the 1766 * underlying pages. 1767 */ 1768 if (ioflag & IO_DIRECT) 1769 bp->b_flags |= B_DIRECT; 1770 1771 /* 1772 * We should only get non-zero b_resid when an I/O error 1773 * has occurred, which should cause us to break above. 1774 * However, if the short read did not cause an error, 1775 * then we want to ensure that we do not uiomove bad 1776 * or uninitialized data. 1777 */ 1778 size -= bp->b_resid; 1779 if (size < xfersize) { 1780 if (size == 0) 1781 break; 1782 xfersize = size; 1783 } 1784 error = uiomove((char *)bp->b_data + blkoffset, 1785 (int)xfersize, uio); 1786 if (error) 1787 break; 1788 1789 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1790 /* 1791 * If it's VMIO or direct I/O, then we don't 1792 * need the buf, mark it available for 1793 * freeing. If it's non-direct VMIO, the VM has 1794 * the data. 1795 */ 1796 bp->b_flags |= B_RELBUF; 1797 brelse(bp); 1798 } else { 1799 /* 1800 * Otherwise let whoever 1801 * made the request take care of 1802 * freeing it. We just queue 1803 * it onto another list. 1804 */ 1805 bqrelse(bp); 1806 } 1807 } 1808 1809 /* 1810 * This can only happen in the case of an error 1811 * because the loop above resets bp to NULL on each iteration 1812 * and on normal completion has not set a new value into it. 1813 * so it must have come from a 'break' statement 1814 */ 1815 if (bp != NULL) { 1816 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1817 bp->b_flags |= B_RELBUF; 1818 brelse(bp); 1819 } else { 1820 bqrelse(bp); 1821 } 1822 } 1823 1824 if ((error == 0 || uio->uio_resid != orig_resid) && 1825 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 1826 ip->i_flag |= IN_ACCESS; 1827 return (error); 1828 } 1829 1830 /* 1831 * Vnode op for writing. 1832 */ 1833 static int 1834 ext2_write(ap) 1835 struct vop_write_args /* { 1836 struct vnode *a_vp; 1837 struct uio *a_uio; 1838 int a_ioflag; 1839 struct ucred *a_cred; 1840 } */ *ap; 1841 { 1842 struct vnode *vp; 1843 struct uio *uio; 1844 struct inode *ip; 1845 struct m_ext2fs *fs; 1846 struct buf *bp; 1847 daddr_t lbn; 1848 off_t osize; 1849 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 1850 1851 ioflag = ap->a_ioflag; 1852 uio = ap->a_uio; 1853 vp = ap->a_vp; 1854 1855 seqcount = ioflag >> IO_SEQSHIFT; 1856 ip = VTOI(vp); 1857 1858 #ifdef INVARIANTS 1859 if (uio->uio_rw != UIO_WRITE) 1860 panic("%s: mode", "ext2_write"); 1861 #endif 1862 1863 switch (vp->v_type) { 1864 case VREG: 1865 if (ioflag & IO_APPEND) 1866 uio->uio_offset = ip->i_size; 1867 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 1868 return (EPERM); 1869 /* FALLTHROUGH */ 1870 case VLNK: 1871 break; 1872 case VDIR: 1873 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 1874 if ((ioflag & IO_SYNC) == 0) 1875 panic("ext2_write: nonsync dir write"); 1876 break; 1877 default: 1878 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 1879 vp->v_type, (intmax_t)uio->uio_offset, 1880 (intmax_t)uio->uio_resid); 1881 } 1882 1883 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 1884 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 1885 fs = ip->i_e2fs; 1886 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 1887 return (EFBIG); 1888 /* 1889 * Maybe this should be above the vnode op call, but so long as 1890 * file servers have no limits, I don't think it matters. 1891 */ 1892 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 1893 return (EFBIG); 1894 1895 resid = uio->uio_resid; 1896 osize = ip->i_size; 1897 if (seqcount > BA_SEQMAX) 1898 flags = BA_SEQMAX << BA_SEQSHIFT; 1899 else 1900 flags = seqcount << BA_SEQSHIFT; 1901 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 1902 flags |= IO_SYNC; 1903 1904 for (error = 0; uio->uio_resid > 0;) { 1905 lbn = lblkno(fs, uio->uio_offset); 1906 blkoffset = blkoff(fs, uio->uio_offset); 1907 xfersize = fs->e2fs_fsize - blkoffset; 1908 if (uio->uio_resid < xfersize) 1909 xfersize = uio->uio_resid; 1910 if (uio->uio_offset + xfersize > ip->i_size) 1911 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 1912 1913 /* 1914 * We must perform a read-before-write if the transfer size 1915 * does not cover the entire buffer. 1916 */ 1917 if (fs->e2fs_bsize > xfersize) 1918 flags |= BA_CLRBUF; 1919 else 1920 flags &= ~BA_CLRBUF; 1921 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 1922 ap->a_cred, &bp, flags); 1923 if (error != 0) 1924 break; 1925 1926 /* 1927 * If the buffer is not valid and we did not clear garbage 1928 * out above, we have to do so here even though the write 1929 * covers the entire buffer in order to avoid a mmap()/write 1930 * race where another process may see the garbage prior to 1931 * the uiomove() for a write replacing it. 1932 */ 1933 if ((bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize <= xfersize) 1934 vfs_bio_clrbuf(bp); 1935 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 1936 bp->b_flags |= B_NOCACHE; 1937 if (uio->uio_offset + xfersize > ip->i_size) 1938 ip->i_size = uio->uio_offset + xfersize; 1939 size = blksize(fs, ip, lbn) - bp->b_resid; 1940 if (size < xfersize) 1941 xfersize = size; 1942 1943 error = 1944 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 1945 if (ioflag & (IO_VMIO|IO_DIRECT)) { 1946 bp->b_flags |= B_RELBUF; 1947 } 1948 1949 /* 1950 * If IO_SYNC each buffer is written synchronously. Otherwise 1951 * if we have a severe page deficiency write the buffer 1952 * asynchronously. Otherwise try to cluster, and if that 1953 * doesn't do it then either do an async write (if O_DIRECT), 1954 * or a delayed write (if not). 1955 */ 1956 if (ioflag & IO_SYNC) { 1957 (void)bwrite(bp); 1958 } else if (vm_page_count_severe() || 1959 buf_dirty_count_severe() || 1960 (ioflag & IO_ASYNC)) { 1961 bp->b_flags |= B_CLUSTEROK; 1962 bawrite(bp); 1963 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 1964 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 1965 bp->b_flags |= B_CLUSTEROK; 1966 cluster_write(vp, bp, ip->i_size, seqcount); 1967 } else { 1968 bawrite(bp); 1969 } 1970 } else if (ioflag & IO_DIRECT) { 1971 bp->b_flags |= B_CLUSTEROK; 1972 bawrite(bp); 1973 } else { 1974 bp->b_flags |= B_CLUSTEROK; 1975 bdwrite(bp); 1976 } 1977 if (error || xfersize == 0) 1978 break; 1979 } 1980 /* 1981 * If we successfully wrote any data, and we are not the superuser 1982 * we clear the setuid and setgid bits as a precaution against 1983 * tampering. 1984 */ 1985 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 1986 ap->a_cred) { 1987 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 1988 ip->i_mode &= ~(ISUID | ISGID); 1989 } 1990 if (error) { 1991 if (ioflag & IO_UNIT) { 1992 (void)ext2_truncate(vp, osize, 1993 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 1994 uio->uio_offset -= resid - uio->uio_resid; 1995 uio->uio_resid = resid; 1996 } 1997 } 1998 if (uio->uio_resid != resid) { 1999 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2000 if (ioflag & IO_SYNC) 2001 error = ext2_update(vp, 1); 2002 } 2003 return (error); 2004 } 2005