1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * (c) UNIX System Laboratories, Inc. 13 * All or some portions of this file are derived from material licensed 14 * to the University of California by American Telephone and Telegraph 15 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 16 * the permission of UNIX System Laboratories, Inc. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 43 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 44 * $FreeBSD$ 45 */ 46 47 #include "opt_suiddir.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/kernel.h> 52 #include <sys/fcntl.h> 53 #include <sys/filio.h> 54 #include <sys/stat.h> 55 #include <sys/bio.h> 56 #include <sys/buf.h> 57 #include <sys/endian.h> 58 #include <sys/priv.h> 59 #include <sys/rwlock.h> 60 #include <sys/mount.h> 61 #include <sys/unistd.h> 62 #include <sys/time.h> 63 #include <sys/vnode.h> 64 #include <sys/namei.h> 65 #include <sys/lockf.h> 66 #include <sys/event.h> 67 #include <sys/conf.h> 68 #include <sys/file.h> 69 #include <sys/extattr.h> 70 #include <sys/vmmeter.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_param.h> 74 #include <vm/vm_extern.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_pager.h> 78 #include <vm/vnode_pager.h> 79 80 #include "opt_directio.h" 81 82 #include <ufs/ufs/dir.h> 83 84 #include <fs/ext2fs/fs.h> 85 #include <fs/ext2fs/inode.h> 86 #include <fs/ext2fs/ext2_acl.h> 87 #include <fs/ext2fs/ext2_extern.h> 88 #include <fs/ext2fs/ext2fs.h> 89 #include <fs/ext2fs/ext2_dinode.h> 90 #include <fs/ext2fs/ext2_dir.h> 91 #include <fs/ext2fs/ext2_mount.h> 92 #include <fs/ext2fs/ext2_extattr.h> 93 94 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 95 static void ext2_itimes_locked(struct vnode *); 96 97 
static vop_access_t ext2_access; 98 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 99 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 100 struct thread *); 101 static vop_close_t ext2_close; 102 static vop_create_t ext2_create; 103 static vop_fsync_t ext2_fsync; 104 static vop_getattr_t ext2_getattr; 105 static vop_ioctl_t ext2_ioctl; 106 static vop_link_t ext2_link; 107 static vop_mkdir_t ext2_mkdir; 108 static vop_mknod_t ext2_mknod; 109 static vop_open_t ext2_open; 110 static vop_pathconf_t ext2_pathconf; 111 static vop_print_t ext2_print; 112 static vop_read_t ext2_read; 113 static vop_readlink_t ext2_readlink; 114 static vop_remove_t ext2_remove; 115 static vop_rename_t ext2_rename; 116 static vop_rmdir_t ext2_rmdir; 117 static vop_setattr_t ext2_setattr; 118 static vop_strategy_t ext2_strategy; 119 static vop_symlink_t ext2_symlink; 120 static vop_write_t ext2_write; 121 static vop_deleteextattr_t ext2_deleteextattr; 122 static vop_getextattr_t ext2_getextattr; 123 static vop_listextattr_t ext2_listextattr; 124 static vop_setextattr_t ext2_setextattr; 125 static vop_vptofh_t ext2_vptofh; 126 static vop_close_t ext2fifo_close; 127 static vop_kqfilter_t ext2fifo_kqfilter; 128 129 /* Global vfs data structures for ext2. 
*/ 130 struct vop_vector ext2_vnodeops = { 131 .vop_default = &default_vnodeops, 132 .vop_access = ext2_access, 133 .vop_bmap = ext2_bmap, 134 .vop_cachedlookup = ext2_lookup, 135 .vop_close = ext2_close, 136 .vop_create = ext2_create, 137 .vop_fsync = ext2_fsync, 138 .vop_getpages = vnode_pager_local_getpages, 139 .vop_getpages_async = vnode_pager_local_getpages_async, 140 .vop_getattr = ext2_getattr, 141 .vop_inactive = ext2_inactive, 142 .vop_ioctl = ext2_ioctl, 143 .vop_link = ext2_link, 144 .vop_lookup = vfs_cache_lookup, 145 .vop_mkdir = ext2_mkdir, 146 .vop_mknod = ext2_mknod, 147 .vop_open = ext2_open, 148 .vop_pathconf = ext2_pathconf, 149 .vop_poll = vop_stdpoll, 150 .vop_print = ext2_print, 151 .vop_read = ext2_read, 152 .vop_readdir = ext2_readdir, 153 .vop_readlink = ext2_readlink, 154 .vop_reallocblks = ext2_reallocblks, 155 .vop_reclaim = ext2_reclaim, 156 .vop_remove = ext2_remove, 157 .vop_rename = ext2_rename, 158 .vop_rmdir = ext2_rmdir, 159 .vop_setattr = ext2_setattr, 160 .vop_strategy = ext2_strategy, 161 .vop_symlink = ext2_symlink, 162 .vop_write = ext2_write, 163 .vop_deleteextattr = ext2_deleteextattr, 164 .vop_getextattr = ext2_getextattr, 165 .vop_listextattr = ext2_listextattr, 166 .vop_setextattr = ext2_setextattr, 167 #ifdef UFS_ACL 168 .vop_getacl = ext2_getacl, 169 .vop_setacl = ext2_setacl, 170 .vop_aclcheck = ext2_aclcheck, 171 #endif /* UFS_ACL */ 172 .vop_vptofh = ext2_vptofh, 173 }; 174 175 struct vop_vector ext2_fifoops = { 176 .vop_default = &fifo_specops, 177 .vop_access = ext2_access, 178 .vop_close = ext2fifo_close, 179 .vop_fsync = ext2_fsync, 180 .vop_getattr = ext2_getattr, 181 .vop_inactive = ext2_inactive, 182 .vop_kqfilter = ext2fifo_kqfilter, 183 .vop_pathconf = ext2_pathconf, 184 .vop_print = ext2_print, 185 .vop_read = VOP_PANIC, 186 .vop_reclaim = ext2_reclaim, 187 .vop_setattr = ext2_setattr, 188 .vop_write = VOP_PANIC, 189 .vop_vptofh = ext2_vptofh, 190 }; 191 192 /* 193 * A virgin directory (no blushing 
please). 194 * Note that the type and namlen fields are reversed relative to ext2. 195 * Also, we don't use `struct odirtemplate', since it would just cause 196 * endianness problems. 197 */ 198 static struct dirtemplate mastertemplate = { 199 0, 12, 1, EXT2_FT_DIR, ".", 200 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 201 }; 202 static struct dirtemplate omastertemplate = { 203 0, 12, 1, EXT2_FT_UNKNOWN, ".", 204 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 205 }; 206 207 static void 208 ext2_itimes_locked(struct vnode *vp) 209 { 210 struct inode *ip; 211 struct timespec ts; 212 213 ASSERT_VI_LOCKED(vp, __func__); 214 215 ip = VTOI(vp); 216 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 217 return; 218 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 219 ip->i_flag |= IN_LAZYMOD; 220 else 221 ip->i_flag |= IN_MODIFIED; 222 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 223 vfs_timestamp(&ts); 224 if (ip->i_flag & IN_ACCESS) { 225 ip->i_atime = ts.tv_sec; 226 ip->i_atimensec = ts.tv_nsec; 227 } 228 if (ip->i_flag & IN_UPDATE) { 229 ip->i_mtime = ts.tv_sec; 230 ip->i_mtimensec = ts.tv_nsec; 231 ip->i_modrev++; 232 } 233 if (ip->i_flag & IN_CHANGE) { 234 ip->i_ctime = ts.tv_sec; 235 ip->i_ctimensec = ts.tv_nsec; 236 } 237 } 238 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 239 } 240 241 void 242 ext2_itimes(struct vnode *vp) 243 { 244 245 VI_LOCK(vp); 246 ext2_itimes_locked(vp); 247 VI_UNLOCK(vp); 248 } 249 250 /* 251 * Create a regular file 252 */ 253 static int 254 ext2_create(struct vop_create_args *ap) 255 { 256 int error; 257 258 error = 259 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 260 ap->a_dvp, ap->a_vpp, ap->a_cnp); 261 if (error != 0) 262 return (error); 263 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 264 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 265 return (0); 266 } 267 268 static int 269 ext2_open(struct vop_open_args *ap) 270 { 271 272 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 273 return 
(EOPNOTSUPP); 274 275 /* 276 * Files marked append-only must be opened for appending. 277 */ 278 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 279 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 280 return (EPERM); 281 282 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 283 284 return (0); 285 } 286 287 /* 288 * Close called. 289 * 290 * Update the times on the inode. 291 */ 292 static int 293 ext2_close(struct vop_close_args *ap) 294 { 295 struct vnode *vp = ap->a_vp; 296 297 VI_LOCK(vp); 298 if (vp->v_usecount > 1) 299 ext2_itimes_locked(vp); 300 VI_UNLOCK(vp); 301 return (0); 302 } 303 304 static int 305 ext2_access(struct vop_access_args *ap) 306 { 307 struct vnode *vp = ap->a_vp; 308 struct inode *ip = VTOI(vp); 309 accmode_t accmode = ap->a_accmode; 310 int error; 311 312 if (vp->v_type == VBLK || vp->v_type == VCHR) 313 return (EOPNOTSUPP); 314 315 /* 316 * Disallow write attempts on read-only file systems; 317 * unless the file is a socket, fifo, or a block or 318 * character device resident on the file system. 319 */ 320 if (accmode & VWRITE) { 321 switch (vp->v_type) { 322 case VDIR: 323 case VLNK: 324 case VREG: 325 if (vp->v_mount->mnt_flag & MNT_RDONLY) 326 return (EROFS); 327 break; 328 default: 329 break; 330 } 331 } 332 333 /* If immutable bit set, nobody gets to write it. 
*/ 334 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 335 return (EPERM); 336 337 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 338 ap->a_accmode, ap->a_cred, NULL); 339 return (error); 340 } 341 342 static int 343 ext2_getattr(struct vop_getattr_args *ap) 344 { 345 struct vnode *vp = ap->a_vp; 346 struct inode *ip = VTOI(vp); 347 struct vattr *vap = ap->a_vap; 348 349 ext2_itimes(vp); 350 /* 351 * Copy from inode table 352 */ 353 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 354 vap->va_fileid = ip->i_number; 355 vap->va_mode = ip->i_mode & ~IFMT; 356 vap->va_nlink = ip->i_nlink; 357 vap->va_uid = ip->i_uid; 358 vap->va_gid = ip->i_gid; 359 vap->va_rdev = ip->i_rdev; 360 vap->va_size = ip->i_size; 361 vap->va_atime.tv_sec = ip->i_atime; 362 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 363 vap->va_mtime.tv_sec = ip->i_mtime; 364 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 365 vap->va_ctime.tv_sec = ip->i_ctime; 366 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 367 if E2DI_HAS_XTIME(ip) { 368 vap->va_birthtime.tv_sec = ip->i_birthtime; 369 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 370 } 371 vap->va_flags = ip->i_flags; 372 vap->va_gen = ip->i_gen; 373 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 374 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 375 vap->va_type = IFTOVT(ip->i_mode); 376 vap->va_filerev = ip->i_modrev; 377 return (0); 378 } 379 380 /* 381 * Set attribute vnode op. called from several syscalls 382 */ 383 static int 384 ext2_setattr(struct vop_setattr_args *ap) 385 { 386 struct vattr *vap = ap->a_vap; 387 struct vnode *vp = ap->a_vp; 388 struct inode *ip = VTOI(vp); 389 struct ucred *cred = ap->a_cred; 390 struct thread *td = curthread; 391 int error; 392 393 /* 394 * Check for unsettable attributes. 
395 */ 396 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 397 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 398 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 399 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 400 return (EINVAL); 401 } 402 if (vap->va_flags != VNOVAL) { 403 /* Disallow flags not supported by ext2fs. */ 404 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 405 return (EOPNOTSUPP); 406 407 if (vp->v_mount->mnt_flag & MNT_RDONLY) 408 return (EROFS); 409 /* 410 * Callers may only modify the file flags on objects they 411 * have VADMIN rights for. 412 */ 413 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 414 return (error); 415 /* 416 * Unprivileged processes and privileged processes in 417 * jail() are not permitted to unset system flags, or 418 * modify flags if any system flags are set. 419 * Privileged non-jail processes may not modify system flags 420 * if securelevel > 0 and any existing system flags are set. 421 */ 422 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 423 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 424 error = securelevel_gt(cred, 0); 425 if (error) 426 return (error); 427 } 428 } else { 429 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 430 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 431 return (EPERM); 432 } 433 ip->i_flags = vap->va_flags; 434 ip->i_flag |= IN_CHANGE; 435 if (ip->i_flags & (IMMUTABLE | APPEND)) 436 return (0); 437 } 438 if (ip->i_flags & (IMMUTABLE | APPEND)) 439 return (EPERM); 440 /* 441 * Go through the fields and update iff not VNOVAL. 
442 */ 443 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 444 if (vp->v_mount->mnt_flag & MNT_RDONLY) 445 return (EROFS); 446 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 447 td)) != 0) 448 return (error); 449 } 450 if (vap->va_size != VNOVAL) { 451 /* 452 * Disallow write attempts on read-only file systems; 453 * unless the file is a socket, fifo, or a block or 454 * character device resident on the file system. 455 */ 456 switch (vp->v_type) { 457 case VDIR: 458 return (EISDIR); 459 case VLNK: 460 case VREG: 461 if (vp->v_mount->mnt_flag & MNT_RDONLY) 462 return (EROFS); 463 break; 464 default: 465 break; 466 } 467 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 468 return (error); 469 } 470 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 471 if (vp->v_mount->mnt_flag & MNT_RDONLY) 472 return (EROFS); 473 /* 474 * From utimes(2): 475 * If times is NULL, ... The caller must be the owner of 476 * the file, have permission to write the file, or be the 477 * super-user. 478 * If times is non-NULL, ... The caller must be the owner of 479 * the file or be the super-user. 
480 */ 481 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 482 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 483 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 484 return (error); 485 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 486 if (vap->va_atime.tv_sec != VNOVAL) { 487 ip->i_flag &= ~IN_ACCESS; 488 ip->i_atime = vap->va_atime.tv_sec; 489 ip->i_atimensec = vap->va_atime.tv_nsec; 490 } 491 if (vap->va_mtime.tv_sec != VNOVAL) { 492 ip->i_flag &= ~IN_UPDATE; 493 ip->i_mtime = vap->va_mtime.tv_sec; 494 ip->i_mtimensec = vap->va_mtime.tv_nsec; 495 } 496 ip->i_birthtime = vap->va_birthtime.tv_sec; 497 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 498 error = ext2_update(vp, 0); 499 if (error) 500 return (error); 501 } 502 error = 0; 503 if (vap->va_mode != (mode_t)VNOVAL) { 504 if (vp->v_mount->mnt_flag & MNT_RDONLY) 505 return (EROFS); 506 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 507 } 508 return (error); 509 } 510 511 /* 512 * Change the mode on a file. 513 * Inode must be locked before calling. 514 */ 515 static int 516 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 517 { 518 struct inode *ip = VTOI(vp); 519 int error; 520 521 /* 522 * To modify the permissions on a file, must possess VADMIN 523 * for that file. 524 */ 525 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 526 return (error); 527 /* 528 * Privileged processes may set the sticky bit on non-directories, 529 * as well as set the setgid bit on a file with a group that the 530 * process is not a member of. 
531 */ 532 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 533 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 534 if (error) 535 return (EFTYPE); 536 } 537 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 538 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 539 if (error) 540 return (error); 541 } 542 ip->i_mode &= ~ALLPERMS; 543 ip->i_mode |= (mode & ALLPERMS); 544 ip->i_flag |= IN_CHANGE; 545 return (0); 546 } 547 548 /* 549 * Perform chown operation on inode ip; 550 * inode must be locked prior to call. 551 */ 552 static int 553 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 554 struct thread *td) 555 { 556 struct inode *ip = VTOI(vp); 557 uid_t ouid; 558 gid_t ogid; 559 int error = 0; 560 561 if (uid == (uid_t)VNOVAL) 562 uid = ip->i_uid; 563 if (gid == (gid_t)VNOVAL) 564 gid = ip->i_gid; 565 /* 566 * To modify the ownership of a file, must possess VADMIN 567 * for that file. 568 */ 569 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 570 return (error); 571 /* 572 * To change the owner of a file, or change the group of a file 573 * to a group of which we are not a member, the caller must 574 * have privilege. 575 */ 576 if (uid != ip->i_uid || (gid != ip->i_gid && 577 !groupmember(gid, cred))) { 578 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 579 if (error) 580 return (error); 581 } 582 ogid = ip->i_gid; 583 ouid = ip->i_uid; 584 ip->i_gid = gid; 585 ip->i_uid = uid; 586 ip->i_flag |= IN_CHANGE; 587 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 588 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 589 ip->i_mode &= ~(ISUID | ISGID); 590 } 591 return (0); 592 } 593 594 /* 595 * Synch an open file. 596 */ 597 /* ARGSUSED */ 598 static int 599 ext2_fsync(struct vop_fsync_args *ap) 600 { 601 /* 602 * Flush all dirty buffers associated with a vnode. 
603 */ 604 605 vop_stdfsync(ap); 606 607 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 608 } 609 610 /* 611 * Mknod vnode call 612 */ 613 /* ARGSUSED */ 614 static int 615 ext2_mknod(struct vop_mknod_args *ap) 616 { 617 struct vattr *vap = ap->a_vap; 618 struct vnode **vpp = ap->a_vpp; 619 struct inode *ip; 620 ino_t ino; 621 int error; 622 623 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 624 ap->a_dvp, vpp, ap->a_cnp); 625 if (error) 626 return (error); 627 ip = VTOI(*vpp); 628 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 629 if (vap->va_rdev != VNOVAL) { 630 /* 631 * Want to be able to use this to make badblock 632 * inodes, so don't truncate the dev number. 633 */ 634 if (!(ip->i_flag & IN_E4EXTENTS)) 635 ip->i_rdev = vap->va_rdev; 636 } 637 /* 638 * Remove inode, then reload it through VFS_VGET so it is 639 * checked to see if it is an alias of an existing entry in 640 * the inode cache. XXX I don't believe this is necessary now. 641 */ 642 (*vpp)->v_type = VNON; 643 ino = ip->i_number; /* Save this before vgone() invalidates ip. 
*/ 644 vgone(*vpp); 645 vput(*vpp); 646 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 647 if (error) { 648 *vpp = NULL; 649 return (error); 650 } 651 return (0); 652 } 653 654 static int 655 ext2_remove(struct vop_remove_args *ap) 656 { 657 struct inode *ip; 658 struct vnode *vp = ap->a_vp; 659 struct vnode *dvp = ap->a_dvp; 660 int error; 661 662 ip = VTOI(vp); 663 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 664 (VTOI(dvp)->i_flags & APPEND)) { 665 error = EPERM; 666 goto out; 667 } 668 error = ext2_dirremove(dvp, ap->a_cnp); 669 if (error == 0) { 670 ip->i_nlink--; 671 ip->i_flag |= IN_CHANGE; 672 } 673 out: 674 return (error); 675 } 676 677 static unsigned short 678 ext2_max_nlink(struct inode *ip) 679 { 680 struct m_ext2fs *fs; 681 682 fs = ip->i_e2fs; 683 684 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_DIR_NLINK)) 685 return (EXT4_LINK_MAX); 686 else 687 return (EXT2_LINK_MAX); 688 } 689 690 /* 691 * link vnode call 692 */ 693 static int 694 ext2_link(struct vop_link_args *ap) 695 { 696 struct vnode *vp = ap->a_vp; 697 struct vnode *tdvp = ap->a_tdvp; 698 struct componentname *cnp = ap->a_cnp; 699 struct inode *ip; 700 int error; 701 702 #ifdef INVARIANTS 703 if ((cnp->cn_flags & HASBUF) == 0) 704 panic("ext2_link: no name"); 705 #endif 706 ip = VTOI(vp); 707 if ((nlink_t)ip->i_nlink >= ext2_max_nlink(ip)) { 708 error = EMLINK; 709 goto out; 710 } 711 if (ip->i_flags & (IMMUTABLE | APPEND)) { 712 error = EPERM; 713 goto out; 714 } 715 ip->i_nlink++; 716 ip->i_flag |= IN_CHANGE; 717 error = ext2_update(vp, !DOINGASYNC(vp)); 718 if (!error) 719 error = ext2_direnter(ip, tdvp, cnp); 720 if (error) { 721 ip->i_nlink--; 722 ip->i_flag |= IN_CHANGE; 723 } 724 out: 725 return (error); 726 } 727 728 static int 729 ext2_inc_nlink(struct inode *ip) 730 { 731 732 ip->i_nlink++; 733 734 if (ext2_htree_has_idx(ip) && ip->i_nlink > 1) { 735 if (ip->i_nlink >= ext2_max_nlink(ip) || ip->i_nlink == 2) 736 ip->i_nlink = 1; 737 } else if 
(ip->i_nlink > ext2_max_nlink(ip)) { 738 ip->i_nlink--; 739 return (EMLINK); 740 } 741 742 return (0); 743 } 744 745 static void 746 ext2_dec_nlink(struct inode *ip) 747 { 748 749 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2) 750 ip->i_nlink--; 751 } 752 753 /* 754 * Rename system call. 755 * rename("foo", "bar"); 756 * is essentially 757 * unlink("bar"); 758 * link("foo", "bar"); 759 * unlink("foo"); 760 * but ``atomically''. Can't do full commit without saving state in the 761 * inode on disk which isn't feasible at this time. Best we can do is 762 * always guarantee the target exists. 763 * 764 * Basic algorithm is: 765 * 766 * 1) Bump link count on source while we're linking it to the 767 * target. This also ensure the inode won't be deleted out 768 * from underneath us while we work (it may be truncated by 769 * a concurrent `trunc' or `open' for creation). 770 * 2) Link source to destination. If destination already exists, 771 * delete it first. 772 * 3) Unlink source reference to inode if still around. If a 773 * directory was moved and the parent of the destination 774 * is different from the source, patch the ".." entry in the 775 * directory. 776 */ 777 static int 778 ext2_rename(struct vop_rename_args *ap) 779 { 780 struct vnode *tvp = ap->a_tvp; 781 struct vnode *tdvp = ap->a_tdvp; 782 struct vnode *fvp = ap->a_fvp; 783 struct vnode *fdvp = ap->a_fdvp; 784 struct componentname *tcnp = ap->a_tcnp; 785 struct componentname *fcnp = ap->a_fcnp; 786 struct inode *ip, *xp, *dp; 787 struct dirtemplate dirbuf; 788 int doingdirectory = 0, oldparent = 0, newparent = 0; 789 int error = 0; 790 u_char namlen; 791 792 #ifdef INVARIANTS 793 if ((tcnp->cn_flags & HASBUF) == 0 || 794 (fcnp->cn_flags & HASBUF) == 0) 795 panic("ext2_rename: no name"); 796 #endif 797 /* 798 * Check for cross-device rename. 
799 */ 800 if ((fvp->v_mount != tdvp->v_mount) || 801 (tvp && (fvp->v_mount != tvp->v_mount))) { 802 error = EXDEV; 803 abortit: 804 if (tdvp == tvp) 805 vrele(tdvp); 806 else 807 vput(tdvp); 808 if (tvp) 809 vput(tvp); 810 vrele(fdvp); 811 vrele(fvp); 812 return (error); 813 } 814 815 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 816 (VTOI(tdvp)->i_flags & APPEND))) { 817 error = EPERM; 818 goto abortit; 819 } 820 821 /* 822 * Renaming a file to itself has no effect. The upper layers should 823 * not call us in that case. Temporarily just warn if they do. 824 */ 825 if (fvp == tvp) { 826 printf("ext2_rename: fvp == tvp (can't happen)\n"); 827 error = 0; 828 goto abortit; 829 } 830 831 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 832 goto abortit; 833 dp = VTOI(fdvp); 834 ip = VTOI(fvp); 835 if (ip->i_nlink >= ext2_max_nlink(ip) && !ext2_htree_has_idx(ip)) { 836 VOP_UNLOCK(fvp, 0); 837 error = EMLINK; 838 goto abortit; 839 } 840 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 841 || (dp->i_flags & APPEND)) { 842 VOP_UNLOCK(fvp, 0); 843 error = EPERM; 844 goto abortit; 845 } 846 if ((ip->i_mode & IFMT) == IFDIR) { 847 /* 848 * Avoid ".", "..", and aliases of "." for obvious reasons. 849 */ 850 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 851 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 852 (ip->i_flag & IN_RENAME)) { 853 VOP_UNLOCK(fvp, 0); 854 error = EINVAL; 855 goto abortit; 856 } 857 ip->i_flag |= IN_RENAME; 858 oldparent = dp->i_number; 859 doingdirectory++; 860 } 861 vrele(fdvp); 862 863 /* 864 * When the target exists, both the directory 865 * and target vnodes are returned locked. 866 */ 867 dp = VTOI(tdvp); 868 xp = NULL; 869 if (tvp) 870 xp = VTOI(tvp); 871 872 /* 873 * 1) Bump link count while we're moving stuff 874 * around. If we crash somewhere before 875 * completing our work, the link count 876 * may be wrong, but correctable. 
877 */ 878 ext2_inc_nlink(ip); 879 ip->i_flag |= IN_CHANGE; 880 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 881 VOP_UNLOCK(fvp, 0); 882 goto bad; 883 } 884 885 /* 886 * If ".." must be changed (ie the directory gets a new 887 * parent) then the source directory must not be in the 888 * directory hierarchy above the target, as this would 889 * orphan everything below the source directory. Also 890 * the user must have write permission in the source so 891 * as to be able to change "..". We must repeat the call 892 * to namei, as the parent directory is unlocked by the 893 * call to checkpath(). 894 */ 895 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 896 VOP_UNLOCK(fvp, 0); 897 if (oldparent != dp->i_number) 898 newparent = dp->i_number; 899 if (doingdirectory && newparent) { 900 if (error) /* write access check above */ 901 goto bad; 902 if (xp != NULL) 903 vput(tvp); 904 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 905 if (error) 906 goto out; 907 VREF(tdvp); 908 error = relookup(tdvp, &tvp, tcnp); 909 if (error) 910 goto out; 911 vrele(tdvp); 912 dp = VTOI(tdvp); 913 xp = NULL; 914 if (tvp) 915 xp = VTOI(tvp); 916 } 917 /* 918 * 2) If target doesn't exist, link the target 919 * to the source and unlink the source. 920 * Otherwise, rewrite the target directory 921 * entry to reference the source inode and 922 * expunge the original entry's existence. 923 */ 924 if (xp == NULL) { 925 if (dp->i_devvp != ip->i_devvp) 926 panic("ext2_rename: EXDEV"); 927 /* 928 * Account for ".." in new directory. 929 * When source and destination have the same 930 * parent we don't fool with the link count. 
931 */ 932 if (doingdirectory && newparent) { 933 error = ext2_inc_nlink(dp); 934 if (error) 935 goto bad; 936 937 dp->i_flag |= IN_CHANGE; 938 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 939 if (error) 940 goto bad; 941 } 942 error = ext2_direnter(ip, tdvp, tcnp); 943 if (error) { 944 if (doingdirectory && newparent) { 945 ext2_dec_nlink(dp); 946 dp->i_flag |= IN_CHANGE; 947 (void)ext2_update(tdvp, 1); 948 } 949 goto bad; 950 } 951 vput(tdvp); 952 } else { 953 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 954 panic("ext2_rename: EXDEV"); 955 /* 956 * Short circuit rename(foo, foo). 957 */ 958 if (xp->i_number == ip->i_number) 959 panic("ext2_rename: same file"); 960 /* 961 * If the parent directory is "sticky", then the user must 962 * own the parent directory, or the destination of the rename, 963 * otherwise the destination may not be changed (except by 964 * root). This implements append-only directories. 965 */ 966 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 967 tcnp->cn_cred->cr_uid != dp->i_uid && 968 xp->i_uid != tcnp->cn_cred->cr_uid) { 969 error = EPERM; 970 goto bad; 971 } 972 /* 973 * Target must be empty if a directory and have no links 974 * to it. Also, ensure source and target are compatible 975 * (both directories, or both not directories). 976 */ 977 if ((xp->i_mode & IFMT) == IFDIR) { 978 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 979 error = ENOTEMPTY; 980 goto bad; 981 } 982 if (!doingdirectory) { 983 error = ENOTDIR; 984 goto bad; 985 } 986 cache_purge(tdvp); 987 } else if (doingdirectory) { 988 error = EISDIR; 989 goto bad; 990 } 991 error = ext2_dirrewrite(dp, ip, tcnp); 992 if (error) 993 goto bad; 994 /* 995 * If the target directory is in the same 996 * directory as the source directory, 997 * decrement the link count on the parent 998 * of the target directory. 
999 */ 1000 if (doingdirectory && !newparent) { 1001 ext2_dec_nlink(dp); 1002 dp->i_flag |= IN_CHANGE; 1003 } 1004 vput(tdvp); 1005 /* 1006 * Adjust the link count of the target to 1007 * reflect the dirrewrite above. If this is 1008 * a directory it is empty and there are 1009 * no links to it, so we can squash the inode and 1010 * any space associated with it. We disallowed 1011 * renaming over top of a directory with links to 1012 * it above, as the remaining link would point to 1013 * a directory without "." or ".." entries. 1014 */ 1015 ext2_dec_nlink(xp); 1016 if (doingdirectory) { 1017 if (--xp->i_nlink != 0) 1018 panic("ext2_rename: linked directory"); 1019 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 1020 tcnp->cn_cred, tcnp->cn_thread); 1021 } 1022 xp->i_flag |= IN_CHANGE; 1023 vput(tvp); 1024 xp = NULL; 1025 } 1026 1027 /* 1028 * 3) Unlink the source. 1029 */ 1030 fcnp->cn_flags &= ~MODMASK; 1031 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1032 VREF(fdvp); 1033 error = relookup(fdvp, &fvp, fcnp); 1034 if (error == 0) 1035 vrele(fdvp); 1036 if (fvp != NULL) { 1037 xp = VTOI(fvp); 1038 dp = VTOI(fdvp); 1039 } else { 1040 /* 1041 * From name has disappeared. IN_RENAME is not sufficient 1042 * to protect against directory races due to timing windows, 1043 * so we can't panic here. 1044 */ 1045 vrele(ap->a_fvp); 1046 return (0); 1047 } 1048 /* 1049 * Ensure that the directory entry still exists and has not 1050 * changed while the new name has been entered. If the source is 1051 * a file then the entry may have been unlinked or renamed. In 1052 * either case there is no further work to be done. If the source 1053 * is a directory then it cannot have been rmdir'ed; its link 1054 * count of three would cause a rmdir to fail with ENOTEMPTY. 1055 * The IN_RENAME flag ensures that it cannot be moved by another 1056 * rename. 1057 */ 1058 if (xp != ip) { 1059 /* 1060 * From name resolves to a different inode. 
IN_RENAME is 1061 * not sufficient protection against timing window races 1062 * so we can't panic here. 1063 */ 1064 } else { 1065 /* 1066 * If the source is a directory with a 1067 * new parent, the link count of the old 1068 * parent directory must be decremented 1069 * and ".." set to point to the new parent. 1070 */ 1071 if (doingdirectory && newparent) { 1072 ext2_dec_nlink(dp); 1073 dp->i_flag |= IN_CHANGE; 1074 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1075 sizeof(struct dirtemplate), (off_t)0, 1076 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1077 tcnp->cn_cred, NOCRED, NULL, NULL); 1078 if (error == 0) { 1079 /* Like ufs little-endian: */ 1080 namlen = dirbuf.dotdot_type; 1081 if (namlen != 2 || 1082 dirbuf.dotdot_name[0] != '.' || 1083 dirbuf.dotdot_name[1] != '.') { 1084 ext2_dirbad(xp, (doff_t)12, 1085 "rename: mangled dir"); 1086 } else { 1087 dirbuf.dotdot_ino = newparent; 1088 (void)vn_rdwr(UIO_WRITE, fvp, 1089 (caddr_t)&dirbuf, 1090 sizeof(struct dirtemplate), 1091 (off_t)0, UIO_SYSSPACE, 1092 IO_NODELOCKED | IO_SYNC | 1093 IO_NOMACCHECK, tcnp->cn_cred, 1094 NOCRED, NULL, NULL); 1095 cache_purge(fdvp); 1096 } 1097 } 1098 } 1099 error = ext2_dirremove(fdvp, fcnp); 1100 if (!error) { 1101 ext2_dec_nlink(xp); 1102 xp->i_flag |= IN_CHANGE; 1103 } 1104 xp->i_flag &= ~IN_RENAME; 1105 } 1106 if (dp) 1107 vput(fdvp); 1108 if (xp) 1109 vput(fvp); 1110 vrele(ap->a_fvp); 1111 return (error); 1112 1113 bad: 1114 if (xp) 1115 vput(ITOV(xp)); 1116 vput(ITOV(dp)); 1117 out: 1118 if (doingdirectory) 1119 ip->i_flag &= ~IN_RENAME; 1120 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1121 ext2_dec_nlink(ip); 1122 ip->i_flag |= IN_CHANGE; 1123 ip->i_flag &= ~IN_RENAME; 1124 vput(fvp); 1125 } else 1126 vrele(fvp); 1127 return (error); 1128 } 1129 1130 #ifdef UFS_ACL 1131 static int 1132 ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1133 mode_t dmode, struct ucred *cred, struct thread *td) 1134 { 1135 int error; 1136 struct inode *ip = 
VTOI(tvp); 1137 struct acl *dacl, *acl; 1138 1139 acl = acl_alloc(M_WAITOK); 1140 dacl = acl_alloc(M_WAITOK); 1141 1142 /* 1143 * Retrieve default ACL from parent, if any. 1144 */ 1145 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1146 switch (error) { 1147 case 0: 1148 /* 1149 * Retrieved a default ACL, so merge mode and ACL if 1150 * necessary. If the ACL is empty, fall through to 1151 * the "not defined or available" case. 1152 */ 1153 if (acl->acl_cnt != 0) { 1154 dmode = acl_posix1e_newfilemode(dmode, acl); 1155 ip->i_mode = dmode; 1156 *dacl = *acl; 1157 ext2_sync_acl_from_inode(ip, acl); 1158 break; 1159 } 1160 /* FALLTHROUGH */ 1161 1162 case EOPNOTSUPP: 1163 /* 1164 * Just use the mode as-is. 1165 */ 1166 ip->i_mode = dmode; 1167 error = 0; 1168 goto out; 1169 1170 default: 1171 goto out; 1172 } 1173 1174 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1175 if (error == 0) 1176 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1177 switch (error) { 1178 case 0: 1179 break; 1180 1181 case EOPNOTSUPP: 1182 /* 1183 * XXX: This should not happen, as EOPNOTSUPP above 1184 * was supposed to free acl. 1185 */ 1186 #ifdef DEBUG 1187 printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1188 #endif /* DEBUG */ 1189 break; 1190 1191 default: 1192 goto out; 1193 } 1194 1195 out: 1196 acl_free(acl); 1197 acl_free(dacl); 1198 1199 return (error); 1200 } 1201 1202 static int 1203 ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1204 mode_t mode, struct ucred *cred, struct thread *td) 1205 { 1206 int error; 1207 struct inode *ip = VTOI(tvp); 1208 struct acl *acl; 1209 1210 acl = acl_alloc(M_WAITOK); 1211 1212 /* 1213 * Retrieve default ACL for parent, if any. 1214 */ 1215 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1216 switch (error) { 1217 case 0: 1218 /* 1219 * Retrieved a default ACL, so merge mode and ACL if 1220 * necessary. 
1221 */ 1222 if (acl->acl_cnt != 0) { 1223 /* 1224 * Two possible ways for default ACL to not 1225 * be present. First, the EA can be 1226 * undefined, or second, the default ACL can 1227 * be blank. If it's blank, fall through to 1228 * the it's not defined case. 1229 */ 1230 mode = acl_posix1e_newfilemode(mode, acl); 1231 ip->i_mode = mode; 1232 ext2_sync_acl_from_inode(ip, acl); 1233 break; 1234 } 1235 /* FALLTHROUGH */ 1236 1237 case EOPNOTSUPP: 1238 /* 1239 * Just use the mode as-is. 1240 */ 1241 ip->i_mode = mode; 1242 error = 0; 1243 goto out; 1244 1245 default: 1246 goto out; 1247 } 1248 1249 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1250 switch (error) { 1251 case 0: 1252 break; 1253 1254 case EOPNOTSUPP: 1255 /* 1256 * XXX: This should not happen, as EOPNOTSUPP above was 1257 * supposed to free acl. 1258 */ 1259 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1260 "but no VOP_SETACL()\n"); 1261 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1262 "but no VOP_SETACL()"); */ 1263 break; 1264 1265 default: 1266 goto out; 1267 } 1268 1269 out: 1270 acl_free(acl); 1271 1272 return (error); 1273 } 1274 1275 #endif /* UFS_ACL */ 1276 1277 /* 1278 * Mkdir system call 1279 */ 1280 static int 1281 ext2_mkdir(struct vop_mkdir_args *ap) 1282 { 1283 struct vnode *dvp = ap->a_dvp; 1284 struct vattr *vap = ap->a_vap; 1285 struct componentname *cnp = ap->a_cnp; 1286 struct inode *ip, *dp; 1287 struct vnode *tvp; 1288 struct dirtemplate dirtemplate, *dtp; 1289 int error, dmode; 1290 1291 #ifdef INVARIANTS 1292 if ((cnp->cn_flags & HASBUF) == 0) 1293 panic("ext2_mkdir: no name"); 1294 #endif 1295 dp = VTOI(dvp); 1296 if ((nlink_t)dp->i_nlink >= ext2_max_nlink(dp) && 1297 !ext2_htree_has_idx(dp)) { 1298 error = EMLINK; 1299 goto out; 1300 } 1301 dmode = vap->va_mode & 0777; 1302 dmode |= IFDIR; 1303 /* 1304 * Must simulate part of ext2_makeinode here to acquire the inode, 1305 * but not have it entered in the parent directory. 
The entry is 1306 * made later after writing "." and ".." entries. 1307 */ 1308 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1309 if (error) 1310 goto out; 1311 ip = VTOI(tvp); 1312 ip->i_gid = dp->i_gid; 1313 #ifdef SUIDDIR 1314 { 1315 /* 1316 * if we are hacking owners here, (only do this where told to) 1317 * and we are not giving it TOO root, (would subvert quotas) 1318 * then go ahead and give it to the other user. 1319 * The new directory also inherits the SUID bit. 1320 * If user's UID and dir UID are the same, 1321 * 'give it away' so that the SUID is still forced on. 1322 */ 1323 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1324 (dp->i_mode & ISUID) && dp->i_uid) { 1325 dmode |= ISUID; 1326 ip->i_uid = dp->i_uid; 1327 } else { 1328 ip->i_uid = cnp->cn_cred->cr_uid; 1329 } 1330 } 1331 #else 1332 ip->i_uid = cnp->cn_cred->cr_uid; 1333 #endif 1334 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1335 ip->i_mode = dmode; 1336 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1337 ip->i_nlink = 2; 1338 if (cnp->cn_flags & ISWHITEOUT) 1339 ip->i_flags |= UF_OPAQUE; 1340 error = ext2_update(tvp, 1); 1341 1342 /* 1343 * Bump link count in parent directory 1344 * to reflect work done below. Should 1345 * be done before reference is created 1346 * so reparation is possible if we crash. 1347 */ 1348 ext2_inc_nlink(dp); 1349 dp->i_flag |= IN_CHANGE; 1350 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1351 if (error) 1352 goto bad; 1353 1354 /* Initialize directory with "." and ".." from static template. 
*/ 1355 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1356 EXT2F_INCOMPAT_FTYPE)) 1357 dtp = &mastertemplate; 1358 else 1359 dtp = &omastertemplate; 1360 dirtemplate = *dtp; 1361 dirtemplate.dot_ino = ip->i_number; 1362 dirtemplate.dotdot_ino = dp->i_number; 1363 /* 1364 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1365 * just redefine it - for this function only 1366 */ 1367 #undef DIRBLKSIZ 1368 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1369 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1370 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1371 sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, 1372 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1373 NULL, NULL); 1374 if (error) { 1375 ext2_dec_nlink(dp); 1376 dp->i_flag |= IN_CHANGE; 1377 goto bad; 1378 } 1379 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1380 /* XXX should grow with balloc() */ 1381 panic("ext2_mkdir: blksize"); 1382 else { 1383 ip->i_size = DIRBLKSIZ; 1384 ip->i_flag |= IN_CHANGE; 1385 } 1386 1387 #ifdef UFS_ACL 1388 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1389 error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1390 cnp->cn_cred, cnp->cn_thread); 1391 if (error) 1392 goto bad; 1393 } 1394 1395 #endif /* UFS_ACL */ 1396 1397 /* Directory set up, now install its entry in the parent directory. */ 1398 error = ext2_direnter(ip, dvp, cnp); 1399 if (error) { 1400 ext2_dec_nlink(dp); 1401 dp->i_flag |= IN_CHANGE; 1402 } 1403 bad: 1404 /* 1405 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1406 * for us because we set the link count to 0. 1407 */ 1408 if (error) { 1409 ip->i_nlink = 0; 1410 ip->i_flag |= IN_CHANGE; 1411 vput(tvp); 1412 } else 1413 *ap->a_vpp = tvp; 1414 out: 1415 return (error); 1416 #undef DIRBLKSIZ 1417 #define DIRBLKSIZ DEV_BSIZE 1418 } 1419 1420 /* 1421 * Rmdir system call. 
1422 */ 1423 static int 1424 ext2_rmdir(struct vop_rmdir_args *ap) 1425 { 1426 struct vnode *vp = ap->a_vp; 1427 struct vnode *dvp = ap->a_dvp; 1428 struct componentname *cnp = ap->a_cnp; 1429 struct inode *ip, *dp; 1430 int error; 1431 1432 ip = VTOI(vp); 1433 dp = VTOI(dvp); 1434 1435 /* 1436 * Verify the directory is empty (and valid). 1437 * (Rmdir ".." won't be valid since 1438 * ".." will contain a reference to 1439 * the current directory and thus be 1440 * non-empty.) 1441 */ 1442 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1443 error = ENOTEMPTY; 1444 goto out; 1445 } 1446 if ((dp->i_flags & APPEND) 1447 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1448 error = EPERM; 1449 goto out; 1450 } 1451 /* 1452 * Delete reference to directory before purging 1453 * inode. If we crash in between, the directory 1454 * will be reattached to lost+found, 1455 */ 1456 error = ext2_dirremove(dvp, cnp); 1457 if (error) 1458 goto out; 1459 ext2_dec_nlink(dp); 1460 dp->i_flag |= IN_CHANGE; 1461 cache_purge(dvp); 1462 VOP_UNLOCK(dvp, 0); 1463 /* 1464 * Truncate inode. The only stuff left 1465 * in the directory is "." and "..". 
1466 */ 1467 ip->i_nlink = 0; 1468 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1469 cnp->cn_thread); 1470 cache_purge(ITOV(ip)); 1471 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1472 VOP_UNLOCK(vp, 0); 1473 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1474 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1475 } 1476 out: 1477 return (error); 1478 } 1479 1480 /* 1481 * symlink -- make a symbolic link 1482 */ 1483 static int 1484 ext2_symlink(struct vop_symlink_args *ap) 1485 { 1486 struct vnode *vp, **vpp = ap->a_vpp; 1487 struct inode *ip; 1488 int len, error; 1489 1490 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1491 vpp, ap->a_cnp); 1492 if (error) 1493 return (error); 1494 vp = *vpp; 1495 len = strlen(ap->a_target); 1496 if (len < vp->v_mount->mnt_maxsymlinklen) { 1497 ip = VTOI(vp); 1498 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1499 ip->i_size = len; 1500 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1501 } else 1502 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1503 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1504 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1505 if (error) 1506 vput(vp); 1507 return (error); 1508 } 1509 1510 /* 1511 * Return target name of a symbolic link 1512 */ 1513 static int 1514 ext2_readlink(struct vop_readlink_args *ap) 1515 { 1516 struct vnode *vp = ap->a_vp; 1517 struct inode *ip = VTOI(vp); 1518 int isize; 1519 1520 isize = ip->i_size; 1521 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1522 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1523 return (0); 1524 } 1525 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1526 } 1527 1528 /* 1529 * Calculate the logical to physical mapping if not done already, 1530 * then call the device strategy routine. 1531 * 1532 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1533 * deadlock on memory. See ext2_bmap() for details. 
1534 */ 1535 static int 1536 ext2_strategy(struct vop_strategy_args *ap) 1537 { 1538 struct buf *bp = ap->a_bp; 1539 struct vnode *vp = ap->a_vp; 1540 struct bufobj *bo; 1541 daddr_t blkno; 1542 int error; 1543 1544 if (vp->v_type == VBLK || vp->v_type == VCHR) 1545 panic("ext2_strategy: spec"); 1546 if (bp->b_blkno == bp->b_lblkno) { 1547 1548 if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) 1549 error = ext4_bmapext(vp, bp->b_lblkno, &blkno, NULL, NULL); 1550 else 1551 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1552 1553 bp->b_blkno = blkno; 1554 if (error) { 1555 bp->b_error = error; 1556 bp->b_ioflags |= BIO_ERROR; 1557 bufdone(bp); 1558 return (0); 1559 } 1560 if ((long)bp->b_blkno == -1) 1561 vfs_bio_clrbuf(bp); 1562 } 1563 if ((long)bp->b_blkno == -1) { 1564 bufdone(bp); 1565 return (0); 1566 } 1567 bp->b_iooffset = dbtob(bp->b_blkno); 1568 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1569 BO_STRATEGY(bo, bp); 1570 return (0); 1571 } 1572 1573 /* 1574 * Print out the contents of an inode. 1575 */ 1576 static int 1577 ext2_print(struct vop_print_args *ap) 1578 { 1579 struct vnode *vp = ap->a_vp; 1580 struct inode *ip = VTOI(vp); 1581 1582 vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); 1583 if (vp->v_type == VFIFO) 1584 fifo_printinfo(vp); 1585 printf("\n"); 1586 return (0); 1587 } 1588 1589 /* 1590 * Close wrapper for fifos. 1591 * 1592 * Update the times on the inode then do device close. 1593 */ 1594 static int 1595 ext2fifo_close(struct vop_close_args *ap) 1596 { 1597 struct vnode *vp = ap->a_vp; 1598 1599 VI_LOCK(vp); 1600 if (vp->v_usecount > 1) 1601 ext2_itimes_locked(vp); 1602 VI_UNLOCK(vp); 1603 return (fifo_specops.vop_close(ap)); 1604 } 1605 1606 /* 1607 * Kqfilter wrapper for fifos. 
1608 * 1609 * Fall through to ext2 kqfilter routines if needed 1610 */ 1611 static int 1612 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1613 { 1614 int error; 1615 1616 error = fifo_specops.vop_kqfilter(ap); 1617 if (error) 1618 error = vfs_kqfilter(ap); 1619 return (error); 1620 } 1621 1622 /* 1623 * Return POSIX pathconf information applicable to ext2 filesystems. 1624 */ 1625 static int 1626 ext2_pathconf(struct vop_pathconf_args *ap) 1627 { 1628 int error = 0; 1629 1630 switch (ap->a_name) { 1631 case _PC_LINK_MAX: 1632 if (ext2_htree_has_idx(VTOI(ap->a_vp))) 1633 *ap->a_retval = INT_MAX; 1634 else 1635 *ap->a_retval = ext2_max_nlink(VTOI(ap->a_vp)); 1636 break; 1637 case _PC_NAME_MAX: 1638 *ap->a_retval = NAME_MAX; 1639 break; 1640 case _PC_PIPE_BUF: 1641 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 1642 *ap->a_retval = PIPE_BUF; 1643 else 1644 error = EINVAL; 1645 break; 1646 case _PC_CHOWN_RESTRICTED: 1647 *ap->a_retval = 1; 1648 break; 1649 case _PC_NO_TRUNC: 1650 *ap->a_retval = 1; 1651 break; 1652 1653 #ifdef UFS_ACL 1654 case _PC_ACL_EXTENDED: 1655 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1656 *ap->a_retval = 1; 1657 else 1658 *ap->a_retval = 0; 1659 break; 1660 case _PC_ACL_PATH_MAX: 1661 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1662 *ap->a_retval = ACL_MAX_ENTRIES; 1663 else 1664 *ap->a_retval = 3; 1665 break; 1666 #endif /* UFS_ACL */ 1667 1668 case _PC_MIN_HOLE_SIZE: 1669 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1670 break; 1671 case _PC_PRIO_IO: 1672 *ap->a_retval = 0; 1673 break; 1674 case _PC_SYNC_IO: 1675 *ap->a_retval = 0; 1676 break; 1677 case _PC_ALLOC_SIZE_MIN: 1678 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1679 break; 1680 case _PC_FILESIZEBITS: 1681 *ap->a_retval = 64; 1682 break; 1683 case _PC_REC_INCR_XFER_SIZE: 1684 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1685 break; 1686 case _PC_REC_MAX_XFER_SIZE: 1687 *ap->a_retval = -1; /* means ``unlimited'' */ 1688 break; 1689 case 
_PC_REC_MIN_XFER_SIZE: 1690 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1691 break; 1692 case _PC_REC_XFER_ALIGN: 1693 *ap->a_retval = PAGE_SIZE; 1694 break; 1695 case _PC_SYMLINK_MAX: 1696 *ap->a_retval = MAXPATHLEN; 1697 break; 1698 1699 default: 1700 error = vop_stdpathconf(ap); 1701 break; 1702 } 1703 return (error); 1704 } 1705 1706 /* 1707 * Vnode operation to remove a named attribute. 1708 */ 1709 static int 1710 ext2_deleteextattr(struct vop_deleteextattr_args *ap) 1711 { 1712 struct inode *ip; 1713 struct m_ext2fs *fs; 1714 int error; 1715 1716 ip = VTOI(ap->a_vp); 1717 fs = ip->i_e2fs; 1718 1719 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1720 return (EOPNOTSUPP); 1721 1722 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1723 return (EOPNOTSUPP); 1724 1725 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1726 ap->a_cred, ap->a_td, VWRITE); 1727 if (error) 1728 return (error); 1729 1730 error = ENOATTR; 1731 1732 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1733 error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name); 1734 if (error != ENOATTR) 1735 return (error); 1736 } 1737 1738 if (ip->i_facl) 1739 error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name); 1740 1741 return (error); 1742 } 1743 1744 /* 1745 * Vnode operation to retrieve a named extended attribute. 
1746 */ 1747 static int 1748 ext2_getextattr(struct vop_getextattr_args *ap) 1749 { 1750 struct inode *ip; 1751 struct m_ext2fs *fs; 1752 int error; 1753 1754 ip = VTOI(ap->a_vp); 1755 fs = ip->i_e2fs; 1756 1757 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1758 return (EOPNOTSUPP); 1759 1760 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1761 return (EOPNOTSUPP); 1762 1763 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1764 ap->a_cred, ap->a_td, VREAD); 1765 if (error) 1766 return (error); 1767 1768 if (ap->a_size != NULL) 1769 *ap->a_size = 0; 1770 1771 error = ENOATTR; 1772 1773 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1774 error = ext2_extattr_inode_get(ip, ap->a_attrnamespace, 1775 ap->a_name, ap->a_uio, ap->a_size); 1776 if (error != ENOATTR) 1777 return (error); 1778 } 1779 1780 if (ip->i_facl) 1781 error = ext2_extattr_block_get(ip, ap->a_attrnamespace, 1782 ap->a_name, ap->a_uio, ap->a_size); 1783 1784 return (error); 1785 } 1786 1787 /* 1788 * Vnode operation to retrieve extended attributes on a vnode. 
1789 */ 1790 static int 1791 ext2_listextattr(struct vop_listextattr_args *ap) 1792 { 1793 struct inode *ip; 1794 struct m_ext2fs *fs; 1795 int error; 1796 1797 ip = VTOI(ap->a_vp); 1798 fs = ip->i_e2fs; 1799 1800 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1801 return (EOPNOTSUPP); 1802 1803 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1804 return (EOPNOTSUPP); 1805 1806 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1807 ap->a_cred, ap->a_td, VREAD); 1808 if (error) 1809 return (error); 1810 1811 if (ap->a_size != NULL) 1812 *ap->a_size = 0; 1813 1814 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1815 error = ext2_extattr_inode_list(ip, ap->a_attrnamespace, 1816 ap->a_uio, ap->a_size); 1817 if (error) 1818 return (error); 1819 } 1820 1821 if (ip->i_facl) 1822 error = ext2_extattr_block_list(ip, ap->a_attrnamespace, 1823 ap->a_uio, ap->a_size); 1824 1825 return (error); 1826 } 1827 1828 /* 1829 * Vnode operation to set a named attribute. 
1830 */ 1831 static int 1832 ext2_setextattr(struct vop_setextattr_args *ap) 1833 { 1834 struct inode *ip; 1835 struct m_ext2fs *fs; 1836 int error; 1837 1838 ip = VTOI(ap->a_vp); 1839 fs = ip->i_e2fs; 1840 1841 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1842 return (EOPNOTSUPP); 1843 1844 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1845 return (EOPNOTSUPP); 1846 1847 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1848 ap->a_cred, ap->a_td, VWRITE); 1849 if (error) 1850 return (error); 1851 1852 error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name); 1853 if (error) 1854 return (error); 1855 1856 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1857 error = ext2_extattr_inode_set(ip, ap->a_attrnamespace, 1858 ap->a_name, ap->a_uio); 1859 if (error != ENOSPC) 1860 return (error); 1861 } 1862 1863 error = ext2_extattr_block_set(ip, ap->a_attrnamespace, 1864 ap->a_name, ap->a_uio); 1865 1866 return (error); 1867 } 1868 1869 /* 1870 * Vnode pointer to File handle 1871 */ 1872 /* ARGSUSED */ 1873 static int 1874 ext2_vptofh(struct vop_vptofh_args *ap) 1875 { 1876 struct inode *ip; 1877 struct ufid *ufhp; 1878 1879 ip = VTOI(ap->a_vp); 1880 ufhp = (struct ufid *)ap->a_fhp; 1881 ufhp->ufid_len = sizeof(struct ufid); 1882 ufhp->ufid_ino = ip->i_number; 1883 ufhp->ufid_gen = ip->i_gen; 1884 return (0); 1885 } 1886 1887 /* 1888 * Initialize the vnode associated with a new inode, handle aliased 1889 * vnodes. 1890 */ 1891 int 1892 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1893 { 1894 struct inode *ip; 1895 struct vnode *vp; 1896 1897 vp = *vpp; 1898 ip = VTOI(vp); 1899 vp->v_type = IFTOVT(ip->i_mode); 1900 if (vp->v_type == VFIFO) 1901 vp->v_op = fifoops; 1902 1903 if (ip->i_number == EXT2_ROOTINO) 1904 vp->v_vflag |= VV_ROOT; 1905 ip->i_modrev = init_va_filerev(); 1906 *vpp = vp; 1907 return (0); 1908 } 1909 1910 /* 1911 * Allocate a new inode. 
*/
/*
 * ext2_makeinode: allocate an inode of type/permissions `mode' in directory
 * dvp and enter it under the name in cnp.
 *
 * A mode without a file-type part is treated as a regular file.  On success
 * the new vnode is stored in *vpp and 0 is returned.  On failure *vpp is
 * NULL and an errno value is returned; if the inode had already been
 * allocated it is released with a zero link count.
 */
static int
ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct inode *ip, *pdir;
	struct vnode *tvp;
	int error;

	pdir = VTOI(dvp);
#ifdef INVARIANTS
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ext2_makeinode: no name");
#endif
	*vpp = NULL;
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp);
	if (error) {
		return (error);
	}
	ip = VTOI(tvp);
	/* The new inode takes the group of its parent directory. */
	ip->i_gid = pdir->i_gid;
#ifdef SUIDDIR
	{
		/*
		 * if we are
		 * not the owner of the directory,
		 * and we are hacking owners here, (only do this where told to)
		 * and we are not giving it TOO root, (would subvert quotas)
		 * then go ahead and give it to the other user.
		 * Note that this drops off the execute bits for security.
		 */
		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
		    (pdir->i_mode & ISUID) &&
		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
			ip->i_uid = pdir->i_uid;
			mode &= ~07111;
		} else {
			ip->i_uid = cnp->cn_cred->cr_uid;
		}
	}
#else
	ip->i_uid = cnp->cn_cred->cr_uid;
#endif
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = mode;
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
	ip->i_nlink = 1;
	/*
	 * Drop the setgid bit when the creator is not a member of the
	 * inode's group and the PRIV_VFS_RETAINSUGID check does not pass.
	 */
	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) {
		if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0))
			ip->i_mode &= ~ISGID;
	}

	if (cnp->cn_flags & ISWHITEOUT)
		ip->i_flags |= UF_OPAQUE;

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	error = ext2_update(tvp, !DOINGASYNC(tvp));
	if (error)
		goto bad;

#ifdef UFS_ACL
	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
		error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
		    cnp->cn_cred, cnp->cn_thread);
		if (error)
			goto bad;
	}
#endif /* UFS_ACL */

	error = ext2_direnter(ip, dvp, cnp);
	if (error)
		goto bad;

	*vpp = tvp;
	return (0);

bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	ip->i_nlink = 0;
	ip->i_flag |= IN_CHANGE;
	vput(tvp);
	return (error);
}

/*
 * Vnode op for reading.
 *
 * Copies data from the file into ap->a_uio one filesystem block at a time
 * until the request is satisfied or the end of the file is reached.
 * Returns 0 or an errno value; EOVERFLOW is returned for an offset inside
 * the file but at or beyond the filesystem's maximum file size.  Unless the
 * mount is read-only or noatime, the inode is flagged IN_ACCESS when the
 * call succeeded or transferred any data.
 */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;

	/* The bits above IO_SEQSHIFT in the I/O flags carry the seqcount. */
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2_read");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", "ext2_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2_read", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->i_e2fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	/* bp is reset to NULL at the start of every iteration. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		/* Stop at end of file. */
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * Transfer at most to the end of this block, and no more
		 * than the caller asked for or the file still holds.
		 */
		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * Last block of the file: plain read.  Otherwise use
		 * clustered reads unless the mount disables them, falling
		 * back to a one-block read-ahead when seqcount is above 1.
		 */
		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount,
			    0, &bp);
		} else if (seqcount > 1) {
			u_int nextsize = blksize(fs, ip, nextlbn);

			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;
		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it. so it must have come
	 * from a 'break' statement
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for ioctl.
 *
 * Only FIOSEEKDATA and FIOSEEKHOLE are handled, both by vn_bmap_seekhole();
 * every other command yields ENOTTY.
 */
static int
ext2_ioctl(struct vop_ioctl_args *ap)
{

	switch (ap->a_command) {
	case FIOSEEKDATA:
	case FIOSEEKHOLE:
		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
		    (off_t *)ap->a_data, ap->a_cred));
	default:
		return (ENOTTY);
	}
}

/*
 * Vnode op for writing.
 *
 * Copies data from ap->a_uio into the file one filesystem block at a time,
 * allocating blocks with ext2_balloc() and growing the file as needed.
 * Returns 0 or an errno value: EPERM for a non-appending write to an
 * APPEND-flagged regular file, EFBIG when the write would pass the
 * filesystem's maximum file size or the caller's file-size limit.  With
 * IO_UNIT a failed write is undone: the file is truncated back to its old
 * size and the uio is restored.
 */
static int
ext2_write(struct vop_write_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn;
	off_t osize;
	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;

	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;

	/* The bits above IO_SEQSHIFT in the I/O flags carry the seqcount. */
	seqcount = ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", "ext2_write");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
		if ((ioflag & IO_SYNC) == 0)
			panic("ext2_write: nonsync dir write");
		break;
	default:
		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
		    vp->v_type, (intmax_t)uio->uio_offset,
		    (intmax_t)uio->uio_resid);
	}

	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
	fs = ip->i_e2fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
		return (EFBIG);

	/* Remember the starting state so a failed write can be undone. */
	resid = uio->uio_resid;
	osize = ip->i_size;
	/* Pass the seqcount, capped at BA_SEQMAX, to ext2_balloc(). */
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Write at most to the end of the current block. */
		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		/* Tell the pager about the new size before extending. */
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->e2fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
		    ap->a_cred, &bp, flags);
		if (error != 0)
			break;

		if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL))
			bp->b_flags |= B_NOCACHE;
		if (uio->uio_offset + xfersize > ip->i_size)
			ip->i_size = uio->uio_offset + xfersize;
		/* Never copy more than the buffer actually holds. */
		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we need only clear buffers with a transfer size
		 * equal to the block size because buffers with a shorter
		 * transfer size were cleared above by the call to ext2_balloc()
		 * with the BA_CLRBUF flag set.
		 *
		 * If the source region for uiomove identically mmaps the
		 * buffer, uiomove() performed the NOP copy, and the buffer
		 * content remains valid because the page fault handler
		 * validated the pages.
		 */
		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
		    fs->e2fs_bsize == xfersize)
			vfs_bio_clrbuf(bp);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
		    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->e2fs_fsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount, 0);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
			ip->i_mode &= ~(ISUID | ISGID);
	}
	if (error) {
		/*
		 * With IO_UNIT a failed write is undone: cut the file back
		 * to its old size and restore the uio offset and count.
		 */
		if (ioflag & IO_UNIT) {
			(void)ext2_truncate(vp, osize,
			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	}
	/* Some data was written: mark times and, for IO_SYNC, write the inode. */
	if (uio->uio_resid != resid) {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (ioflag & IO_SYNC)
			error = ext2_update(vp, 1);
	}
	return (error);
}