1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 41 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 42 * $FreeBSD$ 43 */ 44 45 #include "opt_suiddir.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/fcntl.h> 51 #include <sys/filio.h> 52 #include <sys/stat.h> 53 #include <sys/bio.h> 54 #include <sys/buf.h> 55 #include <sys/endian.h> 56 #include <sys/priv.h> 57 #include <sys/rwlock.h> 58 #include <sys/mount.h> 59 #include <sys/unistd.h> 60 #include <sys/time.h> 61 #include <sys/vnode.h> 62 #include <sys/namei.h> 63 #include <sys/lockf.h> 64 #include <sys/event.h> 65 #include <sys/conf.h> 66 #include <sys/file.h> 67 #include <sys/extattr.h> 68 #include <sys/vmmeter.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/vm_extern.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 #include <vm/vm_pager.h> 76 #include <vm/vnode_pager.h> 77 78 #include "opt_directio.h" 79 80 #include <ufs/ufs/dir.h> 81 82 #include <fs/ext2fs/fs.h> 83 #include <fs/ext2fs/inode.h> 84 #include <fs/ext2fs/ext2_acl.h> 85 #include <fs/ext2fs/ext2_extern.h> 86 #include <fs/ext2fs/ext2fs.h> 87 #include <fs/ext2fs/ext2_dinode.h> 88 #include <fs/ext2fs/ext2_dir.h> 89 #include <fs/ext2fs/ext2_mount.h> 90 #include <fs/ext2fs/ext2_extattr.h> 91 92 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 93 static void ext2_itimes_locked(struct vnode *); 94 static 
int ext4_ext_read(struct vop_read_args *); 95 static int ext2_ind_read(struct vop_read_args *); 96 97 static vop_access_t ext2_access; 98 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 99 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 100 struct thread *); 101 static vop_close_t ext2_close; 102 static vop_create_t ext2_create; 103 static vop_fsync_t ext2_fsync; 104 static vop_getattr_t ext2_getattr; 105 static vop_ioctl_t ext2_ioctl; 106 static vop_link_t ext2_link; 107 static vop_mkdir_t ext2_mkdir; 108 static vop_mknod_t ext2_mknod; 109 static vop_open_t ext2_open; 110 static vop_pathconf_t ext2_pathconf; 111 static vop_print_t ext2_print; 112 static vop_read_t ext2_read; 113 static vop_readlink_t ext2_readlink; 114 static vop_remove_t ext2_remove; 115 static vop_rename_t ext2_rename; 116 static vop_rmdir_t ext2_rmdir; 117 static vop_setattr_t ext2_setattr; 118 static vop_strategy_t ext2_strategy; 119 static vop_symlink_t ext2_symlink; 120 static vop_write_t ext2_write; 121 static vop_deleteextattr_t ext2_deleteextattr; 122 static vop_getextattr_t ext2_getextattr; 123 static vop_listextattr_t ext2_listextattr; 124 static vop_setextattr_t ext2_setextattr; 125 static vop_vptofh_t ext2_vptofh; 126 static vop_close_t ext2fifo_close; 127 static vop_kqfilter_t ext2fifo_kqfilter; 128 129 /* Global vfs data structures for ext2. 
*/ 130 struct vop_vector ext2_vnodeops = { 131 .vop_default = &default_vnodeops, 132 .vop_access = ext2_access, 133 .vop_bmap = ext2_bmap, 134 .vop_cachedlookup = ext2_lookup, 135 .vop_close = ext2_close, 136 .vop_create = ext2_create, 137 .vop_fsync = ext2_fsync, 138 .vop_getpages = vnode_pager_local_getpages, 139 .vop_getpages_async = vnode_pager_local_getpages_async, 140 .vop_getattr = ext2_getattr, 141 .vop_inactive = ext2_inactive, 142 .vop_ioctl = ext2_ioctl, 143 .vop_link = ext2_link, 144 .vop_lookup = vfs_cache_lookup, 145 .vop_mkdir = ext2_mkdir, 146 .vop_mknod = ext2_mknod, 147 .vop_open = ext2_open, 148 .vop_pathconf = ext2_pathconf, 149 .vop_poll = vop_stdpoll, 150 .vop_print = ext2_print, 151 .vop_read = ext2_read, 152 .vop_readdir = ext2_readdir, 153 .vop_readlink = ext2_readlink, 154 .vop_reallocblks = ext2_reallocblks, 155 .vop_reclaim = ext2_reclaim, 156 .vop_remove = ext2_remove, 157 .vop_rename = ext2_rename, 158 .vop_rmdir = ext2_rmdir, 159 .vop_setattr = ext2_setattr, 160 .vop_strategy = ext2_strategy, 161 .vop_symlink = ext2_symlink, 162 .vop_write = ext2_write, 163 .vop_deleteextattr = ext2_deleteextattr, 164 .vop_getextattr = ext2_getextattr, 165 .vop_listextattr = ext2_listextattr, 166 .vop_setextattr = ext2_setextattr, 167 #ifdef UFS_ACL 168 .vop_getacl = ext2_getacl, 169 .vop_setacl = ext2_setacl, 170 .vop_aclcheck = ext2_aclcheck, 171 #endif /* UFS_ACL */ 172 .vop_vptofh = ext2_vptofh, 173 }; 174 175 struct vop_vector ext2_fifoops = { 176 .vop_default = &fifo_specops, 177 .vop_access = ext2_access, 178 .vop_close = ext2fifo_close, 179 .vop_fsync = ext2_fsync, 180 .vop_getattr = ext2_getattr, 181 .vop_inactive = ext2_inactive, 182 .vop_kqfilter = ext2fifo_kqfilter, 183 .vop_print = ext2_print, 184 .vop_read = VOP_PANIC, 185 .vop_reclaim = ext2_reclaim, 186 .vop_setattr = ext2_setattr, 187 .vop_write = VOP_PANIC, 188 .vop_vptofh = ext2_vptofh, 189 }; 190 191 /* 192 * A virgin directory (no blushing please). 
193 * Note that the type and namlen fields are reversed relative to ext2. 194 * Also, we don't use `struct odirtemplate', since it would just cause 195 * endianness problems. 196 */ 197 static struct dirtemplate mastertemplate = { 198 0, 12, 1, EXT2_FT_DIR, ".", 199 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 200 }; 201 static struct dirtemplate omastertemplate = { 202 0, 12, 1, EXT2_FT_UNKNOWN, ".", 203 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 204 }; 205 206 static void 207 ext2_itimes_locked(struct vnode *vp) 208 { 209 struct inode *ip; 210 struct timespec ts; 211 212 ASSERT_VI_LOCKED(vp, __func__); 213 214 ip = VTOI(vp); 215 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 216 return; 217 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 218 ip->i_flag |= IN_LAZYMOD; 219 else 220 ip->i_flag |= IN_MODIFIED; 221 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 222 vfs_timestamp(&ts); 223 if (ip->i_flag & IN_ACCESS) { 224 ip->i_atime = ts.tv_sec; 225 ip->i_atimensec = ts.tv_nsec; 226 } 227 if (ip->i_flag & IN_UPDATE) { 228 ip->i_mtime = ts.tv_sec; 229 ip->i_mtimensec = ts.tv_nsec; 230 ip->i_modrev++; 231 } 232 if (ip->i_flag & IN_CHANGE) { 233 ip->i_ctime = ts.tv_sec; 234 ip->i_ctimensec = ts.tv_nsec; 235 } 236 } 237 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 238 } 239 240 void 241 ext2_itimes(struct vnode *vp) 242 { 243 244 VI_LOCK(vp); 245 ext2_itimes_locked(vp); 246 VI_UNLOCK(vp); 247 } 248 249 /* 250 * Create a regular file 251 */ 252 static int 253 ext2_create(struct vop_create_args *ap) 254 { 255 int error; 256 257 error = 258 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 259 ap->a_dvp, ap->a_vpp, ap->a_cnp); 260 if (error != 0) 261 return (error); 262 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 263 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 264 return (0); 265 } 266 267 static int 268 ext2_open(struct vop_open_args *ap) 269 { 270 271 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 272 return (EOPNOTSUPP); 
273 274 /* 275 * Files marked append-only must be opened for appending. 276 */ 277 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 278 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 279 return (EPERM); 280 281 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 282 283 return (0); 284 } 285 286 /* 287 * Close called. 288 * 289 * Update the times on the inode. 290 */ 291 static int 292 ext2_close(struct vop_close_args *ap) 293 { 294 struct vnode *vp = ap->a_vp; 295 296 VI_LOCK(vp); 297 if (vp->v_usecount > 1) 298 ext2_itimes_locked(vp); 299 VI_UNLOCK(vp); 300 return (0); 301 } 302 303 static int 304 ext2_access(struct vop_access_args *ap) 305 { 306 struct vnode *vp = ap->a_vp; 307 struct inode *ip = VTOI(vp); 308 accmode_t accmode = ap->a_accmode; 309 int error; 310 311 if (vp->v_type == VBLK || vp->v_type == VCHR) 312 return (EOPNOTSUPP); 313 314 /* 315 * Disallow write attempts on read-only file systems; 316 * unless the file is a socket, fifo, or a block or 317 * character device resident on the file system. 318 */ 319 if (accmode & VWRITE) { 320 switch (vp->v_type) { 321 case VDIR: 322 case VLNK: 323 case VREG: 324 if (vp->v_mount->mnt_flag & MNT_RDONLY) 325 return (EROFS); 326 break; 327 default: 328 break; 329 } 330 } 331 332 /* If immutable bit set, nobody gets to write it. 
*/ 333 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 334 return (EPERM); 335 336 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 337 ap->a_accmode, ap->a_cred, NULL); 338 return (error); 339 } 340 341 static int 342 ext2_getattr(struct vop_getattr_args *ap) 343 { 344 struct vnode *vp = ap->a_vp; 345 struct inode *ip = VTOI(vp); 346 struct vattr *vap = ap->a_vap; 347 348 ext2_itimes(vp); 349 /* 350 * Copy from inode table 351 */ 352 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 353 vap->va_fileid = ip->i_number; 354 vap->va_mode = ip->i_mode & ~IFMT; 355 vap->va_nlink = ip->i_nlink; 356 vap->va_uid = ip->i_uid; 357 vap->va_gid = ip->i_gid; 358 vap->va_rdev = ip->i_rdev; 359 vap->va_size = ip->i_size; 360 vap->va_atime.tv_sec = ip->i_atime; 361 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 362 vap->va_mtime.tv_sec = ip->i_mtime; 363 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 364 vap->va_ctime.tv_sec = ip->i_ctime; 365 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 366 if E2DI_HAS_XTIME(ip) { 367 vap->va_birthtime.tv_sec = ip->i_birthtime; 368 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 369 } 370 vap->va_flags = ip->i_flags; 371 vap->va_gen = ip->i_gen; 372 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 373 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 374 vap->va_type = IFTOVT(ip->i_mode); 375 vap->va_filerev = ip->i_modrev; 376 return (0); 377 } 378 379 /* 380 * Set attribute vnode op. called from several syscalls 381 */ 382 static int 383 ext2_setattr(struct vop_setattr_args *ap) 384 { 385 struct vattr *vap = ap->a_vap; 386 struct vnode *vp = ap->a_vp; 387 struct inode *ip = VTOI(vp); 388 struct ucred *cred = ap->a_cred; 389 struct thread *td = curthread; 390 int error; 391 392 /* 393 * Check for unsettable attributes. 
394 */ 395 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 396 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 397 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 398 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 399 return (EINVAL); 400 } 401 if (vap->va_flags != VNOVAL) { 402 /* Disallow flags not supported by ext2fs. */ 403 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 404 return (EOPNOTSUPP); 405 406 if (vp->v_mount->mnt_flag & MNT_RDONLY) 407 return (EROFS); 408 /* 409 * Callers may only modify the file flags on objects they 410 * have VADMIN rights for. 411 */ 412 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 413 return (error); 414 /* 415 * Unprivileged processes and privileged processes in 416 * jail() are not permitted to unset system flags, or 417 * modify flags if any system flags are set. 418 * Privileged non-jail processes may not modify system flags 419 * if securelevel > 0 and any existing system flags are set. 420 */ 421 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 422 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 423 error = securelevel_gt(cred, 0); 424 if (error) 425 return (error); 426 } 427 } else { 428 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 429 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 430 return (EPERM); 431 } 432 ip->i_flags = vap->va_flags; 433 ip->i_flag |= IN_CHANGE; 434 if (ip->i_flags & (IMMUTABLE | APPEND)) 435 return (0); 436 } 437 if (ip->i_flags & (IMMUTABLE | APPEND)) 438 return (EPERM); 439 /* 440 * Go through the fields and update iff not VNOVAL. 
441 */ 442 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 443 if (vp->v_mount->mnt_flag & MNT_RDONLY) 444 return (EROFS); 445 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 446 td)) != 0) 447 return (error); 448 } 449 if (vap->va_size != VNOVAL) { 450 /* 451 * Disallow write attempts on read-only file systems; 452 * unless the file is a socket, fifo, or a block or 453 * character device resident on the file system. 454 */ 455 switch (vp->v_type) { 456 case VDIR: 457 return (EISDIR); 458 case VLNK: 459 case VREG: 460 if (vp->v_mount->mnt_flag & MNT_RDONLY) 461 return (EROFS); 462 break; 463 default: 464 break; 465 } 466 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 467 return (error); 468 } 469 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 470 if (vp->v_mount->mnt_flag & MNT_RDONLY) 471 return (EROFS); 472 /* 473 * From utimes(2): 474 * If times is NULL, ... The caller must be the owner of 475 * the file, have permission to write the file, or be the 476 * super-user. 477 * If times is non-NULL, ... The caller must be the owner of 478 * the file or be the super-user. 
479 */ 480 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 481 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 482 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 483 return (error); 484 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 485 if (vap->va_atime.tv_sec != VNOVAL) { 486 ip->i_flag &= ~IN_ACCESS; 487 ip->i_atime = vap->va_atime.tv_sec; 488 ip->i_atimensec = vap->va_atime.tv_nsec; 489 } 490 if (vap->va_mtime.tv_sec != VNOVAL) { 491 ip->i_flag &= ~IN_UPDATE; 492 ip->i_mtime = vap->va_mtime.tv_sec; 493 ip->i_mtimensec = vap->va_mtime.tv_nsec; 494 } 495 ip->i_birthtime = vap->va_birthtime.tv_sec; 496 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 497 error = ext2_update(vp, 0); 498 if (error) 499 return (error); 500 } 501 error = 0; 502 if (vap->va_mode != (mode_t)VNOVAL) { 503 if (vp->v_mount->mnt_flag & MNT_RDONLY) 504 return (EROFS); 505 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 506 } 507 return (error); 508 } 509 510 /* 511 * Change the mode on a file. 512 * Inode must be locked before calling. 513 */ 514 static int 515 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 516 { 517 struct inode *ip = VTOI(vp); 518 int error; 519 520 /* 521 * To modify the permissions on a file, must possess VADMIN 522 * for that file. 523 */ 524 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 525 return (error); 526 /* 527 * Privileged processes may set the sticky bit on non-directories, 528 * as well as set the setgid bit on a file with a group that the 529 * process is not a member of. 
530 */ 531 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 532 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 533 if (error) 534 return (EFTYPE); 535 } 536 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 537 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 538 if (error) 539 return (error); 540 } 541 ip->i_mode &= ~ALLPERMS; 542 ip->i_mode |= (mode & ALLPERMS); 543 ip->i_flag |= IN_CHANGE; 544 return (0); 545 } 546 547 /* 548 * Perform chown operation on inode ip; 549 * inode must be locked prior to call. 550 */ 551 static int 552 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 553 struct thread *td) 554 { 555 struct inode *ip = VTOI(vp); 556 uid_t ouid; 557 gid_t ogid; 558 int error = 0; 559 560 if (uid == (uid_t)VNOVAL) 561 uid = ip->i_uid; 562 if (gid == (gid_t)VNOVAL) 563 gid = ip->i_gid; 564 /* 565 * To modify the ownership of a file, must possess VADMIN 566 * for that file. 567 */ 568 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 569 return (error); 570 /* 571 * To change the owner of a file, or change the group of a file 572 * to a group of which we are not a member, the caller must 573 * have privilege. 574 */ 575 if (uid != ip->i_uid || (gid != ip->i_gid && 576 !groupmember(gid, cred))) { 577 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 578 if (error) 579 return (error); 580 } 581 ogid = ip->i_gid; 582 ouid = ip->i_uid; 583 ip->i_gid = gid; 584 ip->i_uid = uid; 585 ip->i_flag |= IN_CHANGE; 586 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 587 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 588 ip->i_mode &= ~(ISUID | ISGID); 589 } 590 return (0); 591 } 592 593 /* 594 * Synch an open file. 595 */ 596 /* ARGSUSED */ 597 static int 598 ext2_fsync(struct vop_fsync_args *ap) 599 { 600 /* 601 * Flush all dirty buffers associated with a vnode. 
602 */ 603 604 vop_stdfsync(ap); 605 606 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 607 } 608 609 /* 610 * Mknod vnode call 611 */ 612 /* ARGSUSED */ 613 static int 614 ext2_mknod(struct vop_mknod_args *ap) 615 { 616 struct vattr *vap = ap->a_vap; 617 struct vnode **vpp = ap->a_vpp; 618 struct inode *ip; 619 ino_t ino; 620 int error; 621 622 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 623 ap->a_dvp, vpp, ap->a_cnp); 624 if (error) 625 return (error); 626 ip = VTOI(*vpp); 627 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 628 if (vap->va_rdev != VNOVAL) { 629 /* 630 * Want to be able to use this to make badblock 631 * inodes, so don't truncate the dev number. 632 */ 633 ip->i_rdev = vap->va_rdev; 634 } 635 /* 636 * Remove inode, then reload it through VFS_VGET so it is 637 * checked to see if it is an alias of an existing entry in 638 * the inode cache. XXX I don't believe this is necessary now. 639 */ 640 (*vpp)->v_type = VNON; 641 ino = ip->i_number; /* Save this before vgone() invalidates ip. 
*/ 642 vgone(*vpp); 643 vput(*vpp); 644 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 645 if (error) { 646 *vpp = NULL; 647 return (error); 648 } 649 return (0); 650 } 651 652 static int 653 ext2_remove(struct vop_remove_args *ap) 654 { 655 struct inode *ip; 656 struct vnode *vp = ap->a_vp; 657 struct vnode *dvp = ap->a_dvp; 658 int error; 659 660 ip = VTOI(vp); 661 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 662 (VTOI(dvp)->i_flags & APPEND)) { 663 error = EPERM; 664 goto out; 665 } 666 error = ext2_dirremove(dvp, ap->a_cnp); 667 if (error == 0) { 668 ip->i_nlink--; 669 ip->i_flag |= IN_CHANGE; 670 } 671 out: 672 return (error); 673 } 674 675 /* 676 * link vnode call 677 */ 678 static int 679 ext2_link(struct vop_link_args *ap) 680 { 681 struct vnode *vp = ap->a_vp; 682 struct vnode *tdvp = ap->a_tdvp; 683 struct componentname *cnp = ap->a_cnp; 684 struct inode *ip; 685 int error; 686 687 #ifdef INVARIANTS 688 if ((cnp->cn_flags & HASBUF) == 0) 689 panic("ext2_link: no name"); 690 #endif 691 ip = VTOI(vp); 692 if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) { 693 error = EMLINK; 694 goto out; 695 } 696 if (ip->i_flags & (IMMUTABLE | APPEND)) { 697 error = EPERM; 698 goto out; 699 } 700 ip->i_nlink++; 701 ip->i_flag |= IN_CHANGE; 702 error = ext2_update(vp, !DOINGASYNC(vp)); 703 if (!error) 704 error = ext2_direnter(ip, tdvp, cnp); 705 if (error) { 706 ip->i_nlink--; 707 ip->i_flag |= IN_CHANGE; 708 } 709 out: 710 return (error); 711 } 712 713 /* 714 * Rename system call. 715 * rename("foo", "bar"); 716 * is essentially 717 * unlink("bar"); 718 * link("foo", "bar"); 719 * unlink("foo"); 720 * but ``atomically''. Can't do full commit without saving state in the 721 * inode on disk which isn't feasible at this time. Best we can do is 722 * always guarantee the target exists. 723 * 724 * Basic algorithm is: 725 * 726 * 1) Bump link count on source while we're linking it to the 727 * target. 
*    This also ensures the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
 * 2) Link source to destination.  If destination already exists,
 *    delete it first.
 * 3) Unlink source reference to inode if still around. If a
 *    directory was moved and the parent of the destination
 *    is different from the source, patch the ".." entry in the
 *    directory.
 *
 * Vnode handling as visible in the code below: on the early `abortit'
 * path tdvp/tvp are released with vput() (tdvp only with vrele() when it
 * is the same vnode as tvp) and fdvp/fvp with vrele().  Past that point
 * fdvp is vrele()'d up front and re-acquired by relookup() in step 3.
 */
static int
ext2_rename(struct vop_rename_args *ap)
{
	struct vnode *tvp = ap->a_tvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	/* ip: source inode; dp: current parent dir; xp: target, if any. */
	struct inode *ip, *xp, *dp;
	struct dirtemplate dirbuf;
	int doingdirectory = 0, oldparent = 0, newparent = 0;
	int error = 0;
	u_char namlen;

#ifdef INVARIANTS
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("ext2_rename: no name");
#endif
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		/*
		 * Common early exit: every later `goto abortit' jumps into
		 * this block with `error' already set.
		 */
abortit:
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	/* An existing target must itself be removable. */
	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (VTOI(tdvp)->i_flags & APPEND))) {
		error = EPERM;
		goto abortit;
	}

	/*
	 * Renaming a file to itself has no effect.  The upper layers should
	 * not call us in that case.  Temporarily just warn if they do.
	 */
	if (fvp == tvp) {
		printf("ext2_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto abortit;
	}

	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto abortit;
	dp = VTOI(fdvp);
	ip = VTOI(fvp);
	/* Step 1 below adds a temporary link, so there must be room for it. */
	if (ip->i_nlink >= EXT2_LINK_MAX) {
		VOP_UNLOCK(fvp, 0);
		error = EMLINK;
		goto abortit;
	}
	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
	    || (dp->i_flags & APPEND)) {
		VOP_UNLOCK(fvp, 0);
		error = EPERM;
		goto abortit;
	}
	if ((ip->i_mode & IFMT) == IFDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
		    (ip->i_flag & IN_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		/* Mark the directory as being renamed; cleared on every exit. */
		ip->i_flag |= IN_RENAME;
		oldparent = dp->i_number;
		doingdirectory++;
	}
	vrele(fdvp);

	/*
	 * When the target exists, both the directory
	 * and target vnodes are returned locked.
	 */
	dp = VTOI(tdvp);
	xp = NULL;
	if (tvp)
		xp = VTOI(tvp);

	/*
	 * 1) Bump link count while we're moving stuff
	 *    around.  If we crash somewhere before
	 *    completing our work, the link count
	 *    may be wrong, but correctable.
	 */
	ip->i_nlink++;
	ip->i_flag |= IN_CHANGE;
	if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
		VOP_UNLOCK(fvp, 0);
		goto bad;
	}

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) then the source directory must not be in the
	 * directory hierarchy above the target, as this would
	 * orphan everything below the source directory. Also
	 * the user must have write permission in the source so
	 * as to be able to change "..". We must repeat the call
	 * to namei, as the parent directory is unlocked by the
	 * call to checkpath().
	 */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	/* newparent stays 0 when source and target share a parent. */
	if (oldparent != dp->i_number)
		newparent = dp->i_number;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (xp != NULL)
			vput(tvp);
		error = ext2_checkpath(ip, dp, tcnp->cn_cred);
		if (error)
			goto out;
		VREF(tdvp);
		error = relookup(tdvp, &tvp, tcnp);
		if (error)
			goto out;
		vrele(tdvp);
		/* Re-derive dp/xp: the target may have changed meanwhile. */
		dp = VTOI(tdvp);
		xp = NULL;
		if (tvp)
			xp = VTOI(tvp);
	}
	/*
	 * 2) If target doesn't exist, link the target
	 *    to the source and unlink the source.
	 *    Otherwise, rewrite the target directory
	 *    entry to reference the source inode and
	 *    expunge the original entry's existence.
	 */
	if (xp == NULL) {
		if (dp->i_devvp != ip->i_devvp)
			panic("ext2_rename: EXDEV");
		/*
		 * Account for ".." in new directory.
		 * When source and destination have the same
		 * parent we don't fool with the link count.
		 */
		if (doingdirectory && newparent) {
			if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
				error = EMLINK;
				goto bad;
			}
			dp->i_nlink++;
			dp->i_flag |= IN_CHANGE;
			error = ext2_update(tdvp, !DOINGASYNC(tdvp));
			if (error)
				goto bad;
		}
		error = ext2_direnter(ip, tdvp, tcnp);
		if (error) {
			/* Roll back the ".." link accounted for above. */
			if (doingdirectory && newparent) {
				dp->i_nlink--;
				dp->i_flag |= IN_CHANGE;
				(void)ext2_update(tdvp, 1);
			}
			goto bad;
		}
		vput(tdvp);
	} else {
		if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
			panic("ext2_rename: EXDEV");
		/*
		 * Short circuit rename(foo, foo).
		 */
		if (xp->i_number == ip->i_number)
			panic("ext2_rename: same file");
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
		    tcnp->cn_cred->cr_uid != dp->i_uid &&
		    xp->i_uid != tcnp->cn_cred->cr_uid) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((xp->i_mode & IFMT) == IFDIR) {
			if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
			    xp->i_nlink > 2) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		error = ext2_dirrewrite(dp, ip, tcnp);
		if (error)
			goto bad;
		/*
		 * If the target directory is in the same
		 * directory as the source directory,
		 * decrement the link count on the parent
		 * of the target directory.
		 */
		if (doingdirectory && !newparent) {
			dp->i_nlink--;
			dp->i_flag |= IN_CHANGE;
		}
		vput(tdvp);
		/*
		 * Adjust the link count of the target to
		 * reflect the dirrewrite above.  If this is
		 * a directory it is empty and there are
		 * no links to it, so we can squash the inode and
		 * any space associated with it.  We disallowed
		 * renaming over top of a directory with links to
		 * it above, as the remaining link would point to
		 * a directory without "." or ".." entries.
		 */
		xp->i_nlink--;
		if (doingdirectory) {
			/* Second decrement accounts for the directory's "." link. */
			if (--xp->i_nlink != 0)
				panic("ext2_rename: linked directory");
			error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
			    tcnp->cn_cred, tcnp->cn_thread);
		}
		xp->i_flag |= IN_CHANGE;
		vput(tvp);
		xp = NULL;
	}

	/*
	 * 3) Unlink the source.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	VREF(fdvp);
	error = relookup(fdvp, &fvp, fcnp);
	if (error == 0)
		vrele(fdvp);
	if (fvp != NULL) {
		/* From here on xp is the re-looked-up source, dp its parent. */
		xp = VTOI(fvp);
		dp = VTOI(fdvp);
	} else {
		/*
		 * From name has disappeared.  IN_RENAME is not sufficient
		 * to protect against directory races due to timing windows,
		 * so we can't panic here.
		 */
		vrele(ap->a_fvp);
		return (0);
	}
	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; its link
	 * count of three would cause a rmdir to fail with ENOTEMPTY.
	 * The IN_RENAME flag ensures that it cannot be moved by another
	 * rename.
	 */
	if (xp != ip) {
		/*
		 * From name resolves to a different inode.  IN_RENAME is
		 * not sufficient protection against timing window races
		 * so we can't panic here.
		 */
	} else {
		/*
		 * If the source is a directory with a
		 * new parent, the link count of the old
		 * parent directory must be decremented
		 * and ".." set to point to the new parent.
		 */
		if (doingdirectory && newparent) {
			dp->i_nlink--;
			dp->i_flag |= IN_CHANGE;
			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
			    sizeof(struct dirtemplate), (off_t)0,
			    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
			    tcnp->cn_cred, NOCRED, NULL, NULL);
			if (error == 0) {
				/* Like ufs little-endian: */
				/*
				 * The length of ".." is read from the
				 * dotdot_type member; presumably this is the
				 * type/namlen swap that the comment on the
				 * directory templates in this file describes.
				 */
				namlen = dirbuf.dotdot_type;
				if (namlen != 2 ||
				    dirbuf.dotdot_name[0] != '.' ||
				    dirbuf.dotdot_name[1] != '.') {
					ext2_dirbad(xp, (doff_t)12,
					    "rename: mangled dir");
				} else {
					dirbuf.dotdot_ino = newparent;
					/* Result of the ".." rewrite is ignored. */
					(void)vn_rdwr(UIO_WRITE, fvp,
					    (caddr_t)&dirbuf,
					    sizeof(struct dirtemplate),
					    (off_t)0, UIO_SYSSPACE,
					    IO_NODELOCKED | IO_SYNC |
					    IO_NOMACCHECK, tcnp->cn_cred,
					    NOCRED, NULL, NULL);
					cache_purge(fdvp);
				}
			}
		}
		error = ext2_dirremove(fdvp, fcnp);
		if (!error) {
			/* Drops the temporary link taken in step 1. */
			xp->i_nlink--;
			xp->i_flag |= IN_CHANGE;
		}
		xp->i_flag &= ~IN_RENAME;
	}
	if (dp)
		vput(fdvp);
	if (xp)
		vput(fvp);
	vrele(ap->a_fvp);
	return (error);

	/*
	 * Error exits.  `bad' releases the target-side vnodes (xp if set,
	 * and dp) and falls into `out', which undoes the step 1 link count
	 * bump on the source if it can be locked again.
	 */
bad:
	if (xp)
		vput(ITOV(xp));
	vput(ITOV(dp));
out:
	if (doingdirectory)
		ip->i_flag &= ~IN_RENAME;
	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
		ip->i_nlink--;
		ip->i_flag |= IN_CHANGE;
		ip->i_flag &= ~IN_RENAME;
		vput(fvp);
	} else
		vrele(fvp);
	return (error);
}

#ifdef UFS_ACL
/*
 * Apply POSIX.1e ACL inheritance to a newly created directory.
 *
 * Fetches the default ACL of the parent `dvp'.  If one exists and is not
 * empty, the new directory `tvp' gets a mode merged from `dmode' and that
 * ACL, an access ACL derived from it, and a copy of it as its own default
 * ACL.  If the parent has no default ACL (empty, or VOP_GETACL() returns
 * EOPNOTSUPP) the inode mode is simply set to `dmode'.
 *
 * Returns 0 on success.  Other VOP_GETACL()/VOP_SETACL() errors are
 * returned as they are, including EOPNOTSUPP from VOP_SETACL().
 */
static int
ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
    mode_t dmode, struct ucred *cred, struct thread *td)
{
	int error;
	struct inode *ip = VTOI(tvp);
	/* acl: becomes the access ACL; dacl: copy kept as default ACL. */
	struct acl *dacl, *acl;

	acl = acl_alloc(M_WAITOK);
	dacl = acl_alloc(M_WAITOK);

	/*
	 * Retrieve default ACL from parent, if any.
	 */
	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
	switch (error) {
	case 0:
		/*
		 * Retrieved a default ACL, so merge mode and ACL if
		 * necessary.  If the ACL is empty, fall through to
		 * the "not defined or available" case.
		 */
		if (acl->acl_cnt != 0) {
			dmode = acl_posix1e_newfilemode(dmode, acl);
			ip->i_mode = dmode;
			/* Snapshot the parent's default ACL before acl is altered. */
			*dacl = *acl;
			ext2_sync_acl_from_inode(ip, acl);
			break;
		}
		/* FALLTHROUGH */

	case EOPNOTSUPP:
		/*
		 * Just use the mode as-is.
		 */
		ip->i_mode = dmode;
		error = 0;
		goto out;

	default:
		goto out;
	}

	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
	if (error == 0)
		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
	switch (error) {
	case 0:
		break;

	case EOPNOTSUPP:
		/*
		 * XXX: This should not happen, as EOPNOTSUPP above
		 * was supposed to free acl.
		 */
#ifdef DEBUG
		printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
#endif	/* DEBUG */
		break;

	default:
		goto out;
	}

out:
	/* Both buffers are released on every path. */
	acl_free(acl);
	acl_free(dacl);

	return (error);
}

/*
 * Apply POSIX.1e ACL inheritance to a newly created non-directory.
 *
 * Same scheme as the directory variant, except that only an access ACL is
 * installed on `tvp': the parent's default ACL is used to compute the
 * mode and the access ACL, and is not copied as a default ACL.
 *
 * Returns 0 on success.  Other VOP_GETACL()/VOP_SETACL() errors are
 * returned as they are, including EOPNOTSUPP from VOP_SETACL().
 */
static int
ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
    mode_t mode, struct ucred *cred, struct thread *td)
{
	int error;
	struct inode *ip = VTOI(tvp);
	struct acl *acl;

	acl = acl_alloc(M_WAITOK);

	/*
	 * Retrieve default ACL for parent, if any.
	 */
	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
	switch (error) {
	case 0:
		/*
		 * Retrieved a default ACL, so merge mode and ACL if
		 * necessary.
		 */
		if (acl->acl_cnt != 0) {
			/*
			 * Two possible ways for default ACL to not
			 * be present.  First, the EA can be
			 * undefined, or second, the default ACL can
			 * be blank.  If it's blank, fall through to
			 * the "not defined" case.
			 */
			mode = acl_posix1e_newfilemode(mode, acl);
			ip->i_mode = mode;
			ext2_sync_acl_from_inode(ip, acl);
			break;
		}
		/* FALLTHROUGH */

	case EOPNOTSUPP:
		/*
		 * Just use the mode as-is.
		 */
		ip->i_mode = mode;
		error = 0;
		goto out;

	default:
		goto out;
	}

	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
	switch (error) {
	case 0:
		break;

	case EOPNOTSUPP:
		/*
		 * XXX: This should not happen, as EOPNOTSUPP above was
		 * supposed to free acl.
		 */
		/*
		 * NOTE(review): the message below still carries the "ufs_"
		 * function name; it looks like a leftover and should
		 * probably name this ext2 function instead.
		 */
		printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
		    "but no VOP_SETACL()\n");
		/* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
		    "but no VOP_SETACL()"); */
		break;

	default:
		goto out;
	}

out:
	acl_free(acl);

	return (error);
}

#endif /* UFS_ACL */

/*
 * Mkdir system call
 */
static int
ext2_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	struct vnode *tvp;
	struct dirtemplate dirtemplate, *dtp;
	int error, dmode;

#ifdef INVARIANTS
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ext2_mkdir: no name");
#endif
	dp = VTOI(dvp);
	if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
		error = EMLINK;
		goto out;
	}
	dmode = vap->va_mode & 0777;
	dmode |= IFDIR;
	/*
	 * Must simulate part of ext2_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory. The entry is
	 * made later after writing "." and ".." entries.
	 */
	error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
	if (error)
		goto out;
	ip = VTOI(tvp);
	ip->i_gid = dp->i_gid;
#ifdef SUIDDIR
	{
		/*
		 * if we are hacking owners here, (only do this where told to)
		 * and we are not giving it TOO root, (would subvert quotas)
		 * then go ahead and give it to the other user.
		 * The new directory also inherits the SUID bit.
1281 * If user's UID and dir UID are the same, 1282 * 'give it away' so that the SUID is still forced on. 1283 */ 1284 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1285 (dp->i_mode & ISUID) && dp->i_uid) { 1286 dmode |= ISUID; 1287 ip->i_uid = dp->i_uid; 1288 } else { 1289 ip->i_uid = cnp->cn_cred->cr_uid; 1290 } 1291 } 1292 #else 1293 ip->i_uid = cnp->cn_cred->cr_uid; 1294 #endif 1295 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1296 ip->i_mode = dmode; 1297 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1298 ip->i_nlink = 2; 1299 if (cnp->cn_flags & ISWHITEOUT) 1300 ip->i_flags |= UF_OPAQUE; 1301 error = ext2_update(tvp, 1); 1302 1303 /* 1304 * Bump link count in parent directory 1305 * to reflect work done below. Should 1306 * be done before reference is created 1307 * so reparation is possible if we crash. 1308 */ 1309 dp->i_nlink++; 1310 dp->i_flag |= IN_CHANGE; 1311 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1312 if (error) 1313 goto bad; 1314 1315 /* Initialize directory with "." and ".." from static template. 
*/ 1316 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1317 EXT2F_INCOMPAT_FTYPE)) 1318 dtp = &mastertemplate; 1319 else 1320 dtp = &omastertemplate; 1321 dirtemplate = *dtp; 1322 dirtemplate.dot_ino = ip->i_number; 1323 dirtemplate.dotdot_ino = dp->i_number; 1324 /* 1325 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1326 * just redefine it - for this function only 1327 */ 1328 #undef DIRBLKSIZ 1329 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1330 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1331 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1332 sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, 1333 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1334 NULL, NULL); 1335 if (error) { 1336 dp->i_nlink--; 1337 dp->i_flag |= IN_CHANGE; 1338 goto bad; 1339 } 1340 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1341 /* XXX should grow with balloc() */ 1342 panic("ext2_mkdir: blksize"); 1343 else { 1344 ip->i_size = DIRBLKSIZ; 1345 ip->i_flag |= IN_CHANGE; 1346 } 1347 1348 #ifdef UFS_ACL 1349 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1350 error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1351 cnp->cn_cred, cnp->cn_thread); 1352 if (error) 1353 goto bad; 1354 } 1355 1356 #endif /* UFS_ACL */ 1357 1358 /* Directory set up, now install its entry in the parent directory. */ 1359 error = ext2_direnter(ip, dvp, cnp); 1360 if (error) { 1361 dp->i_nlink--; 1362 dp->i_flag |= IN_CHANGE; 1363 } 1364 bad: 1365 /* 1366 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1367 * for us because we set the link count to 0. 1368 */ 1369 if (error) { 1370 ip->i_nlink = 0; 1371 ip->i_flag |= IN_CHANGE; 1372 vput(tvp); 1373 } else 1374 *ap->a_vpp = tvp; 1375 out: 1376 return (error); 1377 #undef DIRBLKSIZ 1378 #define DIRBLKSIZ DEV_BSIZE 1379 } 1380 1381 /* 1382 * Rmdir system call. 
1383 */ 1384 static int 1385 ext2_rmdir(struct vop_rmdir_args *ap) 1386 { 1387 struct vnode *vp = ap->a_vp; 1388 struct vnode *dvp = ap->a_dvp; 1389 struct componentname *cnp = ap->a_cnp; 1390 struct inode *ip, *dp; 1391 int error; 1392 1393 ip = VTOI(vp); 1394 dp = VTOI(dvp); 1395 1396 /* 1397 * Verify the directory is empty (and valid). 1398 * (Rmdir ".." won't be valid since 1399 * ".." will contain a reference to 1400 * the current directory and thus be 1401 * non-empty.) 1402 */ 1403 if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1404 error = ENOTEMPTY; 1405 goto out; 1406 } 1407 if ((dp->i_flags & APPEND) 1408 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1409 error = EPERM; 1410 goto out; 1411 } 1412 /* 1413 * Delete reference to directory before purging 1414 * inode. If we crash in between, the directory 1415 * will be reattached to lost+found, 1416 */ 1417 error = ext2_dirremove(dvp, cnp); 1418 if (error) 1419 goto out; 1420 dp->i_nlink--; 1421 dp->i_flag |= IN_CHANGE; 1422 cache_purge(dvp); 1423 VOP_UNLOCK(dvp, 0); 1424 /* 1425 * Truncate inode. The only stuff left 1426 * in the directory is "." and "..". The 1427 * "." reference is inconsequential since 1428 * we're quashing it. The ".." reference 1429 * has already been adjusted above. We've 1430 * removed the "." reference and the reference 1431 * in the parent directory, but there may be 1432 * other hard links so decrement by 2 and 1433 * worry about them later. 
1434 */ 1435 ip->i_nlink -= 2; 1436 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1437 cnp->cn_thread); 1438 cache_purge(ITOV(ip)); 1439 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1440 VOP_UNLOCK(vp, 0); 1441 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1442 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1443 } 1444 out: 1445 return (error); 1446 } 1447 1448 /* 1449 * symlink -- make a symbolic link 1450 */ 1451 static int 1452 ext2_symlink(struct vop_symlink_args *ap) 1453 { 1454 struct vnode *vp, **vpp = ap->a_vpp; 1455 struct inode *ip; 1456 int len, error; 1457 1458 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1459 vpp, ap->a_cnp); 1460 if (error) 1461 return (error); 1462 vp = *vpp; 1463 len = strlen(ap->a_target); 1464 if (len < vp->v_mount->mnt_maxsymlinklen) { 1465 ip = VTOI(vp); 1466 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1467 ip->i_size = len; 1468 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1469 } else 1470 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1471 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1472 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1473 if (error) 1474 vput(vp); 1475 return (error); 1476 } 1477 1478 /* 1479 * Return target name of a symbolic link 1480 */ 1481 static int 1482 ext2_readlink(struct vop_readlink_args *ap) 1483 { 1484 struct vnode *vp = ap->a_vp; 1485 struct inode *ip = VTOI(vp); 1486 int isize; 1487 1488 isize = ip->i_size; 1489 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1490 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1491 return (0); 1492 } 1493 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1494 } 1495 1496 /* 1497 * Calculate the logical to physical mapping if not done already, 1498 * then call the device strategy routine. 1499 * 1500 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1501 * deadlock on memory. See ext2_bmap() for details. 
1502 */ 1503 static int 1504 ext2_strategy(struct vop_strategy_args *ap) 1505 { 1506 struct buf *bp = ap->a_bp; 1507 struct vnode *vp = ap->a_vp; 1508 struct bufobj *bo; 1509 daddr_t blkno; 1510 int error; 1511 1512 if (vp->v_type == VBLK || vp->v_type == VCHR) 1513 panic("ext2_strategy: spec"); 1514 if (bp->b_blkno == bp->b_lblkno) { 1515 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1516 bp->b_blkno = blkno; 1517 if (error) { 1518 bp->b_error = error; 1519 bp->b_ioflags |= BIO_ERROR; 1520 bufdone(bp); 1521 return (0); 1522 } 1523 if ((long)bp->b_blkno == -1) 1524 vfs_bio_clrbuf(bp); 1525 } 1526 if ((long)bp->b_blkno == -1) { 1527 bufdone(bp); 1528 return (0); 1529 } 1530 bp->b_iooffset = dbtob(bp->b_blkno); 1531 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1532 BO_STRATEGY(bo, bp); 1533 return (0); 1534 } 1535 1536 /* 1537 * Print out the contents of an inode. 1538 */ 1539 static int 1540 ext2_print(struct vop_print_args *ap) 1541 { 1542 struct vnode *vp = ap->a_vp; 1543 struct inode *ip = VTOI(vp); 1544 1545 vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); 1546 if (vp->v_type == VFIFO) 1547 fifo_printinfo(vp); 1548 printf("\n"); 1549 return (0); 1550 } 1551 1552 /* 1553 * Close wrapper for fifos. 1554 * 1555 * Update the times on the inode then do device close. 1556 */ 1557 static int 1558 ext2fifo_close(struct vop_close_args *ap) 1559 { 1560 struct vnode *vp = ap->a_vp; 1561 1562 VI_LOCK(vp); 1563 if (vp->v_usecount > 1) 1564 ext2_itimes_locked(vp); 1565 VI_UNLOCK(vp); 1566 return (fifo_specops.vop_close(ap)); 1567 } 1568 1569 /* 1570 * Kqfilter wrapper for fifos. 1571 * 1572 * Fall through to ext2 kqfilter routines if needed 1573 */ 1574 static int 1575 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1576 { 1577 int error; 1578 1579 error = fifo_specops.vop_kqfilter(ap); 1580 if (error) 1581 error = vfs_kqfilter(ap); 1582 return (error); 1583 } 1584 1585 /* 1586 * Return POSIX pathconf information applicable to ext2 filesystems. 
1587 */ 1588 static int 1589 ext2_pathconf(struct vop_pathconf_args *ap) 1590 { 1591 int error = 0; 1592 1593 switch (ap->a_name) { 1594 case _PC_LINK_MAX: 1595 *ap->a_retval = EXT2_LINK_MAX; 1596 break; 1597 case _PC_NAME_MAX: 1598 *ap->a_retval = NAME_MAX; 1599 break; 1600 case _PC_PATH_MAX: 1601 *ap->a_retval = PATH_MAX; 1602 break; 1603 case _PC_PIPE_BUF: 1604 *ap->a_retval = PIPE_BUF; 1605 break; 1606 case _PC_CHOWN_RESTRICTED: 1607 *ap->a_retval = 1; 1608 break; 1609 case _PC_NO_TRUNC: 1610 *ap->a_retval = 1; 1611 break; 1612 1613 #ifdef UFS_ACL 1614 case _PC_ACL_EXTENDED: 1615 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1616 *ap->a_retval = 1; 1617 else 1618 *ap->a_retval = 0; 1619 break; 1620 case _PC_ACL_PATH_MAX: 1621 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1622 *ap->a_retval = ACL_MAX_ENTRIES; 1623 else 1624 *ap->a_retval = 3; 1625 break; 1626 #endif /* UFS_ACL */ 1627 1628 case _PC_MIN_HOLE_SIZE: 1629 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1630 break; 1631 case _PC_ASYNC_IO: 1632 /* _PC_ASYNC_IO should have been handled by upper layers. 
*/ 1633 KASSERT(0, ("_PC_ASYNC_IO should not get here")); 1634 error = EINVAL; 1635 break; 1636 case _PC_PRIO_IO: 1637 *ap->a_retval = 0; 1638 break; 1639 case _PC_SYNC_IO: 1640 *ap->a_retval = 0; 1641 break; 1642 case _PC_ALLOC_SIZE_MIN: 1643 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1644 break; 1645 case _PC_FILESIZEBITS: 1646 *ap->a_retval = 64; 1647 break; 1648 case _PC_REC_INCR_XFER_SIZE: 1649 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1650 break; 1651 case _PC_REC_MAX_XFER_SIZE: 1652 *ap->a_retval = -1; /* means ``unlimited'' */ 1653 break; 1654 case _PC_REC_MIN_XFER_SIZE: 1655 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1656 break; 1657 case _PC_REC_XFER_ALIGN: 1658 *ap->a_retval = PAGE_SIZE; 1659 break; 1660 case _PC_SYMLINK_MAX: 1661 *ap->a_retval = MAXPATHLEN; 1662 break; 1663 1664 default: 1665 error = EINVAL; 1666 break; 1667 } 1668 return (error); 1669 } 1670 1671 /* 1672 * Vnode operation to remove a named attribute. 1673 */ 1674 static int 1675 ext2_deleteextattr(struct vop_deleteextattr_args *ap) 1676 { 1677 struct inode *ip; 1678 struct m_ext2fs *fs; 1679 int error; 1680 1681 ip = VTOI(ap->a_vp); 1682 fs = ip->i_e2fs; 1683 1684 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1685 return (EOPNOTSUPP); 1686 1687 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1688 return (EOPNOTSUPP); 1689 1690 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1691 ap->a_cred, ap->a_td, VWRITE); 1692 if (error) 1693 return (error); 1694 1695 error = ENOATTR; 1696 1697 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1698 error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name); 1699 if (error != ENOATTR) 1700 return (error); 1701 } 1702 1703 if (ip->i_facl) 1704 error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name); 1705 1706 return (error); 1707 } 1708 1709 /* 1710 * Vnode operation to retrieve a named extended attribute. 
1711 */ 1712 static int 1713 ext2_getextattr(struct vop_getextattr_args *ap) 1714 { 1715 struct inode *ip; 1716 struct m_ext2fs *fs; 1717 int error; 1718 1719 ip = VTOI(ap->a_vp); 1720 fs = ip->i_e2fs; 1721 1722 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1723 return (EOPNOTSUPP); 1724 1725 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1726 return (EOPNOTSUPP); 1727 1728 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1729 ap->a_cred, ap->a_td, VREAD); 1730 if (error) 1731 return (error); 1732 1733 if (ap->a_size != NULL) 1734 *ap->a_size = 0; 1735 1736 error = ENOATTR; 1737 1738 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1739 error = ext2_extattr_inode_get(ip, ap->a_attrnamespace, 1740 ap->a_name, ap->a_uio, ap->a_size); 1741 if (error != ENOATTR) 1742 return (error); 1743 } 1744 1745 if (ip->i_facl) 1746 error = ext2_extattr_block_get(ip, ap->a_attrnamespace, 1747 ap->a_name, ap->a_uio, ap->a_size); 1748 1749 return (error); 1750 } 1751 1752 /* 1753 * Vnode operation to retrieve extended attributes on a vnode. 
1754 */ 1755 static int 1756 ext2_listextattr(struct vop_listextattr_args *ap) 1757 { 1758 struct inode *ip; 1759 struct m_ext2fs *fs; 1760 int error; 1761 1762 ip = VTOI(ap->a_vp); 1763 fs = ip->i_e2fs; 1764 1765 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1766 return (EOPNOTSUPP); 1767 1768 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1769 return (EOPNOTSUPP); 1770 1771 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1772 ap->a_cred, ap->a_td, VREAD); 1773 if (error) 1774 return (error); 1775 1776 if (ap->a_size != NULL) 1777 *ap->a_size = 0; 1778 1779 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1780 error = ext2_extattr_inode_list(ip, ap->a_attrnamespace, 1781 ap->a_uio, ap->a_size); 1782 if (error) 1783 return (error); 1784 } 1785 1786 if (ip->i_facl) 1787 error = ext2_extattr_block_list(ip, ap->a_attrnamespace, 1788 ap->a_uio, ap->a_size); 1789 1790 return (error); 1791 } 1792 1793 /* 1794 * Vnode operation to set a named attribute. 
1795 */ 1796 static int 1797 ext2_setextattr(struct vop_setextattr_args *ap) 1798 { 1799 struct inode *ip; 1800 struct m_ext2fs *fs; 1801 int error; 1802 1803 ip = VTOI(ap->a_vp); 1804 fs = ip->i_e2fs; 1805 1806 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1807 return (EOPNOTSUPP); 1808 1809 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1810 return (EOPNOTSUPP); 1811 1812 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1813 ap->a_cred, ap->a_td, VWRITE); 1814 if (error) 1815 return (error); 1816 1817 error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name); 1818 if (error) 1819 return (error); 1820 1821 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1822 error = ext2_extattr_inode_set(ip, ap->a_attrnamespace, 1823 ap->a_name, ap->a_uio); 1824 if (error != ENOSPC) 1825 return (error); 1826 } 1827 1828 error = ext2_extattr_block_set(ip, ap->a_attrnamespace, 1829 ap->a_name, ap->a_uio); 1830 1831 return (error); 1832 } 1833 1834 /* 1835 * Vnode pointer to File handle 1836 */ 1837 /* ARGSUSED */ 1838 static int 1839 ext2_vptofh(struct vop_vptofh_args *ap) 1840 { 1841 struct inode *ip; 1842 struct ufid *ufhp; 1843 1844 ip = VTOI(ap->a_vp); 1845 ufhp = (struct ufid *)ap->a_fhp; 1846 ufhp->ufid_len = sizeof(struct ufid); 1847 ufhp->ufid_ino = ip->i_number; 1848 ufhp->ufid_gen = ip->i_gen; 1849 return (0); 1850 } 1851 1852 /* 1853 * Initialize the vnode associated with a new inode, handle aliased 1854 * vnodes. 1855 */ 1856 int 1857 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1858 { 1859 struct inode *ip; 1860 struct vnode *vp; 1861 1862 vp = *vpp; 1863 ip = VTOI(vp); 1864 vp->v_type = IFTOVT(ip->i_mode); 1865 if (vp->v_type == VFIFO) 1866 vp->v_op = fifoops; 1867 1868 if (ip->i_number == EXT2_ROOTINO) 1869 vp->v_vflag |= VV_ROOT; 1870 ip->i_modrev = init_va_filerev(); 1871 *vpp = vp; 1872 return (0); 1873 } 1874 1875 /* 1876 * Allocate a new inode. 
1877 */ 1878 static int 1879 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1880 struct componentname *cnp) 1881 { 1882 struct inode *ip, *pdir; 1883 struct vnode *tvp; 1884 int error; 1885 1886 pdir = VTOI(dvp); 1887 #ifdef INVARIANTS 1888 if ((cnp->cn_flags & HASBUF) == 0) 1889 panic("ext2_makeinode: no name"); 1890 #endif 1891 *vpp = NULL; 1892 if ((mode & IFMT) == 0) 1893 mode |= IFREG; 1894 1895 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1896 if (error) { 1897 return (error); 1898 } 1899 ip = VTOI(tvp); 1900 ip->i_gid = pdir->i_gid; 1901 #ifdef SUIDDIR 1902 { 1903 /* 1904 * if we are 1905 * not the owner of the directory, 1906 * and we are hacking owners here, (only do this where told to) 1907 * and we are not giving it TOO root, (would subvert quotas) 1908 * then go ahead and give it to the other user. 1909 * Note that this drops off the execute bits for security. 1910 */ 1911 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1912 (pdir->i_mode & ISUID) && 1913 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1914 ip->i_uid = pdir->i_uid; 1915 mode &= ~07111; 1916 } else { 1917 ip->i_uid = cnp->cn_cred->cr_uid; 1918 } 1919 } 1920 #else 1921 ip->i_uid = cnp->cn_cred->cr_uid; 1922 #endif 1923 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1924 ip->i_mode = mode; 1925 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1926 ip->i_nlink = 1; 1927 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1928 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1929 ip->i_mode &= ~ISGID; 1930 } 1931 1932 if (cnp->cn_flags & ISWHITEOUT) 1933 ip->i_flags |= UF_OPAQUE; 1934 1935 /* 1936 * Make sure inode goes to disk before directory entry. 
1937 */ 1938 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1939 if (error) 1940 goto bad; 1941 1942 #ifdef UFS_ACL 1943 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1944 error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 1945 cnp->cn_cred, cnp->cn_thread); 1946 if (error) 1947 goto bad; 1948 } 1949 #endif /* UFS_ACL */ 1950 1951 error = ext2_direnter(ip, dvp, cnp); 1952 if (error) 1953 goto bad; 1954 1955 *vpp = tvp; 1956 return (0); 1957 1958 bad: 1959 /* 1960 * Write error occurred trying to update the inode 1961 * or the directory so must deallocate the inode. 1962 */ 1963 ip->i_nlink = 0; 1964 ip->i_flag |= IN_CHANGE; 1965 vput(tvp); 1966 return (error); 1967 } 1968 1969 /* 1970 * Vnode op for reading. 1971 */ 1972 static int 1973 ext2_read(struct vop_read_args *ap) 1974 { 1975 struct vnode *vp; 1976 struct inode *ip; 1977 int error; 1978 1979 vp = ap->a_vp; 1980 ip = VTOI(vp); 1981 1982 /* EXT4_EXT_LOCK(ip); */ 1983 if (ip->i_flag & IN_E4EXTENTS) 1984 error = ext4_ext_read(ap); 1985 else 1986 error = ext2_ind_read(ap); 1987 /* EXT4_EXT_UNLOCK(ip); */ 1988 return (error); 1989 } 1990 1991 /* 1992 * Vnode op for reading. 
1993 */ 1994 static int 1995 ext2_ind_read(struct vop_read_args *ap) 1996 { 1997 struct vnode *vp; 1998 struct inode *ip; 1999 struct uio *uio; 2000 struct m_ext2fs *fs; 2001 struct buf *bp; 2002 daddr_t lbn, nextlbn; 2003 off_t bytesinfile; 2004 long size, xfersize, blkoffset; 2005 int error, orig_resid, seqcount; 2006 int ioflag; 2007 2008 vp = ap->a_vp; 2009 uio = ap->a_uio; 2010 ioflag = ap->a_ioflag; 2011 2012 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 2013 ip = VTOI(vp); 2014 2015 #ifdef INVARIANTS 2016 if (uio->uio_rw != UIO_READ) 2017 panic("%s: mode", "ext2_read"); 2018 2019 if (vp->v_type == VLNK) { 2020 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 2021 panic("%s: short symlink", "ext2_read"); 2022 } else if (vp->v_type != VREG && vp->v_type != VDIR) 2023 panic("%s: type %d", "ext2_read", vp->v_type); 2024 #endif 2025 orig_resid = uio->uio_resid; 2026 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 2027 if (orig_resid == 0) 2028 return (0); 2029 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 2030 fs = ip->i_e2fs; 2031 if (uio->uio_offset < ip->i_size && 2032 uio->uio_offset >= fs->e2fs_maxfilesize) 2033 return (EOVERFLOW); 2034 2035 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 2036 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2037 break; 2038 lbn = lblkno(fs, uio->uio_offset); 2039 nextlbn = lbn + 1; 2040 size = blksize(fs, ip, lbn); 2041 blkoffset = blkoff(fs, uio->uio_offset); 2042 2043 xfersize = fs->e2fs_fsize - blkoffset; 2044 if (uio->uio_resid < xfersize) 2045 xfersize = uio->uio_resid; 2046 if (bytesinfile < xfersize) 2047 xfersize = bytesinfile; 2048 2049 if (lblktosize(fs, nextlbn) >= ip->i_size) 2050 error = bread(vp, lbn, size, NOCRED, &bp); 2051 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 2052 error = cluster_read(vp, ip->i_size, lbn, size, 2053 NOCRED, blkoffset + uio->uio_resid, seqcount, 2054 0, &bp); 2055 } else if (seqcount > 1) { 2056 u_int nextsize = 
blksize(fs, ip, nextlbn); 2057 2058 error = breadn(vp, lbn, 2059 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 2060 } else 2061 error = bread(vp, lbn, size, NOCRED, &bp); 2062 if (error) { 2063 brelse(bp); 2064 bp = NULL; 2065 break; 2066 } 2067 2068 /* 2069 * We should only get non-zero b_resid when an I/O error 2070 * has occurred, which should cause us to break above. 2071 * However, if the short read did not cause an error, 2072 * then we want to ensure that we do not uiomove bad 2073 * or uninitialized data. 2074 */ 2075 size -= bp->b_resid; 2076 if (size < xfersize) { 2077 if (size == 0) 2078 break; 2079 xfersize = size; 2080 } 2081 error = uiomove((char *)bp->b_data + blkoffset, 2082 (int)xfersize, uio); 2083 if (error) 2084 break; 2085 vfs_bio_brelse(bp, ioflag); 2086 } 2087 2088 /* 2089 * This can only happen in the case of an error because the loop 2090 * above resets bp to NULL on each iteration and on normal 2091 * completion has not set a new value into it. so it must have come 2092 * from a 'break' statement 2093 */ 2094 if (bp != NULL) 2095 vfs_bio_brelse(bp, ioflag); 2096 2097 if ((error == 0 || uio->uio_resid != orig_resid) && 2098 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 2099 ip->i_flag |= IN_ACCESS; 2100 return (error); 2101 } 2102 2103 static int 2104 ext2_ioctl(struct vop_ioctl_args *ap) 2105 { 2106 2107 switch (ap->a_command) { 2108 case FIOSEEKDATA: 2109 case FIOSEEKHOLE: 2110 return (vn_bmap_seekhole(ap->a_vp, ap->a_command, 2111 (off_t *)ap->a_data, ap->a_cred)); 2112 default: 2113 return (ENOTTY); 2114 } 2115 } 2116 2117 /* 2118 * this function handles ext4 extents block mapping 2119 */ 2120 static int 2121 ext4_ext_read(struct vop_read_args *ap) 2122 { 2123 static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; 2124 struct vnode *vp; 2125 struct inode *ip; 2126 struct uio *uio; 2127 struct m_ext2fs *fs; 2128 struct buf *bp; 2129 struct ext4_extent nex, *ep; 2130 struct ext4_extent_path path; 2131 daddr_t lbn, newblk; 2132 off_t 
bytesinfile; 2133 int cache_type; 2134 ssize_t orig_resid; 2135 int error; 2136 long size, xfersize, blkoffset; 2137 2138 vp = ap->a_vp; 2139 ip = VTOI(vp); 2140 uio = ap->a_uio; 2141 memset(&path, 0, sizeof(path)); 2142 2143 orig_resid = uio->uio_resid; 2144 KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); 2145 if (orig_resid == 0) 2146 return (0); 2147 KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); 2148 fs = ip->i_e2fs; 2149 if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) 2150 return (EOVERFLOW); 2151 2152 while (uio->uio_resid > 0) { 2153 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2154 break; 2155 lbn = lblkno(fs, uio->uio_offset); 2156 size = blksize(fs, ip, lbn); 2157 blkoffset = blkoff(fs, uio->uio_offset); 2158 2159 xfersize = fs->e2fs_fsize - blkoffset; 2160 xfersize = MIN(xfersize, uio->uio_resid); 2161 xfersize = MIN(xfersize, bytesinfile); 2162 2163 /* get block from ext4 extent cache */ 2164 cache_type = ext4_ext_in_cache(ip, lbn, &nex); 2165 switch (cache_type) { 2166 case EXT4_EXT_CACHE_NO: 2167 ext4_ext_find_extent(fs, ip, lbn, &path); 2168 if (path.ep_is_sparse) 2169 ep = &path.ep_sparse_ext; 2170 else 2171 ep = path.ep_ext; 2172 if (ep == NULL) 2173 return (EIO); 2174 2175 ext4_ext_put_cache(ip, ep, 2176 path.ep_is_sparse ? 
EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); 2177 2178 newblk = lbn - ep->e_blk + (ep->e_start_lo | 2179 (daddr_t)ep->e_start_hi << 32); 2180 2181 if (path.ep_bp != NULL) { 2182 brelse(path.ep_bp); 2183 path.ep_bp = NULL; 2184 } 2185 break; 2186 2187 case EXT4_EXT_CACHE_GAP: 2188 /* block has not been allocated yet */ 2189 break; 2190 2191 case EXT4_EXT_CACHE_IN: 2192 newblk = lbn - nex.e_blk + (nex.e_start_lo | 2193 (daddr_t)nex.e_start_hi << 32); 2194 break; 2195 2196 default: 2197 panic("%s: invalid cache type", __func__); 2198 } 2199 2200 if (cache_type == EXT4_EXT_CACHE_GAP || 2201 (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { 2202 if (xfersize > sizeof(zeroes)) 2203 xfersize = sizeof(zeroes); 2204 error = uiomove(zeroes, xfersize, uio); 2205 if (error) 2206 return (error); 2207 } else { 2208 error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, 2209 NOCRED, &bp); 2210 if (error) { 2211 brelse(bp); 2212 return (error); 2213 } 2214 2215 size -= bp->b_resid; 2216 if (size < xfersize) { 2217 if (size == 0) { 2218 bqrelse(bp); 2219 break; 2220 } 2221 xfersize = size; 2222 } 2223 error = uiomove(bp->b_data + blkoffset, xfersize, uio); 2224 bqrelse(bp); 2225 if (error) 2226 return (error); 2227 } 2228 } 2229 2230 return (0); 2231 } 2232 2233 /* 2234 * Vnode op for writing. 
2235 */ 2236 static int 2237 ext2_write(struct vop_write_args *ap) 2238 { 2239 struct vnode *vp; 2240 struct uio *uio; 2241 struct inode *ip; 2242 struct m_ext2fs *fs; 2243 struct buf *bp; 2244 daddr_t lbn; 2245 off_t osize; 2246 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 2247 2248 ioflag = ap->a_ioflag; 2249 uio = ap->a_uio; 2250 vp = ap->a_vp; 2251 2252 seqcount = ioflag >> IO_SEQSHIFT; 2253 ip = VTOI(vp); 2254 2255 #ifdef INVARIANTS 2256 if (uio->uio_rw != UIO_WRITE) 2257 panic("%s: mode", "ext2_write"); 2258 #endif 2259 2260 switch (vp->v_type) { 2261 case VREG: 2262 if (ioflag & IO_APPEND) 2263 uio->uio_offset = ip->i_size; 2264 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 2265 return (EPERM); 2266 /* FALLTHROUGH */ 2267 case VLNK: 2268 break; 2269 case VDIR: 2270 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 2271 if ((ioflag & IO_SYNC) == 0) 2272 panic("ext2_write: nonsync dir write"); 2273 break; 2274 default: 2275 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 2276 vp->v_type, (intmax_t)uio->uio_offset, 2277 (intmax_t)uio->uio_resid); 2278 } 2279 2280 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 2281 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 2282 fs = ip->i_e2fs; 2283 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 2284 return (EFBIG); 2285 /* 2286 * Maybe this should be above the vnode op call, but so long as 2287 * file servers have no limits, I don't think it matters. 
2288 */ 2289 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 2290 return (EFBIG); 2291 2292 resid = uio->uio_resid; 2293 osize = ip->i_size; 2294 if (seqcount > BA_SEQMAX) 2295 flags = BA_SEQMAX << BA_SEQSHIFT; 2296 else 2297 flags = seqcount << BA_SEQSHIFT; 2298 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 2299 flags |= IO_SYNC; 2300 2301 for (error = 0; uio->uio_resid > 0;) { 2302 lbn = lblkno(fs, uio->uio_offset); 2303 blkoffset = blkoff(fs, uio->uio_offset); 2304 xfersize = fs->e2fs_fsize - blkoffset; 2305 if (uio->uio_resid < xfersize) 2306 xfersize = uio->uio_resid; 2307 if (uio->uio_offset + xfersize > ip->i_size) 2308 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 2309 2310 /* 2311 * We must perform a read-before-write if the transfer size 2312 * does not cover the entire buffer. 2313 */ 2314 if (fs->e2fs_bsize > xfersize) 2315 flags |= BA_CLRBUF; 2316 else 2317 flags &= ~BA_CLRBUF; 2318 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 2319 ap->a_cred, &bp, flags); 2320 if (error != 0) 2321 break; 2322 2323 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL)) 2324 bp->b_flags |= B_NOCACHE; 2325 if (uio->uio_offset + xfersize > ip->i_size) 2326 ip->i_size = uio->uio_offset + xfersize; 2327 size = blksize(fs, ip, lbn) - bp->b_resid; 2328 if (size < xfersize) 2329 xfersize = size; 2330 2331 error = 2332 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 2333 /* 2334 * If the buffer is not already filled and we encounter an 2335 * error while trying to fill it, we have to clear out any 2336 * garbage data from the pages instantiated for the buffer. 2337 * If we do not, a failed uiomove() during a write can leave 2338 * the prior contents of the pages exposed to a userland mmap. 2339 * 2340 * Note that we need only clear buffers with a transfer size 2341 * equal to the block size because buffers with a shorter 2342 * transfer size were cleared above by the call to ext2_balloc() 2343 * with the BA_CLRBUF flag set. 
2344 * 2345 * If the source region for uiomove identically mmaps the 2346 * buffer, uiomove() performed the NOP copy, and the buffer 2347 * content remains valid because the page fault handler 2348 * validated the pages. 2349 */ 2350 if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 2351 fs->e2fs_bsize == xfersize) 2352 vfs_bio_clrbuf(bp); 2353 2354 vfs_bio_set_flags(bp, ioflag); 2355 2356 /* 2357 * If IO_SYNC each buffer is written synchronously. Otherwise 2358 * if we have a severe page deficiency write the buffer 2359 * asynchronously. Otherwise try to cluster, and if that 2360 * doesn't do it then either do an async write (if O_DIRECT), 2361 * or a delayed write (if not). 2362 */ 2363 if (ioflag & IO_SYNC) { 2364 (void)bwrite(bp); 2365 } else if (vm_page_count_severe() || 2366 buf_dirty_count_severe() || 2367 (ioflag & IO_ASYNC)) { 2368 bp->b_flags |= B_CLUSTEROK; 2369 bawrite(bp); 2370 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 2371 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 2372 bp->b_flags |= B_CLUSTEROK; 2373 cluster_write(vp, bp, ip->i_size, seqcount, 0); 2374 } else { 2375 bawrite(bp); 2376 } 2377 } else if (ioflag & IO_DIRECT) { 2378 bp->b_flags |= B_CLUSTEROK; 2379 bawrite(bp); 2380 } else { 2381 bp->b_flags |= B_CLUSTEROK; 2382 bdwrite(bp); 2383 } 2384 if (error || xfersize == 0) 2385 break; 2386 } 2387 /* 2388 * If we successfully wrote any data, and we are not the superuser 2389 * we clear the setuid and setgid bits as a precaution against 2390 * tampering. 
2391 */ 2392 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 2393 ap->a_cred) { 2394 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 2395 ip->i_mode &= ~(ISUID | ISGID); 2396 } 2397 if (error) { 2398 if (ioflag & IO_UNIT) { 2399 (void)ext2_truncate(vp, osize, 2400 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 2401 uio->uio_offset -= resid - uio->uio_resid; 2402 uio->uio_resid = resid; 2403 } 2404 } 2405 if (uio->uio_resid != resid) { 2406 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2407 if (ioflag & IO_SYNC) 2408 error = ext2_update(vp, 1); 2409 } 2410 return (error); 2411 } 2412