1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 41 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 42 * $FreeBSD$ 43 */ 44 45 #include "opt_suiddir.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/fcntl.h> 51 #include <sys/filio.h> 52 #include <sys/stat.h> 53 #include <sys/bio.h> 54 #include <sys/buf.h> 55 #include <sys/endian.h> 56 #include <sys/priv.h> 57 #include <sys/rwlock.h> 58 #include <sys/mount.h> 59 #include <sys/unistd.h> 60 #include <sys/time.h> 61 #include <sys/vnode.h> 62 #include <sys/namei.h> 63 #include <sys/lockf.h> 64 #include <sys/event.h> 65 #include <sys/conf.h> 66 #include <sys/file.h> 67 #include <sys/extattr.h> 68 #include <sys/vmmeter.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/vm_extern.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 #include <vm/vm_pager.h> 76 #include <vm/vnode_pager.h> 77 78 #include "opt_directio.h" 79 80 #include <ufs/ufs/dir.h> 81 82 #include <fs/ext2fs/fs.h> 83 #include <fs/ext2fs/inode.h> 84 #include <fs/ext2fs/ext2_acl.h> 85 #include <fs/ext2fs/ext2_extern.h> 86 #include <fs/ext2fs/ext2fs.h> 87 #include <fs/ext2fs/ext2_dinode.h> 88 #include <fs/ext2fs/ext2_dir.h> 89 #include <fs/ext2fs/ext2_mount.h> 90 #include <fs/ext2fs/ext2_extattr.h> 91 92 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 93 static void ext2_itimes_locked(struct vnode *); 94 static 
int ext4_ext_read(struct vop_read_args *); 95 static int ext2_ind_read(struct vop_read_args *); 96 97 static vop_access_t ext2_access; 98 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 99 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 100 struct thread *); 101 static vop_close_t ext2_close; 102 static vop_create_t ext2_create; 103 static vop_fsync_t ext2_fsync; 104 static vop_getattr_t ext2_getattr; 105 static vop_ioctl_t ext2_ioctl; 106 static vop_link_t ext2_link; 107 static vop_mkdir_t ext2_mkdir; 108 static vop_mknod_t ext2_mknod; 109 static vop_open_t ext2_open; 110 static vop_pathconf_t ext2_pathconf; 111 static vop_print_t ext2_print; 112 static vop_read_t ext2_read; 113 static vop_readlink_t ext2_readlink; 114 static vop_remove_t ext2_remove; 115 static vop_rename_t ext2_rename; 116 static vop_rmdir_t ext2_rmdir; 117 static vop_setattr_t ext2_setattr; 118 static vop_strategy_t ext2_strategy; 119 static vop_symlink_t ext2_symlink; 120 static vop_write_t ext2_write; 121 static vop_deleteextattr_t ext2_deleteextattr; 122 static vop_getextattr_t ext2_getextattr; 123 static vop_listextattr_t ext2_listextattr; 124 static vop_setextattr_t ext2_setextattr; 125 static vop_vptofh_t ext2_vptofh; 126 static vop_close_t ext2fifo_close; 127 static vop_kqfilter_t ext2fifo_kqfilter; 128 129 /* Global vfs data structures for ext2. 
*/ 130 struct vop_vector ext2_vnodeops = { 131 .vop_default = &default_vnodeops, 132 .vop_access = ext2_access, 133 .vop_bmap = ext2_bmap, 134 .vop_cachedlookup = ext2_lookup, 135 .vop_close = ext2_close, 136 .vop_create = ext2_create, 137 .vop_fsync = ext2_fsync, 138 .vop_getpages = vnode_pager_local_getpages, 139 .vop_getpages_async = vnode_pager_local_getpages_async, 140 .vop_getattr = ext2_getattr, 141 .vop_inactive = ext2_inactive, 142 .vop_ioctl = ext2_ioctl, 143 .vop_link = ext2_link, 144 .vop_lookup = vfs_cache_lookup, 145 .vop_mkdir = ext2_mkdir, 146 .vop_mknod = ext2_mknod, 147 .vop_open = ext2_open, 148 .vop_pathconf = ext2_pathconf, 149 .vop_poll = vop_stdpoll, 150 .vop_print = ext2_print, 151 .vop_read = ext2_read, 152 .vop_readdir = ext2_readdir, 153 .vop_readlink = ext2_readlink, 154 .vop_reallocblks = ext2_reallocblks, 155 .vop_reclaim = ext2_reclaim, 156 .vop_remove = ext2_remove, 157 .vop_rename = ext2_rename, 158 .vop_rmdir = ext2_rmdir, 159 .vop_setattr = ext2_setattr, 160 .vop_strategy = ext2_strategy, 161 .vop_symlink = ext2_symlink, 162 .vop_write = ext2_write, 163 .vop_deleteextattr = ext2_deleteextattr, 164 .vop_getextattr = ext2_getextattr, 165 .vop_listextattr = ext2_listextattr, 166 .vop_setextattr = ext2_setextattr, 167 #ifdef UFS_ACL 168 .vop_getacl = ext2_getacl, 169 .vop_setacl = ext2_setacl, 170 .vop_aclcheck = ext2_aclcheck, 171 #endif /* UFS_ACL */ 172 .vop_vptofh = ext2_vptofh, 173 }; 174 175 struct vop_vector ext2_fifoops = { 176 .vop_default = &fifo_specops, 177 .vop_access = ext2_access, 178 .vop_close = ext2fifo_close, 179 .vop_fsync = ext2_fsync, 180 .vop_getattr = ext2_getattr, 181 .vop_inactive = ext2_inactive, 182 .vop_kqfilter = ext2fifo_kqfilter, 183 .vop_print = ext2_print, 184 .vop_read = VOP_PANIC, 185 .vop_reclaim = ext2_reclaim, 186 .vop_setattr = ext2_setattr, 187 .vop_write = VOP_PANIC, 188 .vop_vptofh = ext2_vptofh, 189 }; 190 191 /* 192 * A virgin directory (no blushing please). 
193 * Note that the type and namlen fields are reversed relative to ext2. 194 * Also, we don't use `struct odirtemplate', since it would just cause 195 * endianness problems. 196 */ 197 static struct dirtemplate mastertemplate = { 198 0, 12, 1, EXT2_FT_DIR, ".", 199 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 200 }; 201 static struct dirtemplate omastertemplate = { 202 0, 12, 1, EXT2_FT_UNKNOWN, ".", 203 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 204 }; 205 206 static void 207 ext2_itimes_locked(struct vnode *vp) 208 { 209 struct inode *ip; 210 struct timespec ts; 211 212 ASSERT_VI_LOCKED(vp, __func__); 213 214 ip = VTOI(vp); 215 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 216 return; 217 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 218 ip->i_flag |= IN_LAZYMOD; 219 else 220 ip->i_flag |= IN_MODIFIED; 221 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 222 vfs_timestamp(&ts); 223 if (ip->i_flag & IN_ACCESS) { 224 ip->i_atime = ts.tv_sec; 225 ip->i_atimensec = ts.tv_nsec; 226 } 227 if (ip->i_flag & IN_UPDATE) { 228 ip->i_mtime = ts.tv_sec; 229 ip->i_mtimensec = ts.tv_nsec; 230 ip->i_modrev++; 231 } 232 if (ip->i_flag & IN_CHANGE) { 233 ip->i_ctime = ts.tv_sec; 234 ip->i_ctimensec = ts.tv_nsec; 235 } 236 } 237 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 238 } 239 240 void 241 ext2_itimes(struct vnode *vp) 242 { 243 244 VI_LOCK(vp); 245 ext2_itimes_locked(vp); 246 VI_UNLOCK(vp); 247 } 248 249 /* 250 * Create a regular file 251 */ 252 static int 253 ext2_create(struct vop_create_args *ap) 254 { 255 int error; 256 257 error = 258 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 259 ap->a_dvp, ap->a_vpp, ap->a_cnp); 260 if (error != 0) 261 return (error); 262 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 263 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 264 return (0); 265 } 266 267 static int 268 ext2_open(struct vop_open_args *ap) 269 { 270 271 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 272 return (EOPNOTSUPP); 
273 274 /* 275 * Files marked append-only must be opened for appending. 276 */ 277 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 278 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 279 return (EPERM); 280 281 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 282 283 return (0); 284 } 285 286 /* 287 * Close called. 288 * 289 * Update the times on the inode. 290 */ 291 static int 292 ext2_close(struct vop_close_args *ap) 293 { 294 struct vnode *vp = ap->a_vp; 295 296 VI_LOCK(vp); 297 if (vp->v_usecount > 1) 298 ext2_itimes_locked(vp); 299 VI_UNLOCK(vp); 300 return (0); 301 } 302 303 static int 304 ext2_access(struct vop_access_args *ap) 305 { 306 struct vnode *vp = ap->a_vp; 307 struct inode *ip = VTOI(vp); 308 accmode_t accmode = ap->a_accmode; 309 int error; 310 311 if (vp->v_type == VBLK || vp->v_type == VCHR) 312 return (EOPNOTSUPP); 313 314 /* 315 * Disallow write attempts on read-only file systems; 316 * unless the file is a socket, fifo, or a block or 317 * character device resident on the file system. 318 */ 319 if (accmode & VWRITE) { 320 switch (vp->v_type) { 321 case VDIR: 322 case VLNK: 323 case VREG: 324 if (vp->v_mount->mnt_flag & MNT_RDONLY) 325 return (EROFS); 326 break; 327 default: 328 break; 329 } 330 } 331 332 /* If immutable bit set, nobody gets to write it. 
*/ 333 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 334 return (EPERM); 335 336 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 337 ap->a_accmode, ap->a_cred, NULL); 338 return (error); 339 } 340 341 static int 342 ext2_getattr(struct vop_getattr_args *ap) 343 { 344 struct vnode *vp = ap->a_vp; 345 struct inode *ip = VTOI(vp); 346 struct vattr *vap = ap->a_vap; 347 348 ext2_itimes(vp); 349 /* 350 * Copy from inode table 351 */ 352 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 353 vap->va_fileid = ip->i_number; 354 vap->va_mode = ip->i_mode & ~IFMT; 355 vap->va_nlink = ip->i_nlink; 356 vap->va_uid = ip->i_uid; 357 vap->va_gid = ip->i_gid; 358 vap->va_rdev = ip->i_rdev; 359 vap->va_size = ip->i_size; 360 vap->va_atime.tv_sec = ip->i_atime; 361 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 362 vap->va_mtime.tv_sec = ip->i_mtime; 363 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 364 vap->va_ctime.tv_sec = ip->i_ctime; 365 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 366 if E2DI_HAS_XTIME(ip) { 367 vap->va_birthtime.tv_sec = ip->i_birthtime; 368 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 369 } 370 vap->va_flags = ip->i_flags; 371 vap->va_gen = ip->i_gen; 372 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 373 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 374 vap->va_type = IFTOVT(ip->i_mode); 375 vap->va_filerev = ip->i_modrev; 376 return (0); 377 } 378 379 /* 380 * Set attribute vnode op. called from several syscalls 381 */ 382 static int 383 ext2_setattr(struct vop_setattr_args *ap) 384 { 385 struct vattr *vap = ap->a_vap; 386 struct vnode *vp = ap->a_vp; 387 struct inode *ip = VTOI(vp); 388 struct ucred *cred = ap->a_cred; 389 struct thread *td = curthread; 390 int error; 391 392 /* 393 * Check for unsettable attributes. 
394 */ 395 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 396 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 397 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 398 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 399 return (EINVAL); 400 } 401 if (vap->va_flags != VNOVAL) { 402 /* Disallow flags not supported by ext2fs. */ 403 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 404 return (EOPNOTSUPP); 405 406 if (vp->v_mount->mnt_flag & MNT_RDONLY) 407 return (EROFS); 408 /* 409 * Callers may only modify the file flags on objects they 410 * have VADMIN rights for. 411 */ 412 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 413 return (error); 414 /* 415 * Unprivileged processes and privileged processes in 416 * jail() are not permitted to unset system flags, or 417 * modify flags if any system flags are set. 418 * Privileged non-jail processes may not modify system flags 419 * if securelevel > 0 and any existing system flags are set. 420 */ 421 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 422 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 423 error = securelevel_gt(cred, 0); 424 if (error) 425 return (error); 426 } 427 } else { 428 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 429 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 430 return (EPERM); 431 } 432 ip->i_flags = vap->va_flags; 433 ip->i_flag |= IN_CHANGE; 434 if (ip->i_flags & (IMMUTABLE | APPEND)) 435 return (0); 436 } 437 if (ip->i_flags & (IMMUTABLE | APPEND)) 438 return (EPERM); 439 /* 440 * Go through the fields and update iff not VNOVAL. 
441 */ 442 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 443 if (vp->v_mount->mnt_flag & MNT_RDONLY) 444 return (EROFS); 445 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 446 td)) != 0) 447 return (error); 448 } 449 if (vap->va_size != VNOVAL) { 450 /* 451 * Disallow write attempts on read-only file systems; 452 * unless the file is a socket, fifo, or a block or 453 * character device resident on the file system. 454 */ 455 switch (vp->v_type) { 456 case VDIR: 457 return (EISDIR); 458 case VLNK: 459 case VREG: 460 if (vp->v_mount->mnt_flag & MNT_RDONLY) 461 return (EROFS); 462 break; 463 default: 464 break; 465 } 466 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 467 return (error); 468 } 469 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 470 if (vp->v_mount->mnt_flag & MNT_RDONLY) 471 return (EROFS); 472 /* 473 * From utimes(2): 474 * If times is NULL, ... The caller must be the owner of 475 * the file, have permission to write the file, or be the 476 * super-user. 477 * If times is non-NULL, ... The caller must be the owner of 478 * the file or be the super-user. 
479 */ 480 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 481 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 482 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 483 return (error); 484 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 485 if (vap->va_atime.tv_sec != VNOVAL) { 486 ip->i_flag &= ~IN_ACCESS; 487 ip->i_atime = vap->va_atime.tv_sec; 488 ip->i_atimensec = vap->va_atime.tv_nsec; 489 } 490 if (vap->va_mtime.tv_sec != VNOVAL) { 491 ip->i_flag &= ~IN_UPDATE; 492 ip->i_mtime = vap->va_mtime.tv_sec; 493 ip->i_mtimensec = vap->va_mtime.tv_nsec; 494 } 495 ip->i_birthtime = vap->va_birthtime.tv_sec; 496 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 497 error = ext2_update(vp, 0); 498 if (error) 499 return (error); 500 } 501 error = 0; 502 if (vap->va_mode != (mode_t)VNOVAL) { 503 if (vp->v_mount->mnt_flag & MNT_RDONLY) 504 return (EROFS); 505 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 506 } 507 return (error); 508 } 509 510 /* 511 * Change the mode on a file. 512 * Inode must be locked before calling. 513 */ 514 static int 515 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 516 { 517 struct inode *ip = VTOI(vp); 518 int error; 519 520 /* 521 * To modify the permissions on a file, must possess VADMIN 522 * for that file. 523 */ 524 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 525 return (error); 526 /* 527 * Privileged processes may set the sticky bit on non-directories, 528 * as well as set the setgid bit on a file with a group that the 529 * process is not a member of. 
530 */ 531 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 532 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 533 if (error) 534 return (EFTYPE); 535 } 536 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 537 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 538 if (error) 539 return (error); 540 } 541 ip->i_mode &= ~ALLPERMS; 542 ip->i_mode |= (mode & ALLPERMS); 543 ip->i_flag |= IN_CHANGE; 544 return (0); 545 } 546 547 /* 548 * Perform chown operation on inode ip; 549 * inode must be locked prior to call. 550 */ 551 static int 552 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 553 struct thread *td) 554 { 555 struct inode *ip = VTOI(vp); 556 uid_t ouid; 557 gid_t ogid; 558 int error = 0; 559 560 if (uid == (uid_t)VNOVAL) 561 uid = ip->i_uid; 562 if (gid == (gid_t)VNOVAL) 563 gid = ip->i_gid; 564 /* 565 * To modify the ownership of a file, must possess VADMIN 566 * for that file. 567 */ 568 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 569 return (error); 570 /* 571 * To change the owner of a file, or change the group of a file 572 * to a group of which we are not a member, the caller must 573 * have privilege. 574 */ 575 if (uid != ip->i_uid || (gid != ip->i_gid && 576 !groupmember(gid, cred))) { 577 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 578 if (error) 579 return (error); 580 } 581 ogid = ip->i_gid; 582 ouid = ip->i_uid; 583 ip->i_gid = gid; 584 ip->i_uid = uid; 585 ip->i_flag |= IN_CHANGE; 586 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 587 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 588 ip->i_mode &= ~(ISUID | ISGID); 589 } 590 return (0); 591 } 592 593 /* 594 * Synch an open file. 595 */ 596 /* ARGSUSED */ 597 static int 598 ext2_fsync(struct vop_fsync_args *ap) 599 { 600 /* 601 * Flush all dirty buffers associated with a vnode. 
602 */ 603 604 vop_stdfsync(ap); 605 606 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 607 } 608 609 /* 610 * Mknod vnode call 611 */ 612 /* ARGSUSED */ 613 static int 614 ext2_mknod(struct vop_mknod_args *ap) 615 { 616 struct vattr *vap = ap->a_vap; 617 struct vnode **vpp = ap->a_vpp; 618 struct inode *ip; 619 ino_t ino; 620 int error; 621 622 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 623 ap->a_dvp, vpp, ap->a_cnp); 624 if (error) 625 return (error); 626 ip = VTOI(*vpp); 627 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 628 if (vap->va_rdev != VNOVAL) { 629 /* 630 * Want to be able to use this to make badblock 631 * inodes, so don't truncate the dev number. 632 */ 633 ip->i_rdev = vap->va_rdev; 634 } 635 /* 636 * Remove inode, then reload it through VFS_VGET so it is 637 * checked to see if it is an alias of an existing entry in 638 * the inode cache. XXX I don't believe this is necessary now. 639 */ 640 (*vpp)->v_type = VNON; 641 ino = ip->i_number; /* Save this before vgone() invalidates ip. 
*/ 642 vgone(*vpp); 643 vput(*vpp); 644 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 645 if (error) { 646 *vpp = NULL; 647 return (error); 648 } 649 return (0); 650 } 651 652 static int 653 ext2_remove(struct vop_remove_args *ap) 654 { 655 struct inode *ip; 656 struct vnode *vp = ap->a_vp; 657 struct vnode *dvp = ap->a_dvp; 658 int error; 659 660 ip = VTOI(vp); 661 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 662 (VTOI(dvp)->i_flags & APPEND)) { 663 error = EPERM; 664 goto out; 665 } 666 error = ext2_dirremove(dvp, ap->a_cnp); 667 if (error == 0) { 668 ip->i_nlink--; 669 ip->i_flag |= IN_CHANGE; 670 } 671 out: 672 return (error); 673 } 674 675 static unsigned short 676 ext2_max_nlink(struct inode *ip) 677 { 678 struct m_ext2fs *fs; 679 680 fs = ip->i_e2fs; 681 682 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_DIR_NLINK)) 683 return (EXT4_LINK_MAX); 684 else 685 return (EXT2_LINK_MAX); 686 } 687 688 /* 689 * link vnode call 690 */ 691 static int 692 ext2_link(struct vop_link_args *ap) 693 { 694 struct vnode *vp = ap->a_vp; 695 struct vnode *tdvp = ap->a_tdvp; 696 struct componentname *cnp = ap->a_cnp; 697 struct inode *ip; 698 int error; 699 700 #ifdef INVARIANTS 701 if ((cnp->cn_flags & HASBUF) == 0) 702 panic("ext2_link: no name"); 703 #endif 704 ip = VTOI(vp); 705 if ((nlink_t)ip->i_nlink >= ext2_max_nlink(ip)) { 706 error = EMLINK; 707 goto out; 708 } 709 if (ip->i_flags & (IMMUTABLE | APPEND)) { 710 error = EPERM; 711 goto out; 712 } 713 ip->i_nlink++; 714 ip->i_flag |= IN_CHANGE; 715 error = ext2_update(vp, !DOINGASYNC(vp)); 716 if (!error) 717 error = ext2_direnter(ip, tdvp, cnp); 718 if (error) { 719 ip->i_nlink--; 720 ip->i_flag |= IN_CHANGE; 721 } 722 out: 723 return (error); 724 } 725 726 static int 727 ext2_inc_nlink(struct inode *ip) 728 { 729 730 ip->i_nlink++; 731 732 if (ext2_htree_has_idx(ip) && ip->i_nlink > 1) { 733 if (ip->i_nlink >= ext2_max_nlink(ip) || ip->i_nlink == 2) 734 ip->i_nlink = 1; 735 } else if 
(ip->i_nlink > ext2_max_nlink(ip)) { 736 ip->i_nlink--; 737 return (EMLINK); 738 } 739 740 return (0); 741 } 742 743 static void 744 ext2_dec_nlink(struct inode *ip) 745 { 746 747 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2) 748 ip->i_nlink--; 749 } 750 751 /* 752 * Rename system call. 753 * rename("foo", "bar"); 754 * is essentially 755 * unlink("bar"); 756 * link("foo", "bar"); 757 * unlink("foo"); 758 * but ``atomically''. Can't do full commit without saving state in the 759 * inode on disk which isn't feasible at this time. Best we can do is 760 * always guarantee the target exists. 761 * 762 * Basic algorithm is: 763 * 764 * 1) Bump link count on source while we're linking it to the 765 * target. This also ensure the inode won't be deleted out 766 * from underneath us while we work (it may be truncated by 767 * a concurrent `trunc' or `open' for creation). 768 * 2) Link source to destination. If destination already exists, 769 * delete it first. 770 * 3) Unlink source reference to inode if still around. If a 771 * directory was moved and the parent of the destination 772 * is different from the source, patch the ".." entry in the 773 * directory. 774 */ 775 static int 776 ext2_rename(struct vop_rename_args *ap) 777 { 778 struct vnode *tvp = ap->a_tvp; 779 struct vnode *tdvp = ap->a_tdvp; 780 struct vnode *fvp = ap->a_fvp; 781 struct vnode *fdvp = ap->a_fdvp; 782 struct componentname *tcnp = ap->a_tcnp; 783 struct componentname *fcnp = ap->a_fcnp; 784 struct inode *ip, *xp, *dp; 785 struct dirtemplate dirbuf; 786 int doingdirectory = 0, oldparent = 0, newparent = 0; 787 int error = 0; 788 u_char namlen; 789 790 #ifdef INVARIANTS 791 if ((tcnp->cn_flags & HASBUF) == 0 || 792 (fcnp->cn_flags & HASBUF) == 0) 793 panic("ext2_rename: no name"); 794 #endif 795 /* 796 * Check for cross-device rename. 
797 */ 798 if ((fvp->v_mount != tdvp->v_mount) || 799 (tvp && (fvp->v_mount != tvp->v_mount))) { 800 error = EXDEV; 801 abortit: 802 if (tdvp == tvp) 803 vrele(tdvp); 804 else 805 vput(tdvp); 806 if (tvp) 807 vput(tvp); 808 vrele(fdvp); 809 vrele(fvp); 810 return (error); 811 } 812 813 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 814 (VTOI(tdvp)->i_flags & APPEND))) { 815 error = EPERM; 816 goto abortit; 817 } 818 819 /* 820 * Renaming a file to itself has no effect. The upper layers should 821 * not call us in that case. Temporarily just warn if they do. 822 */ 823 if (fvp == tvp) { 824 printf("ext2_rename: fvp == tvp (can't happen)\n"); 825 error = 0; 826 goto abortit; 827 } 828 829 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 830 goto abortit; 831 dp = VTOI(fdvp); 832 ip = VTOI(fvp); 833 if (ip->i_nlink >= ext2_max_nlink(ip) && !ext2_htree_has_idx(ip)) { 834 VOP_UNLOCK(fvp, 0); 835 error = EMLINK; 836 goto abortit; 837 } 838 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 839 || (dp->i_flags & APPEND)) { 840 VOP_UNLOCK(fvp, 0); 841 error = EPERM; 842 goto abortit; 843 } 844 if ((ip->i_mode & IFMT) == IFDIR) { 845 /* 846 * Avoid ".", "..", and aliases of "." for obvious reasons. 847 */ 848 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 849 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 850 (ip->i_flag & IN_RENAME)) { 851 VOP_UNLOCK(fvp, 0); 852 error = EINVAL; 853 goto abortit; 854 } 855 ip->i_flag |= IN_RENAME; 856 oldparent = dp->i_number; 857 doingdirectory++; 858 } 859 vrele(fdvp); 860 861 /* 862 * When the target exists, both the directory 863 * and target vnodes are returned locked. 864 */ 865 dp = VTOI(tdvp); 866 xp = NULL; 867 if (tvp) 868 xp = VTOI(tvp); 869 870 /* 871 * 1) Bump link count while we're moving stuff 872 * around. If we crash somewhere before 873 * completing our work, the link count 874 * may be wrong, but correctable. 
875 */ 876 ext2_inc_nlink(ip); 877 ip->i_flag |= IN_CHANGE; 878 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 879 VOP_UNLOCK(fvp, 0); 880 goto bad; 881 } 882 883 /* 884 * If ".." must be changed (ie the directory gets a new 885 * parent) then the source directory must not be in the 886 * directory hierarchy above the target, as this would 887 * orphan everything below the source directory. Also 888 * the user must have write permission in the source so 889 * as to be able to change "..". We must repeat the call 890 * to namei, as the parent directory is unlocked by the 891 * call to checkpath(). 892 */ 893 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 894 VOP_UNLOCK(fvp, 0); 895 if (oldparent != dp->i_number) 896 newparent = dp->i_number; 897 if (doingdirectory && newparent) { 898 if (error) /* write access check above */ 899 goto bad; 900 if (xp != NULL) 901 vput(tvp); 902 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 903 if (error) 904 goto out; 905 VREF(tdvp); 906 error = relookup(tdvp, &tvp, tcnp); 907 if (error) 908 goto out; 909 vrele(tdvp); 910 dp = VTOI(tdvp); 911 xp = NULL; 912 if (tvp) 913 xp = VTOI(tvp); 914 } 915 /* 916 * 2) If target doesn't exist, link the target 917 * to the source and unlink the source. 918 * Otherwise, rewrite the target directory 919 * entry to reference the source inode and 920 * expunge the original entry's existence. 921 */ 922 if (xp == NULL) { 923 if (dp->i_devvp != ip->i_devvp) 924 panic("ext2_rename: EXDEV"); 925 /* 926 * Account for ".." in new directory. 927 * When source and destination have the same 928 * parent we don't fool with the link count. 
929 */ 930 if (doingdirectory && newparent) { 931 error = ext2_inc_nlink(dp); 932 if (error) 933 goto bad; 934 935 dp->i_flag |= IN_CHANGE; 936 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 937 if (error) 938 goto bad; 939 } 940 error = ext2_direnter(ip, tdvp, tcnp); 941 if (error) { 942 if (doingdirectory && newparent) { 943 ext2_dec_nlink(dp); 944 dp->i_flag |= IN_CHANGE; 945 (void)ext2_update(tdvp, 1); 946 } 947 goto bad; 948 } 949 vput(tdvp); 950 } else { 951 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 952 panic("ext2_rename: EXDEV"); 953 /* 954 * Short circuit rename(foo, foo). 955 */ 956 if (xp->i_number == ip->i_number) 957 panic("ext2_rename: same file"); 958 /* 959 * If the parent directory is "sticky", then the user must 960 * own the parent directory, or the destination of the rename, 961 * otherwise the destination may not be changed (except by 962 * root). This implements append-only directories. 963 */ 964 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 965 tcnp->cn_cred->cr_uid != dp->i_uid && 966 xp->i_uid != tcnp->cn_cred->cr_uid) { 967 error = EPERM; 968 goto bad; 969 } 970 /* 971 * Target must be empty if a directory and have no links 972 * to it. Also, ensure source and target are compatible 973 * (both directories, or both not directories). 974 */ 975 if ((xp->i_mode & IFMT) == IFDIR) { 976 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 977 error = ENOTEMPTY; 978 goto bad; 979 } 980 if (!doingdirectory) { 981 error = ENOTDIR; 982 goto bad; 983 } 984 cache_purge(tdvp); 985 } else if (doingdirectory) { 986 error = EISDIR; 987 goto bad; 988 } 989 error = ext2_dirrewrite(dp, ip, tcnp); 990 if (error) 991 goto bad; 992 /* 993 * If the target directory is in the same 994 * directory as the source directory, 995 * decrement the link count on the parent 996 * of the target directory. 
997 */ 998 if (doingdirectory && !newparent) { 999 ext2_dec_nlink(dp); 1000 dp->i_flag |= IN_CHANGE; 1001 } 1002 vput(tdvp); 1003 /* 1004 * Adjust the link count of the target to 1005 * reflect the dirrewrite above. If this is 1006 * a directory it is empty and there are 1007 * no links to it, so we can squash the inode and 1008 * any space associated with it. We disallowed 1009 * renaming over top of a directory with links to 1010 * it above, as the remaining link would point to 1011 * a directory without "." or ".." entries. 1012 */ 1013 ext2_dec_nlink(xp); 1014 if (doingdirectory) { 1015 if (--xp->i_nlink != 0) 1016 panic("ext2_rename: linked directory"); 1017 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 1018 tcnp->cn_cred, tcnp->cn_thread); 1019 } 1020 xp->i_flag |= IN_CHANGE; 1021 vput(tvp); 1022 xp = NULL; 1023 } 1024 1025 /* 1026 * 3) Unlink the source. 1027 */ 1028 fcnp->cn_flags &= ~MODMASK; 1029 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1030 VREF(fdvp); 1031 error = relookup(fdvp, &fvp, fcnp); 1032 if (error == 0) 1033 vrele(fdvp); 1034 if (fvp != NULL) { 1035 xp = VTOI(fvp); 1036 dp = VTOI(fdvp); 1037 } else { 1038 /* 1039 * From name has disappeared. IN_RENAME is not sufficient 1040 * to protect against directory races due to timing windows, 1041 * so we can't panic here. 1042 */ 1043 vrele(ap->a_fvp); 1044 return (0); 1045 } 1046 /* 1047 * Ensure that the directory entry still exists and has not 1048 * changed while the new name has been entered. If the source is 1049 * a file then the entry may have been unlinked or renamed. In 1050 * either case there is no further work to be done. If the source 1051 * is a directory then it cannot have been rmdir'ed; its link 1052 * count of three would cause a rmdir to fail with ENOTEMPTY. 1053 * The IN_RENAME flag ensures that it cannot be moved by another 1054 * rename. 1055 */ 1056 if (xp != ip) { 1057 /* 1058 * From name resolves to a different inode. 
IN_RENAME is 1059 * not sufficient protection against timing window races 1060 * so we can't panic here. 1061 */ 1062 } else { 1063 /* 1064 * If the source is a directory with a 1065 * new parent, the link count of the old 1066 * parent directory must be decremented 1067 * and ".." set to point to the new parent. 1068 */ 1069 if (doingdirectory && newparent) { 1070 ext2_dec_nlink(dp); 1071 dp->i_flag |= IN_CHANGE; 1072 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1073 sizeof(struct dirtemplate), (off_t)0, 1074 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1075 tcnp->cn_cred, NOCRED, NULL, NULL); 1076 if (error == 0) { 1077 /* Like ufs little-endian: */ 1078 namlen = dirbuf.dotdot_type; 1079 if (namlen != 2 || 1080 dirbuf.dotdot_name[0] != '.' || 1081 dirbuf.dotdot_name[1] != '.') { 1082 ext2_dirbad(xp, (doff_t)12, 1083 "rename: mangled dir"); 1084 } else { 1085 dirbuf.dotdot_ino = newparent; 1086 (void)vn_rdwr(UIO_WRITE, fvp, 1087 (caddr_t)&dirbuf, 1088 sizeof(struct dirtemplate), 1089 (off_t)0, UIO_SYSSPACE, 1090 IO_NODELOCKED | IO_SYNC | 1091 IO_NOMACCHECK, tcnp->cn_cred, 1092 NOCRED, NULL, NULL); 1093 cache_purge(fdvp); 1094 } 1095 } 1096 } 1097 error = ext2_dirremove(fdvp, fcnp); 1098 if (!error) { 1099 ext2_dec_nlink(xp); 1100 xp->i_flag |= IN_CHANGE; 1101 } 1102 xp->i_flag &= ~IN_RENAME; 1103 } 1104 if (dp) 1105 vput(fdvp); 1106 if (xp) 1107 vput(fvp); 1108 vrele(ap->a_fvp); 1109 return (error); 1110 1111 bad: 1112 if (xp) 1113 vput(ITOV(xp)); 1114 vput(ITOV(dp)); 1115 out: 1116 if (doingdirectory) 1117 ip->i_flag &= ~IN_RENAME; 1118 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1119 ext2_dec_nlink(ip); 1120 ip->i_flag |= IN_CHANGE; 1121 ip->i_flag &= ~IN_RENAME; 1122 vput(fvp); 1123 } else 1124 vrele(fvp); 1125 return (error); 1126 } 1127 1128 #ifdef UFS_ACL 1129 static int 1130 ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1131 mode_t dmode, struct ucred *cred, struct thread *td) 1132 { 1133 int error; 1134 struct inode *ip = 
VTOI(tvp); 1135 struct acl *dacl, *acl; 1136 1137 acl = acl_alloc(M_WAITOK); 1138 dacl = acl_alloc(M_WAITOK); 1139 1140 /* 1141 * Retrieve default ACL from parent, if any. 1142 */ 1143 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1144 switch (error) { 1145 case 0: 1146 /* 1147 * Retrieved a default ACL, so merge mode and ACL if 1148 * necessary. If the ACL is empty, fall through to 1149 * the "not defined or available" case. 1150 */ 1151 if (acl->acl_cnt != 0) { 1152 dmode = acl_posix1e_newfilemode(dmode, acl); 1153 ip->i_mode = dmode; 1154 *dacl = *acl; 1155 ext2_sync_acl_from_inode(ip, acl); 1156 break; 1157 } 1158 /* FALLTHROUGH */ 1159 1160 case EOPNOTSUPP: 1161 /* 1162 * Just use the mode as-is. 1163 */ 1164 ip->i_mode = dmode; 1165 error = 0; 1166 goto out; 1167 1168 default: 1169 goto out; 1170 } 1171 1172 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1173 if (error == 0) 1174 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1175 switch (error) { 1176 case 0: 1177 break; 1178 1179 case EOPNOTSUPP: 1180 /* 1181 * XXX: This should not happen, as EOPNOTSUPP above 1182 * was supposed to free acl. 1183 */ 1184 #ifdef DEBUG 1185 printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1186 #endif /* DEBUG */ 1187 break; 1188 1189 default: 1190 goto out; 1191 } 1192 1193 out: 1194 acl_free(acl); 1195 acl_free(dacl); 1196 1197 return (error); 1198 } 1199 1200 static int 1201 ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1202 mode_t mode, struct ucred *cred, struct thread *td) 1203 { 1204 int error; 1205 struct inode *ip = VTOI(tvp); 1206 struct acl *acl; 1207 1208 acl = acl_alloc(M_WAITOK); 1209 1210 /* 1211 * Retrieve default ACL for parent, if any. 1212 */ 1213 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1214 switch (error) { 1215 case 0: 1216 /* 1217 * Retrieved a default ACL, so merge mode and ACL if 1218 * necessary. 
1219 */ 1220 if (acl->acl_cnt != 0) { 1221 /* 1222 * Two possible ways for default ACL to not 1223 * be present. First, the EA can be 1224 * undefined, or second, the default ACL can 1225 * be blank. If it's blank, fall through to 1226 * the it's not defined case. 1227 */ 1228 mode = acl_posix1e_newfilemode(mode, acl); 1229 ip->i_mode = mode; 1230 ext2_sync_acl_from_inode(ip, acl); 1231 break; 1232 } 1233 /* FALLTHROUGH */ 1234 1235 case EOPNOTSUPP: 1236 /* 1237 * Just use the mode as-is. 1238 */ 1239 ip->i_mode = mode; 1240 error = 0; 1241 goto out; 1242 1243 default: 1244 goto out; 1245 } 1246 1247 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1248 switch (error) { 1249 case 0: 1250 break; 1251 1252 case EOPNOTSUPP: 1253 /* 1254 * XXX: This should not happen, as EOPNOTSUPP above was 1255 * supposed to free acl. 1256 */ 1257 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1258 "but no VOP_SETACL()\n"); 1259 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1260 "but no VOP_SETACL()"); */ 1261 break; 1262 1263 default: 1264 goto out; 1265 } 1266 1267 out: 1268 acl_free(acl); 1269 1270 return (error); 1271 } 1272 1273 #endif /* UFS_ACL */ 1274 1275 /* 1276 * Mkdir system call 1277 */ 1278 static int 1279 ext2_mkdir(struct vop_mkdir_args *ap) 1280 { 1281 struct vnode *dvp = ap->a_dvp; 1282 struct vattr *vap = ap->a_vap; 1283 struct componentname *cnp = ap->a_cnp; 1284 struct inode *ip, *dp; 1285 struct vnode *tvp; 1286 struct dirtemplate dirtemplate, *dtp; 1287 int error, dmode; 1288 1289 #ifdef INVARIANTS 1290 if ((cnp->cn_flags & HASBUF) == 0) 1291 panic("ext2_mkdir: no name"); 1292 #endif 1293 dp = VTOI(dvp); 1294 if ((nlink_t)dp->i_nlink >= ext2_max_nlink(dp) && 1295 !ext2_htree_has_idx(dp)) { 1296 error = EMLINK; 1297 goto out; 1298 } 1299 dmode = vap->va_mode & 0777; 1300 dmode |= IFDIR; 1301 /* 1302 * Must simulate part of ext2_makeinode here to acquire the inode, 1303 * but not have it entered in the parent directory. 
The entry is 1304 * made later after writing "." and ".." entries. 1305 */ 1306 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1307 if (error) 1308 goto out; 1309 ip = VTOI(tvp); 1310 ip->i_gid = dp->i_gid; 1311 #ifdef SUIDDIR 1312 { 1313 /* 1314 * if we are hacking owners here, (only do this where told to) 1315 * and we are not giving it TOO root, (would subvert quotas) 1316 * then go ahead and give it to the other user. 1317 * The new directory also inherits the SUID bit. 1318 * If user's UID and dir UID are the same, 1319 * 'give it away' so that the SUID is still forced on. 1320 */ 1321 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1322 (dp->i_mode & ISUID) && dp->i_uid) { 1323 dmode |= ISUID; 1324 ip->i_uid = dp->i_uid; 1325 } else { 1326 ip->i_uid = cnp->cn_cred->cr_uid; 1327 } 1328 } 1329 #else 1330 ip->i_uid = cnp->cn_cred->cr_uid; 1331 #endif 1332 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1333 ip->i_mode = dmode; 1334 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1335 ip->i_nlink = 2; 1336 if (cnp->cn_flags & ISWHITEOUT) 1337 ip->i_flags |= UF_OPAQUE; 1338 error = ext2_update(tvp, 1); 1339 1340 /* 1341 * Bump link count in parent directory 1342 * to reflect work done below. Should 1343 * be done before reference is created 1344 * so reparation is possible if we crash. 1345 */ 1346 ext2_inc_nlink(dp); 1347 dp->i_flag |= IN_CHANGE; 1348 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1349 if (error) 1350 goto bad; 1351 1352 /* Initialize directory with "." and ".." from static template. 
*/ 1353 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1354 EXT2F_INCOMPAT_FTYPE)) 1355 dtp = &mastertemplate; 1356 else 1357 dtp = &omastertemplate; 1358 dirtemplate = *dtp; 1359 dirtemplate.dot_ino = ip->i_number; 1360 dirtemplate.dotdot_ino = dp->i_number; 1361 /* 1362 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1363 * just redefine it - for this function only 1364 */ 1365 #undef DIRBLKSIZ 1366 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1367 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1368 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1369 sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, 1370 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1371 NULL, NULL); 1372 if (error) { 1373 ext2_dec_nlink(dp); 1374 dp->i_flag |= IN_CHANGE; 1375 goto bad; 1376 } 1377 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1378 /* XXX should grow with balloc() */ 1379 panic("ext2_mkdir: blksize"); 1380 else { 1381 ip->i_size = DIRBLKSIZ; 1382 ip->i_flag |= IN_CHANGE; 1383 } 1384 1385 #ifdef UFS_ACL 1386 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1387 error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1388 cnp->cn_cred, cnp->cn_thread); 1389 if (error) 1390 goto bad; 1391 } 1392 1393 #endif /* UFS_ACL */ 1394 1395 /* Directory set up, now install its entry in the parent directory. */ 1396 error = ext2_direnter(ip, dvp, cnp); 1397 if (error) { 1398 ext2_dec_nlink(dp); 1399 dp->i_flag |= IN_CHANGE; 1400 } 1401 bad: 1402 /* 1403 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1404 * for us because we set the link count to 0. 1405 */ 1406 if (error) { 1407 ip->i_nlink = 0; 1408 ip->i_flag |= IN_CHANGE; 1409 vput(tvp); 1410 } else 1411 *ap->a_vpp = tvp; 1412 out: 1413 return (error); 1414 #undef DIRBLKSIZ 1415 #define DIRBLKSIZ DEV_BSIZE 1416 } 1417 1418 /* 1419 * Rmdir system call. 
1420 */ 1421 static int 1422 ext2_rmdir(struct vop_rmdir_args *ap) 1423 { 1424 struct vnode *vp = ap->a_vp; 1425 struct vnode *dvp = ap->a_dvp; 1426 struct componentname *cnp = ap->a_cnp; 1427 struct inode *ip, *dp; 1428 int error; 1429 1430 ip = VTOI(vp); 1431 dp = VTOI(dvp); 1432 1433 /* 1434 * Verify the directory is empty (and valid). 1435 * (Rmdir ".." won't be valid since 1436 * ".." will contain a reference to 1437 * the current directory and thus be 1438 * non-empty.) 1439 */ 1440 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1441 error = ENOTEMPTY; 1442 goto out; 1443 } 1444 if ((dp->i_flags & APPEND) 1445 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1446 error = EPERM; 1447 goto out; 1448 } 1449 /* 1450 * Delete reference to directory before purging 1451 * inode. If we crash in between, the directory 1452 * will be reattached to lost+found, 1453 */ 1454 error = ext2_dirremove(dvp, cnp); 1455 if (error) 1456 goto out; 1457 ext2_dec_nlink(dp); 1458 dp->i_flag |= IN_CHANGE; 1459 cache_purge(dvp); 1460 VOP_UNLOCK(dvp, 0); 1461 /* 1462 * Truncate inode. The only stuff left 1463 * in the directory is "." and "..". 
1464 */ 1465 ip->i_nlink = 0; 1466 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1467 cnp->cn_thread); 1468 cache_purge(ITOV(ip)); 1469 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1470 VOP_UNLOCK(vp, 0); 1471 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1472 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1473 } 1474 out: 1475 return (error); 1476 } 1477 1478 /* 1479 * symlink -- make a symbolic link 1480 */ 1481 static int 1482 ext2_symlink(struct vop_symlink_args *ap) 1483 { 1484 struct vnode *vp, **vpp = ap->a_vpp; 1485 struct inode *ip; 1486 int len, error; 1487 1488 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1489 vpp, ap->a_cnp); 1490 if (error) 1491 return (error); 1492 vp = *vpp; 1493 len = strlen(ap->a_target); 1494 if (len < vp->v_mount->mnt_maxsymlinklen) { 1495 ip = VTOI(vp); 1496 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1497 ip->i_size = len; 1498 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1499 } else 1500 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1501 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1502 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1503 if (error) 1504 vput(vp); 1505 return (error); 1506 } 1507 1508 /* 1509 * Return target name of a symbolic link 1510 */ 1511 static int 1512 ext2_readlink(struct vop_readlink_args *ap) 1513 { 1514 struct vnode *vp = ap->a_vp; 1515 struct inode *ip = VTOI(vp); 1516 int isize; 1517 1518 isize = ip->i_size; 1519 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1520 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1521 return (0); 1522 } 1523 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1524 } 1525 1526 /* 1527 * Calculate the logical to physical mapping if not done already, 1528 * then call the device strategy routine. 1529 * 1530 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1531 * deadlock on memory. See ext2_bmap() for details. 
1532 */ 1533 static int 1534 ext2_strategy(struct vop_strategy_args *ap) 1535 { 1536 struct buf *bp = ap->a_bp; 1537 struct vnode *vp = ap->a_vp; 1538 struct bufobj *bo; 1539 daddr_t blkno; 1540 int error; 1541 1542 if (vp->v_type == VBLK || vp->v_type == VCHR) 1543 panic("ext2_strategy: spec"); 1544 if (bp->b_blkno == bp->b_lblkno) { 1545 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1546 bp->b_blkno = blkno; 1547 if (error) { 1548 bp->b_error = error; 1549 bp->b_ioflags |= BIO_ERROR; 1550 bufdone(bp); 1551 return (0); 1552 } 1553 if ((long)bp->b_blkno == -1) 1554 vfs_bio_clrbuf(bp); 1555 } 1556 if ((long)bp->b_blkno == -1) { 1557 bufdone(bp); 1558 return (0); 1559 } 1560 bp->b_iooffset = dbtob(bp->b_blkno); 1561 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1562 BO_STRATEGY(bo, bp); 1563 return (0); 1564 } 1565 1566 /* 1567 * Print out the contents of an inode. 1568 */ 1569 static int 1570 ext2_print(struct vop_print_args *ap) 1571 { 1572 struct vnode *vp = ap->a_vp; 1573 struct inode *ip = VTOI(vp); 1574 1575 vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); 1576 if (vp->v_type == VFIFO) 1577 fifo_printinfo(vp); 1578 printf("\n"); 1579 return (0); 1580 } 1581 1582 /* 1583 * Close wrapper for fifos. 1584 * 1585 * Update the times on the inode then do device close. 1586 */ 1587 static int 1588 ext2fifo_close(struct vop_close_args *ap) 1589 { 1590 struct vnode *vp = ap->a_vp; 1591 1592 VI_LOCK(vp); 1593 if (vp->v_usecount > 1) 1594 ext2_itimes_locked(vp); 1595 VI_UNLOCK(vp); 1596 return (fifo_specops.vop_close(ap)); 1597 } 1598 1599 /* 1600 * Kqfilter wrapper for fifos. 1601 * 1602 * Fall through to ext2 kqfilter routines if needed 1603 */ 1604 static int 1605 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1606 { 1607 int error; 1608 1609 error = fifo_specops.vop_kqfilter(ap); 1610 if (error) 1611 error = vfs_kqfilter(ap); 1612 return (error); 1613 } 1614 1615 /* 1616 * Return POSIX pathconf information applicable to ext2 filesystems. 
1617 */ 1618 static int 1619 ext2_pathconf(struct vop_pathconf_args *ap) 1620 { 1621 int error = 0; 1622 1623 switch (ap->a_name) { 1624 case _PC_LINK_MAX: 1625 if (ext2_htree_has_idx(VTOI(ap->a_vp))) 1626 *ap->a_retval = INT_MAX; 1627 else 1628 *ap->a_retval = ext2_max_nlink(VTOI(ap->a_vp)); 1629 break; 1630 case _PC_NO_TRUNC: 1631 *ap->a_retval = 1; 1632 break; 1633 1634 #ifdef UFS_ACL 1635 case _PC_ACL_EXTENDED: 1636 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1637 *ap->a_retval = 1; 1638 else 1639 *ap->a_retval = 0; 1640 break; 1641 case _PC_ACL_PATH_MAX: 1642 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1643 *ap->a_retval = ACL_MAX_ENTRIES; 1644 else 1645 *ap->a_retval = 3; 1646 break; 1647 #endif /* UFS_ACL */ 1648 1649 case _PC_MIN_HOLE_SIZE: 1650 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1651 break; 1652 case _PC_PRIO_IO: 1653 *ap->a_retval = 0; 1654 break; 1655 case _PC_SYNC_IO: 1656 *ap->a_retval = 0; 1657 break; 1658 case _PC_ALLOC_SIZE_MIN: 1659 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1660 break; 1661 case _PC_FILESIZEBITS: 1662 *ap->a_retval = 64; 1663 break; 1664 case _PC_REC_INCR_XFER_SIZE: 1665 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1666 break; 1667 case _PC_REC_MAX_XFER_SIZE: 1668 *ap->a_retval = -1; /* means ``unlimited'' */ 1669 break; 1670 case _PC_REC_MIN_XFER_SIZE: 1671 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1672 break; 1673 case _PC_REC_XFER_ALIGN: 1674 *ap->a_retval = PAGE_SIZE; 1675 break; 1676 case _PC_SYMLINK_MAX: 1677 *ap->a_retval = MAXPATHLEN; 1678 break; 1679 1680 default: 1681 error = vop_stdpathconf(ap); 1682 break; 1683 } 1684 return (error); 1685 } 1686 1687 /* 1688 * Vnode operation to remove a named attribute. 
1689 */ 1690 static int 1691 ext2_deleteextattr(struct vop_deleteextattr_args *ap) 1692 { 1693 struct inode *ip; 1694 struct m_ext2fs *fs; 1695 int error; 1696 1697 ip = VTOI(ap->a_vp); 1698 fs = ip->i_e2fs; 1699 1700 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1701 return (EOPNOTSUPP); 1702 1703 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1704 return (EOPNOTSUPP); 1705 1706 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1707 ap->a_cred, ap->a_td, VWRITE); 1708 if (error) 1709 return (error); 1710 1711 error = ENOATTR; 1712 1713 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1714 error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name); 1715 if (error != ENOATTR) 1716 return (error); 1717 } 1718 1719 if (ip->i_facl) 1720 error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name); 1721 1722 return (error); 1723 } 1724 1725 /* 1726 * Vnode operation to retrieve a named extended attribute. 1727 */ 1728 static int 1729 ext2_getextattr(struct vop_getextattr_args *ap) 1730 { 1731 struct inode *ip; 1732 struct m_ext2fs *fs; 1733 int error; 1734 1735 ip = VTOI(ap->a_vp); 1736 fs = ip->i_e2fs; 1737 1738 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1739 return (EOPNOTSUPP); 1740 1741 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1742 return (EOPNOTSUPP); 1743 1744 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1745 ap->a_cred, ap->a_td, VREAD); 1746 if (error) 1747 return (error); 1748 1749 if (ap->a_size != NULL) 1750 *ap->a_size = 0; 1751 1752 error = ENOATTR; 1753 1754 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1755 error = ext2_extattr_inode_get(ip, ap->a_attrnamespace, 1756 ap->a_name, ap->a_uio, ap->a_size); 1757 if (error != ENOATTR) 1758 return (error); 1759 } 1760 1761 if (ip->i_facl) 1762 error = ext2_extattr_block_get(ip, ap->a_attrnamespace, 1763 ap->a_name, ap->a_uio, ap->a_size); 1764 1765 return (error); 1766 } 1767 1768 /* 1769 * 
Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ext2_listextattr(struct vop_listextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct m_ext2fs *fs = ip->i_e2fs;
	int error;

	if (!EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_EXT_ATTR))
		return (EOPNOTSUPP);

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	/* Start the optional size result from zero. */
	if (ap->a_size != NULL)
		*ap->a_size = 0;

	/* List the in-inode attributes, then those in the block. */
	if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) {
		error = ext2_extattr_inode_list(ip, ap->a_attrnamespace,
		    ap->a_uio, ap->a_size);
		if (error)
			return (error);
	}

	/* No attribute block recorded in the inode: the list is complete. */
	if (!ip->i_facl)
		return (0);

	return (ext2_extattr_block_list(ip, ap->a_attrnamespace,
	    ap->a_uio, ap->a_size));
}

/*
 * Vnode operation to set a named attribute.
1811 */ 1812 static int 1813 ext2_setextattr(struct vop_setextattr_args *ap) 1814 { 1815 struct inode *ip; 1816 struct m_ext2fs *fs; 1817 int error; 1818 1819 ip = VTOI(ap->a_vp); 1820 fs = ip->i_e2fs; 1821 1822 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1823 return (EOPNOTSUPP); 1824 1825 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1826 return (EOPNOTSUPP); 1827 1828 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1829 ap->a_cred, ap->a_td, VWRITE); 1830 if (error) 1831 return (error); 1832 1833 error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name); 1834 if (error) 1835 return (error); 1836 1837 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1838 error = ext2_extattr_inode_set(ip, ap->a_attrnamespace, 1839 ap->a_name, ap->a_uio); 1840 if (error != ENOSPC) 1841 return (error); 1842 } 1843 1844 error = ext2_extattr_block_set(ip, ap->a_attrnamespace, 1845 ap->a_name, ap->a_uio); 1846 1847 return (error); 1848 } 1849 1850 /* 1851 * Vnode pointer to File handle 1852 */ 1853 /* ARGSUSED */ 1854 static int 1855 ext2_vptofh(struct vop_vptofh_args *ap) 1856 { 1857 struct inode *ip; 1858 struct ufid *ufhp; 1859 1860 ip = VTOI(ap->a_vp); 1861 ufhp = (struct ufid *)ap->a_fhp; 1862 ufhp->ufid_len = sizeof(struct ufid); 1863 ufhp->ufid_ino = ip->i_number; 1864 ufhp->ufid_gen = ip->i_gen; 1865 return (0); 1866 } 1867 1868 /* 1869 * Initialize the vnode associated with a new inode, handle aliased 1870 * vnodes. 1871 */ 1872 int 1873 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1874 { 1875 struct inode *ip; 1876 struct vnode *vp; 1877 1878 vp = *vpp; 1879 ip = VTOI(vp); 1880 vp->v_type = IFTOVT(ip->i_mode); 1881 if (vp->v_type == VFIFO) 1882 vp->v_op = fifoops; 1883 1884 if (ip->i_number == EXT2_ROOTINO) 1885 vp->v_vflag |= VV_ROOT; 1886 ip->i_modrev = init_va_filerev(); 1887 *vpp = vp; 1888 return (0); 1889 } 1890 1891 /* 1892 * Allocate a new inode. 
1893 */ 1894 static int 1895 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1896 struct componentname *cnp) 1897 { 1898 struct inode *ip, *pdir; 1899 struct vnode *tvp; 1900 int error; 1901 1902 pdir = VTOI(dvp); 1903 #ifdef INVARIANTS 1904 if ((cnp->cn_flags & HASBUF) == 0) 1905 panic("ext2_makeinode: no name"); 1906 #endif 1907 *vpp = NULL; 1908 if ((mode & IFMT) == 0) 1909 mode |= IFREG; 1910 1911 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1912 if (error) { 1913 return (error); 1914 } 1915 ip = VTOI(tvp); 1916 ip->i_gid = pdir->i_gid; 1917 #ifdef SUIDDIR 1918 { 1919 /* 1920 * if we are 1921 * not the owner of the directory, 1922 * and we are hacking owners here, (only do this where told to) 1923 * and we are not giving it TOO root, (would subvert quotas) 1924 * then go ahead and give it to the other user. 1925 * Note that this drops off the execute bits for security. 1926 */ 1927 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1928 (pdir->i_mode & ISUID) && 1929 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1930 ip->i_uid = pdir->i_uid; 1931 mode &= ~07111; 1932 } else { 1933 ip->i_uid = cnp->cn_cred->cr_uid; 1934 } 1935 } 1936 #else 1937 ip->i_uid = cnp->cn_cred->cr_uid; 1938 #endif 1939 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1940 ip->i_mode = mode; 1941 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1942 ip->i_nlink = 1; 1943 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1944 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1945 ip->i_mode &= ~ISGID; 1946 } 1947 1948 if (cnp->cn_flags & ISWHITEOUT) 1949 ip->i_flags |= UF_OPAQUE; 1950 1951 /* 1952 * Make sure inode goes to disk before directory entry. 
1953 */ 1954 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1955 if (error) 1956 goto bad; 1957 1958 #ifdef UFS_ACL 1959 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1960 error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 1961 cnp->cn_cred, cnp->cn_thread); 1962 if (error) 1963 goto bad; 1964 } 1965 #endif /* UFS_ACL */ 1966 1967 error = ext2_direnter(ip, dvp, cnp); 1968 if (error) 1969 goto bad; 1970 1971 *vpp = tvp; 1972 return (0); 1973 1974 bad: 1975 /* 1976 * Write error occurred trying to update the inode 1977 * or the directory so must deallocate the inode. 1978 */ 1979 ip->i_nlink = 0; 1980 ip->i_flag |= IN_CHANGE; 1981 vput(tvp); 1982 return (error); 1983 } 1984 1985 /* 1986 * Vnode op for reading. 1987 */ 1988 static int 1989 ext2_read(struct vop_read_args *ap) 1990 { 1991 struct vnode *vp; 1992 struct inode *ip; 1993 int error; 1994 1995 vp = ap->a_vp; 1996 ip = VTOI(vp); 1997 1998 /* EXT4_EXT_LOCK(ip); */ 1999 if (ip->i_flag & IN_E4EXTENTS) 2000 error = ext4_ext_read(ap); 2001 else 2002 error = ext2_ind_read(ap); 2003 /* EXT4_EXT_UNLOCK(ip); */ 2004 return (error); 2005 } 2006 2007 /* 2008 * Vnode op for reading. 
2009 */ 2010 static int 2011 ext2_ind_read(struct vop_read_args *ap) 2012 { 2013 struct vnode *vp; 2014 struct inode *ip; 2015 struct uio *uio; 2016 struct m_ext2fs *fs; 2017 struct buf *bp; 2018 daddr_t lbn, nextlbn; 2019 off_t bytesinfile; 2020 long size, xfersize, blkoffset; 2021 int error, orig_resid, seqcount; 2022 int ioflag; 2023 2024 vp = ap->a_vp; 2025 uio = ap->a_uio; 2026 ioflag = ap->a_ioflag; 2027 2028 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 2029 ip = VTOI(vp); 2030 2031 #ifdef INVARIANTS 2032 if (uio->uio_rw != UIO_READ) 2033 panic("%s: mode", "ext2_read"); 2034 2035 if (vp->v_type == VLNK) { 2036 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 2037 panic("%s: short symlink", "ext2_read"); 2038 } else if (vp->v_type != VREG && vp->v_type != VDIR) 2039 panic("%s: type %d", "ext2_read", vp->v_type); 2040 #endif 2041 orig_resid = uio->uio_resid; 2042 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 2043 if (orig_resid == 0) 2044 return (0); 2045 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 2046 fs = ip->i_e2fs; 2047 if (uio->uio_offset < ip->i_size && 2048 uio->uio_offset >= fs->e2fs_maxfilesize) 2049 return (EOVERFLOW); 2050 2051 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 2052 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2053 break; 2054 lbn = lblkno(fs, uio->uio_offset); 2055 nextlbn = lbn + 1; 2056 size = blksize(fs, ip, lbn); 2057 blkoffset = blkoff(fs, uio->uio_offset); 2058 2059 xfersize = fs->e2fs_fsize - blkoffset; 2060 if (uio->uio_resid < xfersize) 2061 xfersize = uio->uio_resid; 2062 if (bytesinfile < xfersize) 2063 xfersize = bytesinfile; 2064 2065 if (lblktosize(fs, nextlbn) >= ip->i_size) 2066 error = bread(vp, lbn, size, NOCRED, &bp); 2067 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 2068 error = cluster_read(vp, ip->i_size, lbn, size, 2069 NOCRED, blkoffset + uio->uio_resid, seqcount, 2070 0, &bp); 2071 } else if (seqcount > 1) { 2072 u_int nextsize = 
blksize(fs, ip, nextlbn); 2073 2074 error = breadn(vp, lbn, 2075 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 2076 } else 2077 error = bread(vp, lbn, size, NOCRED, &bp); 2078 if (error) { 2079 brelse(bp); 2080 bp = NULL; 2081 break; 2082 } 2083 2084 /* 2085 * We should only get non-zero b_resid when an I/O error 2086 * has occurred, which should cause us to break above. 2087 * However, if the short read did not cause an error, 2088 * then we want to ensure that we do not uiomove bad 2089 * or uninitialized data. 2090 */ 2091 size -= bp->b_resid; 2092 if (size < xfersize) { 2093 if (size == 0) 2094 break; 2095 xfersize = size; 2096 } 2097 error = uiomove((char *)bp->b_data + blkoffset, 2098 (int)xfersize, uio); 2099 if (error) 2100 break; 2101 vfs_bio_brelse(bp, ioflag); 2102 } 2103 2104 /* 2105 * This can only happen in the case of an error because the loop 2106 * above resets bp to NULL on each iteration and on normal 2107 * completion has not set a new value into it. so it must have come 2108 * from a 'break' statement 2109 */ 2110 if (bp != NULL) 2111 vfs_bio_brelse(bp, ioflag); 2112 2113 if ((error == 0 || uio->uio_resid != orig_resid) && 2114 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 2115 ip->i_flag |= IN_ACCESS; 2116 return (error); 2117 } 2118 2119 static int 2120 ext2_ioctl(struct vop_ioctl_args *ap) 2121 { 2122 2123 switch (ap->a_command) { 2124 case FIOSEEKDATA: 2125 case FIOSEEKHOLE: 2126 return (vn_bmap_seekhole(ap->a_vp, ap->a_command, 2127 (off_t *)ap->a_data, ap->a_cred)); 2128 default: 2129 return (ENOTTY); 2130 } 2131 } 2132 2133 /* 2134 * this function handles ext4 extents block mapping 2135 */ 2136 static int 2137 ext4_ext_read(struct vop_read_args *ap) 2138 { 2139 static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; 2140 struct vnode *vp; 2141 struct inode *ip; 2142 struct uio *uio; 2143 struct m_ext2fs *fs; 2144 struct buf *bp; 2145 struct ext4_extent nex, *ep; 2146 struct ext4_extent_path path; 2147 daddr_t lbn, newblk; 2148 off_t 
bytesinfile; 2149 int cache_type; 2150 ssize_t orig_resid; 2151 int error; 2152 long size, xfersize, blkoffset; 2153 2154 vp = ap->a_vp; 2155 ip = VTOI(vp); 2156 uio = ap->a_uio; 2157 memset(&path, 0, sizeof(path)); 2158 2159 orig_resid = uio->uio_resid; 2160 KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); 2161 if (orig_resid == 0) 2162 return (0); 2163 KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); 2164 fs = ip->i_e2fs; 2165 if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) 2166 return (EOVERFLOW); 2167 2168 while (uio->uio_resid > 0) { 2169 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2170 break; 2171 lbn = lblkno(fs, uio->uio_offset); 2172 size = blksize(fs, ip, lbn); 2173 blkoffset = blkoff(fs, uio->uio_offset); 2174 2175 xfersize = fs->e2fs_fsize - blkoffset; 2176 xfersize = MIN(xfersize, uio->uio_resid); 2177 xfersize = MIN(xfersize, bytesinfile); 2178 2179 /* get block from ext4 extent cache */ 2180 cache_type = ext4_ext_in_cache(ip, lbn, &nex); 2181 switch (cache_type) { 2182 case EXT4_EXT_CACHE_NO: 2183 ext4_ext_find_extent(fs, ip, lbn, &path); 2184 if (path.ep_is_sparse) 2185 ep = &path.ep_sparse_ext; 2186 else 2187 ep = path.ep_ext; 2188 if (ep == NULL) 2189 return (EIO); 2190 2191 ext4_ext_put_cache(ip, ep, 2192 path.ep_is_sparse ? 
EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); 2193 2194 newblk = lbn - ep->e_blk + (ep->e_start_lo | 2195 (daddr_t)ep->e_start_hi << 32); 2196 2197 if (path.ep_bp != NULL) { 2198 brelse(path.ep_bp); 2199 path.ep_bp = NULL; 2200 } 2201 break; 2202 2203 case EXT4_EXT_CACHE_GAP: 2204 /* block has not been allocated yet */ 2205 break; 2206 2207 case EXT4_EXT_CACHE_IN: 2208 newblk = lbn - nex.e_blk + (nex.e_start_lo | 2209 (daddr_t)nex.e_start_hi << 32); 2210 break; 2211 2212 default: 2213 panic("%s: invalid cache type", __func__); 2214 } 2215 2216 if (cache_type == EXT4_EXT_CACHE_GAP || 2217 (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { 2218 if (xfersize > sizeof(zeroes)) 2219 xfersize = sizeof(zeroes); 2220 error = uiomove(zeroes, xfersize, uio); 2221 if (error) 2222 return (error); 2223 } else { 2224 error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, 2225 NOCRED, &bp); 2226 if (error) { 2227 brelse(bp); 2228 return (error); 2229 } 2230 2231 size -= bp->b_resid; 2232 if (size < xfersize) { 2233 if (size == 0) { 2234 bqrelse(bp); 2235 break; 2236 } 2237 xfersize = size; 2238 } 2239 error = uiomove(bp->b_data + blkoffset, xfersize, uio); 2240 bqrelse(bp); 2241 if (error) 2242 return (error); 2243 } 2244 } 2245 2246 return (0); 2247 } 2248 2249 /* 2250 * Vnode op for writing. 
2251 */ 2252 static int 2253 ext2_write(struct vop_write_args *ap) 2254 { 2255 struct vnode *vp; 2256 struct uio *uio; 2257 struct inode *ip; 2258 struct m_ext2fs *fs; 2259 struct buf *bp; 2260 daddr_t lbn; 2261 off_t osize; 2262 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 2263 2264 ioflag = ap->a_ioflag; 2265 uio = ap->a_uio; 2266 vp = ap->a_vp; 2267 2268 seqcount = ioflag >> IO_SEQSHIFT; 2269 ip = VTOI(vp); 2270 2271 #ifdef INVARIANTS 2272 if (uio->uio_rw != UIO_WRITE) 2273 panic("%s: mode", "ext2_write"); 2274 #endif 2275 2276 switch (vp->v_type) { 2277 case VREG: 2278 if (ioflag & IO_APPEND) 2279 uio->uio_offset = ip->i_size; 2280 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 2281 return (EPERM); 2282 /* FALLTHROUGH */ 2283 case VLNK: 2284 break; 2285 case VDIR: 2286 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 2287 if ((ioflag & IO_SYNC) == 0) 2288 panic("ext2_write: nonsync dir write"); 2289 break; 2290 default: 2291 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 2292 vp->v_type, (intmax_t)uio->uio_offset, 2293 (intmax_t)uio->uio_resid); 2294 } 2295 2296 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 2297 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 2298 fs = ip->i_e2fs; 2299 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 2300 return (EFBIG); 2301 /* 2302 * Maybe this should be above the vnode op call, but so long as 2303 * file servers have no limits, I don't think it matters. 
2304 */ 2305 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 2306 return (EFBIG); 2307 2308 resid = uio->uio_resid; 2309 osize = ip->i_size; 2310 if (seqcount > BA_SEQMAX) 2311 flags = BA_SEQMAX << BA_SEQSHIFT; 2312 else 2313 flags = seqcount << BA_SEQSHIFT; 2314 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 2315 flags |= IO_SYNC; 2316 2317 for (error = 0; uio->uio_resid > 0;) { 2318 lbn = lblkno(fs, uio->uio_offset); 2319 blkoffset = blkoff(fs, uio->uio_offset); 2320 xfersize = fs->e2fs_fsize - blkoffset; 2321 if (uio->uio_resid < xfersize) 2322 xfersize = uio->uio_resid; 2323 if (uio->uio_offset + xfersize > ip->i_size) 2324 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 2325 2326 /* 2327 * We must perform a read-before-write if the transfer size 2328 * does not cover the entire buffer. 2329 */ 2330 if (fs->e2fs_bsize > xfersize) 2331 flags |= BA_CLRBUF; 2332 else 2333 flags &= ~BA_CLRBUF; 2334 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 2335 ap->a_cred, &bp, flags); 2336 if (error != 0) 2337 break; 2338 2339 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL)) 2340 bp->b_flags |= B_NOCACHE; 2341 if (uio->uio_offset + xfersize > ip->i_size) 2342 ip->i_size = uio->uio_offset + xfersize; 2343 size = blksize(fs, ip, lbn) - bp->b_resid; 2344 if (size < xfersize) 2345 xfersize = size; 2346 2347 error = 2348 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 2349 /* 2350 * If the buffer is not already filled and we encounter an 2351 * error while trying to fill it, we have to clear out any 2352 * garbage data from the pages instantiated for the buffer. 2353 * If we do not, a failed uiomove() during a write can leave 2354 * the prior contents of the pages exposed to a userland mmap. 2355 * 2356 * Note that we need only clear buffers with a transfer size 2357 * equal to the block size because buffers with a shorter 2358 * transfer size were cleared above by the call to ext2_balloc() 2359 * with the BA_CLRBUF flag set. 
2360 * 2361 * If the source region for uiomove identically mmaps the 2362 * buffer, uiomove() performed the NOP copy, and the buffer 2363 * content remains valid because the page fault handler 2364 * validated the pages. 2365 */ 2366 if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 2367 fs->e2fs_bsize == xfersize) 2368 vfs_bio_clrbuf(bp); 2369 2370 vfs_bio_set_flags(bp, ioflag); 2371 2372 /* 2373 * If IO_SYNC each buffer is written synchronously. Otherwise 2374 * if we have a severe page deficiency write the buffer 2375 * asynchronously. Otherwise try to cluster, and if that 2376 * doesn't do it then either do an async write (if O_DIRECT), 2377 * or a delayed write (if not). 2378 */ 2379 if (ioflag & IO_SYNC) { 2380 (void)bwrite(bp); 2381 } else if (vm_page_count_severe() || 2382 buf_dirty_count_severe() || 2383 (ioflag & IO_ASYNC)) { 2384 bp->b_flags |= B_CLUSTEROK; 2385 bawrite(bp); 2386 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 2387 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 2388 bp->b_flags |= B_CLUSTEROK; 2389 cluster_write(vp, bp, ip->i_size, seqcount, 0); 2390 } else { 2391 bawrite(bp); 2392 } 2393 } else if (ioflag & IO_DIRECT) { 2394 bp->b_flags |= B_CLUSTEROK; 2395 bawrite(bp); 2396 } else { 2397 bp->b_flags |= B_CLUSTEROK; 2398 bdwrite(bp); 2399 } 2400 if (error || xfersize == 0) 2401 break; 2402 } 2403 /* 2404 * If we successfully wrote any data, and we are not the superuser 2405 * we clear the setuid and setgid bits as a precaution against 2406 * tampering. 
2407 */ 2408 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 2409 ap->a_cred) { 2410 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 2411 ip->i_mode &= ~(ISUID | ISGID); 2412 } 2413 if (error) { 2414 if (ioflag & IO_UNIT) { 2415 (void)ext2_truncate(vp, osize, 2416 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 2417 uio->uio_offset -= resid - uio->uio_resid; 2418 uio->uio_resid = resid; 2419 } 2420 } 2421 if (uio->uio_resid != resid) { 2422 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2423 if (ioflag & IO_SYNC) 2424 error = ext2_update(vp, 1); 2425 } 2426 return (error); 2427 } 2428