1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 41 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 42 * $FreeBSD$ 43 */ 44 45 #include "opt_suiddir.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/fcntl.h> 51 #include <sys/filio.h> 52 #include <sys/stat.h> 53 #include <sys/bio.h> 54 #include <sys/buf.h> 55 #include <sys/endian.h> 56 #include <sys/priv.h> 57 #include <sys/rwlock.h> 58 #include <sys/mount.h> 59 #include <sys/unistd.h> 60 #include <sys/time.h> 61 #include <sys/vnode.h> 62 #include <sys/namei.h> 63 #include <sys/lockf.h> 64 #include <sys/event.h> 65 #include <sys/conf.h> 66 #include <sys/file.h> 67 #include <sys/extattr.h> 68 #include <sys/vmmeter.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/vm_extern.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 #include <vm/vm_pager.h> 76 #include <vm/vnode_pager.h> 77 78 #include "opt_directio.h" 79 80 #include <ufs/ufs/dir.h> 81 82 #include <fs/ext2fs/fs.h> 83 #include <fs/ext2fs/inode.h> 84 #include <fs/ext2fs/ext2_acl.h> 85 #include <fs/ext2fs/ext2_extern.h> 86 #include <fs/ext2fs/ext2fs.h> 87 #include <fs/ext2fs/ext2_dinode.h> 88 #include <fs/ext2fs/ext2_dir.h> 89 #include <fs/ext2fs/ext2_mount.h> 90 #include <fs/ext2fs/ext2_extattr.h> 91 92 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 93 static void ext2_itimes_locked(struct vnode *); 94 static 
int ext4_ext_read(struct vop_read_args *); 95 static int ext2_ind_read(struct vop_read_args *); 96 97 static vop_access_t ext2_access; 98 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 99 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 100 struct thread *); 101 static vop_close_t ext2_close; 102 static vop_create_t ext2_create; 103 static vop_fsync_t ext2_fsync; 104 static vop_getattr_t ext2_getattr; 105 static vop_ioctl_t ext2_ioctl; 106 static vop_link_t ext2_link; 107 static vop_mkdir_t ext2_mkdir; 108 static vop_mknod_t ext2_mknod; 109 static vop_open_t ext2_open; 110 static vop_pathconf_t ext2_pathconf; 111 static vop_print_t ext2_print; 112 static vop_read_t ext2_read; 113 static vop_readlink_t ext2_readlink; 114 static vop_remove_t ext2_remove; 115 static vop_rename_t ext2_rename; 116 static vop_rmdir_t ext2_rmdir; 117 static vop_setattr_t ext2_setattr; 118 static vop_strategy_t ext2_strategy; 119 static vop_symlink_t ext2_symlink; 120 static vop_write_t ext2_write; 121 static vop_deleteextattr_t ext2_deleteextattr; 122 static vop_getextattr_t ext2_getextattr; 123 static vop_listextattr_t ext2_listextattr; 124 static vop_setextattr_t ext2_setextattr; 125 static vop_vptofh_t ext2_vptofh; 126 static vop_close_t ext2fifo_close; 127 static vop_kqfilter_t ext2fifo_kqfilter; 128 129 /* Global vfs data structures for ext2. 
*/
/*
 * Operations vector for ext2 vnodes.  Each member names the handler for
 * one vnode operation; operations that are not listed here are expected
 * to be resolved through .vop_default (default_vnodeops).
 */
struct vop_vector ext2_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		ext2_access,
	.vop_bmap =		ext2_bmap,
	.vop_cachedlookup =	ext2_lookup,
	.vop_close =		ext2_close,
	.vop_create =		ext2_create,
	.vop_fsync =		ext2_fsync,
	.vop_getpages =		vnode_pager_local_getpages,
	.vop_getpages_async =	vnode_pager_local_getpages_async,
	.vop_getattr =		ext2_getattr,
	.vop_inactive =		ext2_inactive,
	.vop_ioctl =		ext2_ioctl,
	.vop_link =		ext2_link,
	.vop_lookup =		vfs_cache_lookup,
	.vop_mkdir =		ext2_mkdir,
	.vop_mknod =		ext2_mknod,
	.vop_open =		ext2_open,
	.vop_pathconf =		ext2_pathconf,
	.vop_poll =		vop_stdpoll,
	.vop_print =		ext2_print,
	.vop_read =		ext2_read,
	.vop_readdir =		ext2_readdir,
	.vop_readlink =		ext2_readlink,
	.vop_reallocblks =	ext2_reallocblks,
	.vop_reclaim =		ext2_reclaim,
	.vop_remove =		ext2_remove,
	.vop_rename =		ext2_rename,
	.vop_rmdir =		ext2_rmdir,
	.vop_setattr =		ext2_setattr,
	.vop_strategy =		ext2_strategy,
	.vop_symlink =		ext2_symlink,
	.vop_write =		ext2_write,
	.vop_deleteextattr =	ext2_deleteextattr,
	.vop_getextattr =	ext2_getextattr,
	.vop_listextattr =	ext2_listextattr,
	.vop_setextattr =	ext2_setextattr,
#ifdef UFS_ACL
	/* ACL handlers are only wired in when the kernel has UFS_ACL. */
	.vop_getacl =		ext2_getacl,
	.vop_setacl =		ext2_setacl,
	.vop_aclcheck =		ext2_aclcheck,
#endif /* UFS_ACL */
	.vop_vptofh =		ext2_vptofh,
};

/*
 * Operations vector for FIFOs that live on an ext2 file system.
 * Attribute and lifecycle operations use the ext2 handlers, read and
 * write are set to VOP_PANIC, and everything else is delegated to
 * fifo_specops through .vop_default.
 */
struct vop_vector ext2_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		ext2_access,
	.vop_close =		ext2fifo_close,
	.vop_fsync =		ext2_fsync,
	.vop_getattr =		ext2_getattr,
	.vop_inactive =		ext2_inactive,
	.vop_kqfilter =		ext2fifo_kqfilter,
	.vop_print =		ext2_print,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		ext2_reclaim,
	.vop_setattr =		ext2_setattr,
	.vop_write =		VOP_PANIC,
	.vop_vptofh =		ext2_vptofh,
};

/*
 * A virgin directory (no blushing please).
193 * Note that the type and namlen fields are reversed relative to ext2. 194 * Also, we don't use `struct odirtemplate', since it would just cause 195 * endianness problems. 196 */ 197 static struct dirtemplate mastertemplate = { 198 0, 12, 1, EXT2_FT_DIR, ".", 199 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 200 }; 201 static struct dirtemplate omastertemplate = { 202 0, 12, 1, EXT2_FT_UNKNOWN, ".", 203 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 204 }; 205 206 static void 207 ext2_itimes_locked(struct vnode *vp) 208 { 209 struct inode *ip; 210 struct timespec ts; 211 212 ASSERT_VI_LOCKED(vp, __func__); 213 214 ip = VTOI(vp); 215 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 216 return; 217 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 218 ip->i_flag |= IN_LAZYMOD; 219 else 220 ip->i_flag |= IN_MODIFIED; 221 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 222 vfs_timestamp(&ts); 223 if (ip->i_flag & IN_ACCESS) { 224 ip->i_atime = ts.tv_sec; 225 ip->i_atimensec = ts.tv_nsec; 226 } 227 if (ip->i_flag & IN_UPDATE) { 228 ip->i_mtime = ts.tv_sec; 229 ip->i_mtimensec = ts.tv_nsec; 230 ip->i_modrev++; 231 } 232 if (ip->i_flag & IN_CHANGE) { 233 ip->i_ctime = ts.tv_sec; 234 ip->i_ctimensec = ts.tv_nsec; 235 } 236 } 237 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 238 } 239 240 void 241 ext2_itimes(struct vnode *vp) 242 { 243 244 VI_LOCK(vp); 245 ext2_itimes_locked(vp); 246 VI_UNLOCK(vp); 247 } 248 249 /* 250 * Create a regular file 251 */ 252 static int 253 ext2_create(struct vop_create_args *ap) 254 { 255 int error; 256 257 error = 258 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 259 ap->a_dvp, ap->a_vpp, ap->a_cnp); 260 if (error != 0) 261 return (error); 262 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 263 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 264 return (0); 265 } 266 267 static int 268 ext2_open(struct vop_open_args *ap) 269 { 270 271 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 272 return (EOPNOTSUPP); 
273 274 /* 275 * Files marked append-only must be opened for appending. 276 */ 277 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 278 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 279 return (EPERM); 280 281 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 282 283 return (0); 284 } 285 286 /* 287 * Close called. 288 * 289 * Update the times on the inode. 290 */ 291 static int 292 ext2_close(struct vop_close_args *ap) 293 { 294 struct vnode *vp = ap->a_vp; 295 296 VI_LOCK(vp); 297 if (vp->v_usecount > 1) 298 ext2_itimes_locked(vp); 299 VI_UNLOCK(vp); 300 return (0); 301 } 302 303 static int 304 ext2_access(struct vop_access_args *ap) 305 { 306 struct vnode *vp = ap->a_vp; 307 struct inode *ip = VTOI(vp); 308 accmode_t accmode = ap->a_accmode; 309 int error; 310 311 if (vp->v_type == VBLK || vp->v_type == VCHR) 312 return (EOPNOTSUPP); 313 314 /* 315 * Disallow write attempts on read-only file systems; 316 * unless the file is a socket, fifo, or a block or 317 * character device resident on the file system. 318 */ 319 if (accmode & VWRITE) { 320 switch (vp->v_type) { 321 case VDIR: 322 case VLNK: 323 case VREG: 324 if (vp->v_mount->mnt_flag & MNT_RDONLY) 325 return (EROFS); 326 break; 327 default: 328 break; 329 } 330 } 331 332 /* If immutable bit set, nobody gets to write it. 
*/ 333 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 334 return (EPERM); 335 336 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 337 ap->a_accmode, ap->a_cred, NULL); 338 return (error); 339 } 340 341 static int 342 ext2_getattr(struct vop_getattr_args *ap) 343 { 344 struct vnode *vp = ap->a_vp; 345 struct inode *ip = VTOI(vp); 346 struct vattr *vap = ap->a_vap; 347 348 ext2_itimes(vp); 349 /* 350 * Copy from inode table 351 */ 352 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 353 vap->va_fileid = ip->i_number; 354 vap->va_mode = ip->i_mode & ~IFMT; 355 vap->va_nlink = ip->i_nlink; 356 vap->va_uid = ip->i_uid; 357 vap->va_gid = ip->i_gid; 358 vap->va_rdev = ip->i_rdev; 359 vap->va_size = ip->i_size; 360 vap->va_atime.tv_sec = ip->i_atime; 361 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 362 vap->va_mtime.tv_sec = ip->i_mtime; 363 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 364 vap->va_ctime.tv_sec = ip->i_ctime; 365 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 366 if E2DI_HAS_XTIME(ip) { 367 vap->va_birthtime.tv_sec = ip->i_birthtime; 368 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 369 } 370 vap->va_flags = ip->i_flags; 371 vap->va_gen = ip->i_gen; 372 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 373 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 374 vap->va_type = IFTOVT(ip->i_mode); 375 vap->va_filerev = ip->i_modrev; 376 return (0); 377 } 378 379 /* 380 * Set attribute vnode op. called from several syscalls 381 */ 382 static int 383 ext2_setattr(struct vop_setattr_args *ap) 384 { 385 struct vattr *vap = ap->a_vap; 386 struct vnode *vp = ap->a_vp; 387 struct inode *ip = VTOI(vp); 388 struct ucred *cred = ap->a_cred; 389 struct thread *td = curthread; 390 int error; 391 392 /* 393 * Check for unsettable attributes. 
394 */ 395 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 396 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 397 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 398 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 399 return (EINVAL); 400 } 401 if (vap->va_flags != VNOVAL) { 402 /* Disallow flags not supported by ext2fs. */ 403 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 404 return (EOPNOTSUPP); 405 406 if (vp->v_mount->mnt_flag & MNT_RDONLY) 407 return (EROFS); 408 /* 409 * Callers may only modify the file flags on objects they 410 * have VADMIN rights for. 411 */ 412 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 413 return (error); 414 /* 415 * Unprivileged processes and privileged processes in 416 * jail() are not permitted to unset system flags, or 417 * modify flags if any system flags are set. 418 * Privileged non-jail processes may not modify system flags 419 * if securelevel > 0 and any existing system flags are set. 420 */ 421 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 422 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 423 error = securelevel_gt(cred, 0); 424 if (error) 425 return (error); 426 } 427 } else { 428 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 429 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 430 return (EPERM); 431 } 432 ip->i_flags = vap->va_flags; 433 ip->i_flag |= IN_CHANGE; 434 if (ip->i_flags & (IMMUTABLE | APPEND)) 435 return (0); 436 } 437 if (ip->i_flags & (IMMUTABLE | APPEND)) 438 return (EPERM); 439 /* 440 * Go through the fields and update iff not VNOVAL. 
441 */ 442 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 443 if (vp->v_mount->mnt_flag & MNT_RDONLY) 444 return (EROFS); 445 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 446 td)) != 0) 447 return (error); 448 } 449 if (vap->va_size != VNOVAL) { 450 /* 451 * Disallow write attempts on read-only file systems; 452 * unless the file is a socket, fifo, or a block or 453 * character device resident on the file system. 454 */ 455 switch (vp->v_type) { 456 case VDIR: 457 return (EISDIR); 458 case VLNK: 459 case VREG: 460 if (vp->v_mount->mnt_flag & MNT_RDONLY) 461 return (EROFS); 462 break; 463 default: 464 break; 465 } 466 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 467 return (error); 468 } 469 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 470 if (vp->v_mount->mnt_flag & MNT_RDONLY) 471 return (EROFS); 472 /* 473 * From utimes(2): 474 * If times is NULL, ... The caller must be the owner of 475 * the file, have permission to write the file, or be the 476 * super-user. 477 * If times is non-NULL, ... The caller must be the owner of 478 * the file or be the super-user. 
479 */ 480 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 481 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 482 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 483 return (error); 484 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 485 if (vap->va_atime.tv_sec != VNOVAL) { 486 ip->i_flag &= ~IN_ACCESS; 487 ip->i_atime = vap->va_atime.tv_sec; 488 ip->i_atimensec = vap->va_atime.tv_nsec; 489 } 490 if (vap->va_mtime.tv_sec != VNOVAL) { 491 ip->i_flag &= ~IN_UPDATE; 492 ip->i_mtime = vap->va_mtime.tv_sec; 493 ip->i_mtimensec = vap->va_mtime.tv_nsec; 494 } 495 ip->i_birthtime = vap->va_birthtime.tv_sec; 496 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 497 error = ext2_update(vp, 0); 498 if (error) 499 return (error); 500 } 501 error = 0; 502 if (vap->va_mode != (mode_t)VNOVAL) { 503 if (vp->v_mount->mnt_flag & MNT_RDONLY) 504 return (EROFS); 505 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 506 } 507 return (error); 508 } 509 510 /* 511 * Change the mode on a file. 512 * Inode must be locked before calling. 513 */ 514 static int 515 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 516 { 517 struct inode *ip = VTOI(vp); 518 int error; 519 520 /* 521 * To modify the permissions on a file, must possess VADMIN 522 * for that file. 523 */ 524 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 525 return (error); 526 /* 527 * Privileged processes may set the sticky bit on non-directories, 528 * as well as set the setgid bit on a file with a group that the 529 * process is not a member of. 
530 */ 531 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 532 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 533 if (error) 534 return (EFTYPE); 535 } 536 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 537 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 538 if (error) 539 return (error); 540 } 541 ip->i_mode &= ~ALLPERMS; 542 ip->i_mode |= (mode & ALLPERMS); 543 ip->i_flag |= IN_CHANGE; 544 return (0); 545 } 546 547 /* 548 * Perform chown operation on inode ip; 549 * inode must be locked prior to call. 550 */ 551 static int 552 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 553 struct thread *td) 554 { 555 struct inode *ip = VTOI(vp); 556 uid_t ouid; 557 gid_t ogid; 558 int error = 0; 559 560 if (uid == (uid_t)VNOVAL) 561 uid = ip->i_uid; 562 if (gid == (gid_t)VNOVAL) 563 gid = ip->i_gid; 564 /* 565 * To modify the ownership of a file, must possess VADMIN 566 * for that file. 567 */ 568 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 569 return (error); 570 /* 571 * To change the owner of a file, or change the group of a file 572 * to a group of which we are not a member, the caller must 573 * have privilege. 574 */ 575 if (uid != ip->i_uid || (gid != ip->i_gid && 576 !groupmember(gid, cred))) { 577 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 578 if (error) 579 return (error); 580 } 581 ogid = ip->i_gid; 582 ouid = ip->i_uid; 583 ip->i_gid = gid; 584 ip->i_uid = uid; 585 ip->i_flag |= IN_CHANGE; 586 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 587 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 588 ip->i_mode &= ~(ISUID | ISGID); 589 } 590 return (0); 591 } 592 593 /* 594 * Synch an open file. 595 */ 596 /* ARGSUSED */ 597 static int 598 ext2_fsync(struct vop_fsync_args *ap) 599 { 600 /* 601 * Flush all dirty buffers associated with a vnode. 
602 */ 603 604 vop_stdfsync(ap); 605 606 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 607 } 608 609 /* 610 * Mknod vnode call 611 */ 612 /* ARGSUSED */ 613 static int 614 ext2_mknod(struct vop_mknod_args *ap) 615 { 616 struct vattr *vap = ap->a_vap; 617 struct vnode **vpp = ap->a_vpp; 618 struct inode *ip; 619 ino_t ino; 620 int error; 621 622 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 623 ap->a_dvp, vpp, ap->a_cnp); 624 if (error) 625 return (error); 626 ip = VTOI(*vpp); 627 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 628 if (vap->va_rdev != VNOVAL) { 629 /* 630 * Want to be able to use this to make badblock 631 * inodes, so don't truncate the dev number. 632 */ 633 ip->i_rdev = vap->va_rdev; 634 } 635 /* 636 * Remove inode, then reload it through VFS_VGET so it is 637 * checked to see if it is an alias of an existing entry in 638 * the inode cache. XXX I don't believe this is necessary now. 639 */ 640 (*vpp)->v_type = VNON; 641 ino = ip->i_number; /* Save this before vgone() invalidates ip. 
*/ 642 vgone(*vpp); 643 vput(*vpp); 644 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 645 if (error) { 646 *vpp = NULL; 647 return (error); 648 } 649 return (0); 650 } 651 652 static int 653 ext2_remove(struct vop_remove_args *ap) 654 { 655 struct inode *ip; 656 struct vnode *vp = ap->a_vp; 657 struct vnode *dvp = ap->a_dvp; 658 int error; 659 660 ip = VTOI(vp); 661 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 662 (VTOI(dvp)->i_flags & APPEND)) { 663 error = EPERM; 664 goto out; 665 } 666 error = ext2_dirremove(dvp, ap->a_cnp); 667 if (error == 0) { 668 ip->i_nlink--; 669 ip->i_flag |= IN_CHANGE; 670 } 671 out: 672 return (error); 673 } 674 675 static unsigned short 676 ext2_max_nlink(struct inode *ip) 677 { 678 struct m_ext2fs *fs; 679 680 fs = ip->i_e2fs; 681 682 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_DIR_NLINK)) 683 return (EXT4_LINK_MAX); 684 else 685 return (EXT2_LINK_MAX); 686 } 687 688 /* 689 * link vnode call 690 */ 691 static int 692 ext2_link(struct vop_link_args *ap) 693 { 694 struct vnode *vp = ap->a_vp; 695 struct vnode *tdvp = ap->a_tdvp; 696 struct componentname *cnp = ap->a_cnp; 697 struct inode *ip; 698 int error; 699 700 #ifdef INVARIANTS 701 if ((cnp->cn_flags & HASBUF) == 0) 702 panic("ext2_link: no name"); 703 #endif 704 ip = VTOI(vp); 705 if ((nlink_t)ip->i_nlink >= ext2_max_nlink(ip)) { 706 error = EMLINK; 707 goto out; 708 } 709 if (ip->i_flags & (IMMUTABLE | APPEND)) { 710 error = EPERM; 711 goto out; 712 } 713 ip->i_nlink++; 714 ip->i_flag |= IN_CHANGE; 715 error = ext2_update(vp, !DOINGASYNC(vp)); 716 if (!error) 717 error = ext2_direnter(ip, tdvp, cnp); 718 if (error) { 719 ip->i_nlink--; 720 ip->i_flag |= IN_CHANGE; 721 } 722 out: 723 return (error); 724 } 725 726 static int 727 ext2_inc_nlink(struct inode *ip) 728 { 729 730 ip->i_nlink++; 731 732 if (ext2_htree_has_idx(ip) && ip->i_nlink > 1) { 733 if (ip->i_nlink >= ext2_max_nlink(ip) || ip->i_nlink == 2) 734 ip->i_nlink = 1; 735 } else if 
(ip->i_nlink > ext2_max_nlink(ip)) { 736 ip->i_nlink--; 737 return (EMLINK); 738 } 739 740 return (0); 741 } 742 743 static void 744 ext2_dec_nlink(struct inode *ip) 745 { 746 747 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2) 748 ip->i_nlink--; 749 } 750 751 /* 752 * Rename system call. 753 * rename("foo", "bar"); 754 * is essentially 755 * unlink("bar"); 756 * link("foo", "bar"); 757 * unlink("foo"); 758 * but ``atomically''. Can't do full commit without saving state in the 759 * inode on disk which isn't feasible at this time. Best we can do is 760 * always guarantee the target exists. 761 * 762 * Basic algorithm is: 763 * 764 * 1) Bump link count on source while we're linking it to the 765 * target. This also ensure the inode won't be deleted out 766 * from underneath us while we work (it may be truncated by 767 * a concurrent `trunc' or `open' for creation). 768 * 2) Link source to destination. If destination already exists, 769 * delete it first. 770 * 3) Unlink source reference to inode if still around. If a 771 * directory was moved and the parent of the destination 772 * is different from the source, patch the ".." entry in the 773 * directory. 774 */ 775 static int 776 ext2_rename(struct vop_rename_args *ap) 777 { 778 struct vnode *tvp = ap->a_tvp; 779 struct vnode *tdvp = ap->a_tdvp; 780 struct vnode *fvp = ap->a_fvp; 781 struct vnode *fdvp = ap->a_fdvp; 782 struct componentname *tcnp = ap->a_tcnp; 783 struct componentname *fcnp = ap->a_fcnp; 784 struct inode *ip, *xp, *dp; 785 struct dirtemplate dirbuf; 786 int doingdirectory = 0, oldparent = 0, newparent = 0; 787 int error = 0; 788 u_char namlen; 789 790 #ifdef INVARIANTS 791 if ((tcnp->cn_flags & HASBUF) == 0 || 792 (fcnp->cn_flags & HASBUF) == 0) 793 panic("ext2_rename: no name"); 794 #endif 795 /* 796 * Check for cross-device rename. 
797 */ 798 if ((fvp->v_mount != tdvp->v_mount) || 799 (tvp && (fvp->v_mount != tvp->v_mount))) { 800 error = EXDEV; 801 abortit: 802 if (tdvp == tvp) 803 vrele(tdvp); 804 else 805 vput(tdvp); 806 if (tvp) 807 vput(tvp); 808 vrele(fdvp); 809 vrele(fvp); 810 return (error); 811 } 812 813 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 814 (VTOI(tdvp)->i_flags & APPEND))) { 815 error = EPERM; 816 goto abortit; 817 } 818 819 /* 820 * Renaming a file to itself has no effect. The upper layers should 821 * not call us in that case. Temporarily just warn if they do. 822 */ 823 if (fvp == tvp) { 824 printf("ext2_rename: fvp == tvp (can't happen)\n"); 825 error = 0; 826 goto abortit; 827 } 828 829 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 830 goto abortit; 831 dp = VTOI(fdvp); 832 ip = VTOI(fvp); 833 if (ip->i_nlink >= ext2_max_nlink(ip) && !ext2_htree_has_idx(ip)) { 834 VOP_UNLOCK(fvp, 0); 835 error = EMLINK; 836 goto abortit; 837 } 838 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 839 || (dp->i_flags & APPEND)) { 840 VOP_UNLOCK(fvp, 0); 841 error = EPERM; 842 goto abortit; 843 } 844 if ((ip->i_mode & IFMT) == IFDIR) { 845 /* 846 * Avoid ".", "..", and aliases of "." for obvious reasons. 847 */ 848 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 849 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 850 (ip->i_flag & IN_RENAME)) { 851 VOP_UNLOCK(fvp, 0); 852 error = EINVAL; 853 goto abortit; 854 } 855 ip->i_flag |= IN_RENAME; 856 oldparent = dp->i_number; 857 doingdirectory++; 858 } 859 vrele(fdvp); 860 861 /* 862 * When the target exists, both the directory 863 * and target vnodes are returned locked. 864 */ 865 dp = VTOI(tdvp); 866 xp = NULL; 867 if (tvp) 868 xp = VTOI(tvp); 869 870 /* 871 * 1) Bump link count while we're moving stuff 872 * around. If we crash somewhere before 873 * completing our work, the link count 874 * may be wrong, but correctable. 
875 */ 876 ext2_inc_nlink(ip); 877 ip->i_flag |= IN_CHANGE; 878 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 879 VOP_UNLOCK(fvp, 0); 880 goto bad; 881 } 882 883 /* 884 * If ".." must be changed (ie the directory gets a new 885 * parent) then the source directory must not be in the 886 * directory hierarchy above the target, as this would 887 * orphan everything below the source directory. Also 888 * the user must have write permission in the source so 889 * as to be able to change "..". We must repeat the call 890 * to namei, as the parent directory is unlocked by the 891 * call to checkpath(). 892 */ 893 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 894 VOP_UNLOCK(fvp, 0); 895 if (oldparent != dp->i_number) 896 newparent = dp->i_number; 897 if (doingdirectory && newparent) { 898 if (error) /* write access check above */ 899 goto bad; 900 if (xp != NULL) 901 vput(tvp); 902 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 903 if (error) 904 goto out; 905 VREF(tdvp); 906 error = relookup(tdvp, &tvp, tcnp); 907 if (error) 908 goto out; 909 vrele(tdvp); 910 dp = VTOI(tdvp); 911 xp = NULL; 912 if (tvp) 913 xp = VTOI(tvp); 914 } 915 /* 916 * 2) If target doesn't exist, link the target 917 * to the source and unlink the source. 918 * Otherwise, rewrite the target directory 919 * entry to reference the source inode and 920 * expunge the original entry's existence. 921 */ 922 if (xp == NULL) { 923 if (dp->i_devvp != ip->i_devvp) 924 panic("ext2_rename: EXDEV"); 925 /* 926 * Account for ".." in new directory. 927 * When source and destination have the same 928 * parent we don't fool with the link count. 
929 */ 930 if (doingdirectory && newparent) { 931 error = ext2_inc_nlink(dp); 932 if (error) 933 goto bad; 934 935 dp->i_flag |= IN_CHANGE; 936 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 937 if (error) 938 goto bad; 939 } 940 error = ext2_direnter(ip, tdvp, tcnp); 941 if (error) { 942 if (doingdirectory && newparent) { 943 ext2_dec_nlink(dp); 944 dp->i_flag |= IN_CHANGE; 945 (void)ext2_update(tdvp, 1); 946 } 947 goto bad; 948 } 949 vput(tdvp); 950 } else { 951 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 952 panic("ext2_rename: EXDEV"); 953 /* 954 * Short circuit rename(foo, foo). 955 */ 956 if (xp->i_number == ip->i_number) 957 panic("ext2_rename: same file"); 958 /* 959 * If the parent directory is "sticky", then the user must 960 * own the parent directory, or the destination of the rename, 961 * otherwise the destination may not be changed (except by 962 * root). This implements append-only directories. 963 */ 964 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 965 tcnp->cn_cred->cr_uid != dp->i_uid && 966 xp->i_uid != tcnp->cn_cred->cr_uid) { 967 error = EPERM; 968 goto bad; 969 } 970 /* 971 * Target must be empty if a directory and have no links 972 * to it. Also, ensure source and target are compatible 973 * (both directories, or both not directories). 974 */ 975 if ((xp->i_mode & IFMT) == IFDIR) { 976 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 977 error = ENOTEMPTY; 978 goto bad; 979 } 980 if (!doingdirectory) { 981 error = ENOTDIR; 982 goto bad; 983 } 984 cache_purge(tdvp); 985 } else if (doingdirectory) { 986 error = EISDIR; 987 goto bad; 988 } 989 error = ext2_dirrewrite(dp, ip, tcnp); 990 if (error) 991 goto bad; 992 /* 993 * If the target directory is in the same 994 * directory as the source directory, 995 * decrement the link count on the parent 996 * of the target directory. 
997 */ 998 if (doingdirectory && !newparent) { 999 ext2_dec_nlink(dp); 1000 dp->i_flag |= IN_CHANGE; 1001 } 1002 vput(tdvp); 1003 /* 1004 * Adjust the link count of the target to 1005 * reflect the dirrewrite above. If this is 1006 * a directory it is empty and there are 1007 * no links to it, so we can squash the inode and 1008 * any space associated with it. We disallowed 1009 * renaming over top of a directory with links to 1010 * it above, as the remaining link would point to 1011 * a directory without "." or ".." entries. 1012 */ 1013 ext2_dec_nlink(xp); 1014 if (doingdirectory) { 1015 if (--xp->i_nlink != 0) 1016 panic("ext2_rename: linked directory"); 1017 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 1018 tcnp->cn_cred, tcnp->cn_thread); 1019 } 1020 xp->i_flag |= IN_CHANGE; 1021 vput(tvp); 1022 xp = NULL; 1023 } 1024 1025 /* 1026 * 3) Unlink the source. 1027 */ 1028 fcnp->cn_flags &= ~MODMASK; 1029 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1030 VREF(fdvp); 1031 error = relookup(fdvp, &fvp, fcnp); 1032 if (error == 0) 1033 vrele(fdvp); 1034 if (fvp != NULL) { 1035 xp = VTOI(fvp); 1036 dp = VTOI(fdvp); 1037 } else { 1038 /* 1039 * From name has disappeared. IN_RENAME is not sufficient 1040 * to protect against directory races due to timing windows, 1041 * so we can't panic here. 1042 */ 1043 vrele(ap->a_fvp); 1044 return (0); 1045 } 1046 /* 1047 * Ensure that the directory entry still exists and has not 1048 * changed while the new name has been entered. If the source is 1049 * a file then the entry may have been unlinked or renamed. In 1050 * either case there is no further work to be done. If the source 1051 * is a directory then it cannot have been rmdir'ed; its link 1052 * count of three would cause a rmdir to fail with ENOTEMPTY. 1053 * The IN_RENAME flag ensures that it cannot be moved by another 1054 * rename. 1055 */ 1056 if (xp != ip) { 1057 /* 1058 * From name resolves to a different inode. 
IN_RENAME is 1059 * not sufficient protection against timing window races 1060 * so we can't panic here. 1061 */ 1062 } else { 1063 /* 1064 * If the source is a directory with a 1065 * new parent, the link count of the old 1066 * parent directory must be decremented 1067 * and ".." set to point to the new parent. 1068 */ 1069 if (doingdirectory && newparent) { 1070 ext2_dec_nlink(dp); 1071 dp->i_flag |= IN_CHANGE; 1072 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1073 sizeof(struct dirtemplate), (off_t)0, 1074 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1075 tcnp->cn_cred, NOCRED, NULL, NULL); 1076 if (error == 0) { 1077 /* Like ufs little-endian: */ 1078 namlen = dirbuf.dotdot_type; 1079 if (namlen != 2 || 1080 dirbuf.dotdot_name[0] != '.' || 1081 dirbuf.dotdot_name[1] != '.') { 1082 ext2_dirbad(xp, (doff_t)12, 1083 "rename: mangled dir"); 1084 } else { 1085 dirbuf.dotdot_ino = newparent; 1086 (void)vn_rdwr(UIO_WRITE, fvp, 1087 (caddr_t)&dirbuf, 1088 sizeof(struct dirtemplate), 1089 (off_t)0, UIO_SYSSPACE, 1090 IO_NODELOCKED | IO_SYNC | 1091 IO_NOMACCHECK, tcnp->cn_cred, 1092 NOCRED, NULL, NULL); 1093 cache_purge(fdvp); 1094 } 1095 } 1096 } 1097 error = ext2_dirremove(fdvp, fcnp); 1098 if (!error) { 1099 ext2_dec_nlink(xp); 1100 xp->i_flag |= IN_CHANGE; 1101 } 1102 xp->i_flag &= ~IN_RENAME; 1103 } 1104 if (dp) 1105 vput(fdvp); 1106 if (xp) 1107 vput(fvp); 1108 vrele(ap->a_fvp); 1109 return (error); 1110 1111 bad: 1112 if (xp) 1113 vput(ITOV(xp)); 1114 vput(ITOV(dp)); 1115 out: 1116 if (doingdirectory) 1117 ip->i_flag &= ~IN_RENAME; 1118 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1119 ext2_dec_nlink(ip); 1120 ip->i_flag |= IN_CHANGE; 1121 ip->i_flag &= ~IN_RENAME; 1122 vput(fvp); 1123 } else 1124 vrele(fvp); 1125 return (error); 1126 } 1127 1128 #ifdef UFS_ACL 1129 static int 1130 ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1131 mode_t dmode, struct ucred *cred, struct thread *td) 1132 { 1133 int error; 1134 struct inode *ip = 
VTOI(tvp); 1135 struct acl *dacl, *acl; 1136 1137 acl = acl_alloc(M_WAITOK); 1138 dacl = acl_alloc(M_WAITOK); 1139 1140 /* 1141 * Retrieve default ACL from parent, if any. 1142 */ 1143 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1144 switch (error) { 1145 case 0: 1146 /* 1147 * Retrieved a default ACL, so merge mode and ACL if 1148 * necessary. If the ACL is empty, fall through to 1149 * the "not defined or available" case. 1150 */ 1151 if (acl->acl_cnt != 0) { 1152 dmode = acl_posix1e_newfilemode(dmode, acl); 1153 ip->i_mode = dmode; 1154 *dacl = *acl; 1155 ext2_sync_acl_from_inode(ip, acl); 1156 break; 1157 } 1158 /* FALLTHROUGH */ 1159 1160 case EOPNOTSUPP: 1161 /* 1162 * Just use the mode as-is. 1163 */ 1164 ip->i_mode = dmode; 1165 error = 0; 1166 goto out; 1167 1168 default: 1169 goto out; 1170 } 1171 1172 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1173 if (error == 0) 1174 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1175 switch (error) { 1176 case 0: 1177 break; 1178 1179 case EOPNOTSUPP: 1180 /* 1181 * XXX: This should not happen, as EOPNOTSUPP above 1182 * was supposed to free acl. 1183 */ 1184 #ifdef DEBUG 1185 printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1186 #endif /* DEBUG */ 1187 break; 1188 1189 default: 1190 goto out; 1191 } 1192 1193 out: 1194 acl_free(acl); 1195 acl_free(dacl); 1196 1197 return (error); 1198 } 1199 1200 static int 1201 ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1202 mode_t mode, struct ucred *cred, struct thread *td) 1203 { 1204 int error; 1205 struct inode *ip = VTOI(tvp); 1206 struct acl *acl; 1207 1208 acl = acl_alloc(M_WAITOK); 1209 1210 /* 1211 * Retrieve default ACL for parent, if any. 1212 */ 1213 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1214 switch (error) { 1215 case 0: 1216 /* 1217 * Retrieved a default ACL, so merge mode and ACL if 1218 * necessary. 
1219 */ 1220 if (acl->acl_cnt != 0) { 1221 /* 1222 * Two possible ways for default ACL to not 1223 * be present. First, the EA can be 1224 * undefined, or second, the default ACL can 1225 * be blank. If it's blank, fall through to 1226 * the it's not defined case. 1227 */ 1228 mode = acl_posix1e_newfilemode(mode, acl); 1229 ip->i_mode = mode; 1230 ext2_sync_acl_from_inode(ip, acl); 1231 break; 1232 } 1233 /* FALLTHROUGH */ 1234 1235 case EOPNOTSUPP: 1236 /* 1237 * Just use the mode as-is. 1238 */ 1239 ip->i_mode = mode; 1240 error = 0; 1241 goto out; 1242 1243 default: 1244 goto out; 1245 } 1246 1247 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1248 switch (error) { 1249 case 0: 1250 break; 1251 1252 case EOPNOTSUPP: 1253 /* 1254 * XXX: This should not happen, as EOPNOTSUPP above was 1255 * supposed to free acl. 1256 */ 1257 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1258 "but no VOP_SETACL()\n"); 1259 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1260 "but no VOP_SETACL()"); */ 1261 break; 1262 1263 default: 1264 goto out; 1265 } 1266 1267 out: 1268 acl_free(acl); 1269 1270 return (error); 1271 } 1272 1273 #endif /* UFS_ACL */ 1274 1275 /* 1276 * Mkdir system call 1277 */ 1278 static int 1279 ext2_mkdir(struct vop_mkdir_args *ap) 1280 { 1281 struct vnode *dvp = ap->a_dvp; 1282 struct vattr *vap = ap->a_vap; 1283 struct componentname *cnp = ap->a_cnp; 1284 struct inode *ip, *dp; 1285 struct vnode *tvp; 1286 struct dirtemplate dirtemplate, *dtp; 1287 int error, dmode; 1288 1289 #ifdef INVARIANTS 1290 if ((cnp->cn_flags & HASBUF) == 0) 1291 panic("ext2_mkdir: no name"); 1292 #endif 1293 dp = VTOI(dvp); 1294 if ((nlink_t)dp->i_nlink >= ext2_max_nlink(dp) && 1295 !ext2_htree_has_idx(dp)) { 1296 error = EMLINK; 1297 goto out; 1298 } 1299 dmode = vap->va_mode & 0777; 1300 dmode |= IFDIR; 1301 /* 1302 * Must simulate part of ext2_makeinode here to acquire the inode, 1303 * but not have it entered in the parent directory. 
The entry is 1304 * made later after writing "." and ".." entries. 1305 */ 1306 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1307 if (error) 1308 goto out; 1309 ip = VTOI(tvp); 1310 ip->i_gid = dp->i_gid; 1311 #ifdef SUIDDIR 1312 { 1313 /* 1314 * if we are hacking owners here, (only do this where told to) 1315 * and we are not giving it TOO root, (would subvert quotas) 1316 * then go ahead and give it to the other user. 1317 * The new directory also inherits the SUID bit. 1318 * If user's UID and dir UID are the same, 1319 * 'give it away' so that the SUID is still forced on. 1320 */ 1321 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1322 (dp->i_mode & ISUID) && dp->i_uid) { 1323 dmode |= ISUID; 1324 ip->i_uid = dp->i_uid; 1325 } else { 1326 ip->i_uid = cnp->cn_cred->cr_uid; 1327 } 1328 } 1329 #else 1330 ip->i_uid = cnp->cn_cred->cr_uid; 1331 #endif 1332 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1333 ip->i_mode = dmode; 1334 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1335 ip->i_nlink = 2; 1336 if (cnp->cn_flags & ISWHITEOUT) 1337 ip->i_flags |= UF_OPAQUE; 1338 error = ext2_update(tvp, 1); 1339 1340 /* 1341 * Bump link count in parent directory 1342 * to reflect work done below. Should 1343 * be done before reference is created 1344 * so reparation is possible if we crash. 1345 */ 1346 ext2_inc_nlink(dp); 1347 dp->i_flag |= IN_CHANGE; 1348 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1349 if (error) 1350 goto bad; 1351 1352 /* Initialize directory with "." and ".." from static template. 
*/ 1353 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1354 EXT2F_INCOMPAT_FTYPE)) 1355 dtp = &mastertemplate; 1356 else 1357 dtp = &omastertemplate; 1358 dirtemplate = *dtp; 1359 dirtemplate.dot_ino = ip->i_number; 1360 dirtemplate.dotdot_ino = dp->i_number; 1361 /* 1362 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1363 * just redefine it - for this function only 1364 */ 1365 #undef DIRBLKSIZ 1366 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1367 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1368 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, 1369 sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE, 1370 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1371 NULL, NULL); 1372 if (error) { 1373 ext2_dec_nlink(dp); 1374 dp->i_flag |= IN_CHANGE; 1375 goto bad; 1376 } 1377 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1378 /* XXX should grow with balloc() */ 1379 panic("ext2_mkdir: blksize"); 1380 else { 1381 ip->i_size = DIRBLKSIZ; 1382 ip->i_flag |= IN_CHANGE; 1383 } 1384 1385 #ifdef UFS_ACL 1386 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1387 error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1388 cnp->cn_cred, cnp->cn_thread); 1389 if (error) 1390 goto bad; 1391 } 1392 1393 #endif /* UFS_ACL */ 1394 1395 /* Directory set up, now install its entry in the parent directory. */ 1396 error = ext2_direnter(ip, dvp, cnp); 1397 if (error) { 1398 ext2_dec_nlink(dp); 1399 dp->i_flag |= IN_CHANGE; 1400 } 1401 bad: 1402 /* 1403 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1404 * for us because we set the link count to 0. 1405 */ 1406 if (error) { 1407 ip->i_nlink = 0; 1408 ip->i_flag |= IN_CHANGE; 1409 vput(tvp); 1410 } else 1411 *ap->a_vpp = tvp; 1412 out: 1413 return (error); 1414 #undef DIRBLKSIZ 1415 #define DIRBLKSIZ DEV_BSIZE 1416 } 1417 1418 /* 1419 * Rmdir system call. 
1420 */ 1421 static int 1422 ext2_rmdir(struct vop_rmdir_args *ap) 1423 { 1424 struct vnode *vp = ap->a_vp; 1425 struct vnode *dvp = ap->a_dvp; 1426 struct componentname *cnp = ap->a_cnp; 1427 struct inode *ip, *dp; 1428 int error; 1429 1430 ip = VTOI(vp); 1431 dp = VTOI(dvp); 1432 1433 /* 1434 * Verify the directory is empty (and valid). 1435 * (Rmdir ".." won't be valid since 1436 * ".." will contain a reference to 1437 * the current directory and thus be 1438 * non-empty.) 1439 */ 1440 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1441 error = ENOTEMPTY; 1442 goto out; 1443 } 1444 if ((dp->i_flags & APPEND) 1445 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1446 error = EPERM; 1447 goto out; 1448 } 1449 /* 1450 * Delete reference to directory before purging 1451 * inode. If we crash in between, the directory 1452 * will be reattached to lost+found, 1453 */ 1454 error = ext2_dirremove(dvp, cnp); 1455 if (error) 1456 goto out; 1457 ext2_dec_nlink(dp); 1458 dp->i_flag |= IN_CHANGE; 1459 cache_purge(dvp); 1460 VOP_UNLOCK(dvp, 0); 1461 /* 1462 * Truncate inode. The only stuff left 1463 * in the directory is "." and "..". 
1464 */ 1465 ip->i_nlink = 0; 1466 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1467 cnp->cn_thread); 1468 cache_purge(ITOV(ip)); 1469 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1470 VOP_UNLOCK(vp, 0); 1471 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1472 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1473 } 1474 out: 1475 return (error); 1476 } 1477 1478 /* 1479 * symlink -- make a symbolic link 1480 */ 1481 static int 1482 ext2_symlink(struct vop_symlink_args *ap) 1483 { 1484 struct vnode *vp, **vpp = ap->a_vpp; 1485 struct inode *ip; 1486 int len, error; 1487 1488 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1489 vpp, ap->a_cnp); 1490 if (error) 1491 return (error); 1492 vp = *vpp; 1493 len = strlen(ap->a_target); 1494 if (len < vp->v_mount->mnt_maxsymlinklen) { 1495 ip = VTOI(vp); 1496 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1497 ip->i_size = len; 1498 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1499 } else 1500 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1501 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1502 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1503 if (error) 1504 vput(vp); 1505 return (error); 1506 } 1507 1508 /* 1509 * Return target name of a symbolic link 1510 */ 1511 static int 1512 ext2_readlink(struct vop_readlink_args *ap) 1513 { 1514 struct vnode *vp = ap->a_vp; 1515 struct inode *ip = VTOI(vp); 1516 int isize; 1517 1518 isize = ip->i_size; 1519 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1520 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1521 return (0); 1522 } 1523 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1524 } 1525 1526 /* 1527 * Calculate the logical to physical mapping if not done already, 1528 * then call the device strategy routine. 1529 * 1530 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1531 * deadlock on memory. See ext2_bmap() for details. 
1532 */ 1533 static int 1534 ext2_strategy(struct vop_strategy_args *ap) 1535 { 1536 struct buf *bp = ap->a_bp; 1537 struct vnode *vp = ap->a_vp; 1538 struct bufobj *bo; 1539 daddr_t blkno; 1540 int error; 1541 1542 if (vp->v_type == VBLK || vp->v_type == VCHR) 1543 panic("ext2_strategy: spec"); 1544 if (bp->b_blkno == bp->b_lblkno) { 1545 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1546 bp->b_blkno = blkno; 1547 if (error) { 1548 bp->b_error = error; 1549 bp->b_ioflags |= BIO_ERROR; 1550 bufdone(bp); 1551 return (0); 1552 } 1553 if ((long)bp->b_blkno == -1) 1554 vfs_bio_clrbuf(bp); 1555 } 1556 if ((long)bp->b_blkno == -1) { 1557 bufdone(bp); 1558 return (0); 1559 } 1560 bp->b_iooffset = dbtob(bp->b_blkno); 1561 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1562 BO_STRATEGY(bo, bp); 1563 return (0); 1564 } 1565 1566 /* 1567 * Print out the contents of an inode. 1568 */ 1569 static int 1570 ext2_print(struct vop_print_args *ap) 1571 { 1572 struct vnode *vp = ap->a_vp; 1573 struct inode *ip = VTOI(vp); 1574 1575 vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); 1576 if (vp->v_type == VFIFO) 1577 fifo_printinfo(vp); 1578 printf("\n"); 1579 return (0); 1580 } 1581 1582 /* 1583 * Close wrapper for fifos. 1584 * 1585 * Update the times on the inode then do device close. 1586 */ 1587 static int 1588 ext2fifo_close(struct vop_close_args *ap) 1589 { 1590 struct vnode *vp = ap->a_vp; 1591 1592 VI_LOCK(vp); 1593 if (vp->v_usecount > 1) 1594 ext2_itimes_locked(vp); 1595 VI_UNLOCK(vp); 1596 return (fifo_specops.vop_close(ap)); 1597 } 1598 1599 /* 1600 * Kqfilter wrapper for fifos. 1601 * 1602 * Fall through to ext2 kqfilter routines if needed 1603 */ 1604 static int 1605 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1606 { 1607 int error; 1608 1609 error = fifo_specops.vop_kqfilter(ap); 1610 if (error) 1611 error = vfs_kqfilter(ap); 1612 return (error); 1613 } 1614 1615 /* 1616 * Return POSIX pathconf information applicable to ext2 filesystems. 
1617 */ 1618 static int 1619 ext2_pathconf(struct vop_pathconf_args *ap) 1620 { 1621 int error = 0; 1622 1623 switch (ap->a_name) { 1624 case _PC_LINK_MAX: 1625 if (ext2_htree_has_idx(VTOI(ap->a_vp))) 1626 *ap->a_retval = INT_MAX; 1627 else 1628 *ap->a_retval = ext2_max_nlink(VTOI(ap->a_vp)); 1629 break; 1630 case _PC_NAME_MAX: 1631 *ap->a_retval = NAME_MAX; 1632 break; 1633 case _PC_PATH_MAX: 1634 *ap->a_retval = PATH_MAX; 1635 break; 1636 case _PC_PIPE_BUF: 1637 *ap->a_retval = PIPE_BUF; 1638 break; 1639 case _PC_CHOWN_RESTRICTED: 1640 *ap->a_retval = 1; 1641 break; 1642 case _PC_NO_TRUNC: 1643 *ap->a_retval = 1; 1644 break; 1645 1646 #ifdef UFS_ACL 1647 case _PC_ACL_EXTENDED: 1648 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1649 *ap->a_retval = 1; 1650 else 1651 *ap->a_retval = 0; 1652 break; 1653 case _PC_ACL_PATH_MAX: 1654 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1655 *ap->a_retval = ACL_MAX_ENTRIES; 1656 else 1657 *ap->a_retval = 3; 1658 break; 1659 #endif /* UFS_ACL */ 1660 1661 case _PC_MIN_HOLE_SIZE: 1662 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1663 break; 1664 case _PC_ASYNC_IO: 1665 /* _PC_ASYNC_IO should have been handled by upper layers. 
*/ 1666 KASSERT(0, ("_PC_ASYNC_IO should not get here")); 1667 error = EINVAL; 1668 break; 1669 case _PC_PRIO_IO: 1670 *ap->a_retval = 0; 1671 break; 1672 case _PC_SYNC_IO: 1673 *ap->a_retval = 0; 1674 break; 1675 case _PC_ALLOC_SIZE_MIN: 1676 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1677 break; 1678 case _PC_FILESIZEBITS: 1679 *ap->a_retval = 64; 1680 break; 1681 case _PC_REC_INCR_XFER_SIZE: 1682 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1683 break; 1684 case _PC_REC_MAX_XFER_SIZE: 1685 *ap->a_retval = -1; /* means ``unlimited'' */ 1686 break; 1687 case _PC_REC_MIN_XFER_SIZE: 1688 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1689 break; 1690 case _PC_REC_XFER_ALIGN: 1691 *ap->a_retval = PAGE_SIZE; 1692 break; 1693 case _PC_SYMLINK_MAX: 1694 *ap->a_retval = MAXPATHLEN; 1695 break; 1696 1697 default: 1698 error = EINVAL; 1699 break; 1700 } 1701 return (error); 1702 } 1703 1704 /* 1705 * Vnode operation to remove a named attribute. 1706 */ 1707 static int 1708 ext2_deleteextattr(struct vop_deleteextattr_args *ap) 1709 { 1710 struct inode *ip; 1711 struct m_ext2fs *fs; 1712 int error; 1713 1714 ip = VTOI(ap->a_vp); 1715 fs = ip->i_e2fs; 1716 1717 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1718 return (EOPNOTSUPP); 1719 1720 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1721 return (EOPNOTSUPP); 1722 1723 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1724 ap->a_cred, ap->a_td, VWRITE); 1725 if (error) 1726 return (error); 1727 1728 error = ENOATTR; 1729 1730 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1731 error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name); 1732 if (error != ENOATTR) 1733 return (error); 1734 } 1735 1736 if (ip->i_facl) 1737 error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name); 1738 1739 return (error); 1740 } 1741 1742 /* 1743 * Vnode operation to retrieve a named extended attribute. 
1744 */ 1745 static int 1746 ext2_getextattr(struct vop_getextattr_args *ap) 1747 { 1748 struct inode *ip; 1749 struct m_ext2fs *fs; 1750 int error; 1751 1752 ip = VTOI(ap->a_vp); 1753 fs = ip->i_e2fs; 1754 1755 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1756 return (EOPNOTSUPP); 1757 1758 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1759 return (EOPNOTSUPP); 1760 1761 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1762 ap->a_cred, ap->a_td, VREAD); 1763 if (error) 1764 return (error); 1765 1766 if (ap->a_size != NULL) 1767 *ap->a_size = 0; 1768 1769 error = ENOATTR; 1770 1771 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1772 error = ext2_extattr_inode_get(ip, ap->a_attrnamespace, 1773 ap->a_name, ap->a_uio, ap->a_size); 1774 if (error != ENOATTR) 1775 return (error); 1776 } 1777 1778 if (ip->i_facl) 1779 error = ext2_extattr_block_get(ip, ap->a_attrnamespace, 1780 ap->a_name, ap->a_uio, ap->a_size); 1781 1782 return (error); 1783 } 1784 1785 /* 1786 * Vnode operation to retrieve extended attributes on a vnode. 
1787 */ 1788 static int 1789 ext2_listextattr(struct vop_listextattr_args *ap) 1790 { 1791 struct inode *ip; 1792 struct m_ext2fs *fs; 1793 int error; 1794 1795 ip = VTOI(ap->a_vp); 1796 fs = ip->i_e2fs; 1797 1798 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1799 return (EOPNOTSUPP); 1800 1801 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1802 return (EOPNOTSUPP); 1803 1804 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1805 ap->a_cred, ap->a_td, VREAD); 1806 if (error) 1807 return (error); 1808 1809 if (ap->a_size != NULL) 1810 *ap->a_size = 0; 1811 1812 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1813 error = ext2_extattr_inode_list(ip, ap->a_attrnamespace, 1814 ap->a_uio, ap->a_size); 1815 if (error) 1816 return (error); 1817 } 1818 1819 if (ip->i_facl) 1820 error = ext2_extattr_block_list(ip, ap->a_attrnamespace, 1821 ap->a_uio, ap->a_size); 1822 1823 return (error); 1824 } 1825 1826 /* 1827 * Vnode operation to set a named attribute. 
1828 */ 1829 static int 1830 ext2_setextattr(struct vop_setextattr_args *ap) 1831 { 1832 struct inode *ip; 1833 struct m_ext2fs *fs; 1834 int error; 1835 1836 ip = VTOI(ap->a_vp); 1837 fs = ip->i_e2fs; 1838 1839 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1840 return (EOPNOTSUPP); 1841 1842 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1843 return (EOPNOTSUPP); 1844 1845 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1846 ap->a_cred, ap->a_td, VWRITE); 1847 if (error) 1848 return (error); 1849 1850 error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name); 1851 if (error) 1852 return (error); 1853 1854 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1855 error = ext2_extattr_inode_set(ip, ap->a_attrnamespace, 1856 ap->a_name, ap->a_uio); 1857 if (error != ENOSPC) 1858 return (error); 1859 } 1860 1861 error = ext2_extattr_block_set(ip, ap->a_attrnamespace, 1862 ap->a_name, ap->a_uio); 1863 1864 return (error); 1865 } 1866 1867 /* 1868 * Vnode pointer to File handle 1869 */ 1870 /* ARGSUSED */ 1871 static int 1872 ext2_vptofh(struct vop_vptofh_args *ap) 1873 { 1874 struct inode *ip; 1875 struct ufid *ufhp; 1876 1877 ip = VTOI(ap->a_vp); 1878 ufhp = (struct ufid *)ap->a_fhp; 1879 ufhp->ufid_len = sizeof(struct ufid); 1880 ufhp->ufid_ino = ip->i_number; 1881 ufhp->ufid_gen = ip->i_gen; 1882 return (0); 1883 } 1884 1885 /* 1886 * Initialize the vnode associated with a new inode, handle aliased 1887 * vnodes. 1888 */ 1889 int 1890 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1891 { 1892 struct inode *ip; 1893 struct vnode *vp; 1894 1895 vp = *vpp; 1896 ip = VTOI(vp); 1897 vp->v_type = IFTOVT(ip->i_mode); 1898 if (vp->v_type == VFIFO) 1899 vp->v_op = fifoops; 1900 1901 if (ip->i_number == EXT2_ROOTINO) 1902 vp->v_vflag |= VV_ROOT; 1903 ip->i_modrev = init_va_filerev(); 1904 *vpp = vp; 1905 return (0); 1906 } 1907 1908 /* 1909 * Allocate a new inode. 
1910 */ 1911 static int 1912 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1913 struct componentname *cnp) 1914 { 1915 struct inode *ip, *pdir; 1916 struct vnode *tvp; 1917 int error; 1918 1919 pdir = VTOI(dvp); 1920 #ifdef INVARIANTS 1921 if ((cnp->cn_flags & HASBUF) == 0) 1922 panic("ext2_makeinode: no name"); 1923 #endif 1924 *vpp = NULL; 1925 if ((mode & IFMT) == 0) 1926 mode |= IFREG; 1927 1928 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1929 if (error) { 1930 return (error); 1931 } 1932 ip = VTOI(tvp); 1933 ip->i_gid = pdir->i_gid; 1934 #ifdef SUIDDIR 1935 { 1936 /* 1937 * if we are 1938 * not the owner of the directory, 1939 * and we are hacking owners here, (only do this where told to) 1940 * and we are not giving it TOO root, (would subvert quotas) 1941 * then go ahead and give it to the other user. 1942 * Note that this drops off the execute bits for security. 1943 */ 1944 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1945 (pdir->i_mode & ISUID) && 1946 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1947 ip->i_uid = pdir->i_uid; 1948 mode &= ~07111; 1949 } else { 1950 ip->i_uid = cnp->cn_cred->cr_uid; 1951 } 1952 } 1953 #else 1954 ip->i_uid = cnp->cn_cred->cr_uid; 1955 #endif 1956 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1957 ip->i_mode = mode; 1958 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1959 ip->i_nlink = 1; 1960 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1961 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1962 ip->i_mode &= ~ISGID; 1963 } 1964 1965 if (cnp->cn_flags & ISWHITEOUT) 1966 ip->i_flags |= UF_OPAQUE; 1967 1968 /* 1969 * Make sure inode goes to disk before directory entry. 
1970 */ 1971 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1972 if (error) 1973 goto bad; 1974 1975 #ifdef UFS_ACL 1976 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1977 error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 1978 cnp->cn_cred, cnp->cn_thread); 1979 if (error) 1980 goto bad; 1981 } 1982 #endif /* UFS_ACL */ 1983 1984 error = ext2_direnter(ip, dvp, cnp); 1985 if (error) 1986 goto bad; 1987 1988 *vpp = tvp; 1989 return (0); 1990 1991 bad: 1992 /* 1993 * Write error occurred trying to update the inode 1994 * or the directory so must deallocate the inode. 1995 */ 1996 ip->i_nlink = 0; 1997 ip->i_flag |= IN_CHANGE; 1998 vput(tvp); 1999 return (error); 2000 } 2001 2002 /* 2003 * Vnode op for reading. 2004 */ 2005 static int 2006 ext2_read(struct vop_read_args *ap) 2007 { 2008 struct vnode *vp; 2009 struct inode *ip; 2010 int error; 2011 2012 vp = ap->a_vp; 2013 ip = VTOI(vp); 2014 2015 /* EXT4_EXT_LOCK(ip); */ 2016 if (ip->i_flag & IN_E4EXTENTS) 2017 error = ext4_ext_read(ap); 2018 else 2019 error = ext2_ind_read(ap); 2020 /* EXT4_EXT_UNLOCK(ip); */ 2021 return (error); 2022 } 2023 2024 /* 2025 * Vnode op for reading. 
2026 */ 2027 static int 2028 ext2_ind_read(struct vop_read_args *ap) 2029 { 2030 struct vnode *vp; 2031 struct inode *ip; 2032 struct uio *uio; 2033 struct m_ext2fs *fs; 2034 struct buf *bp; 2035 daddr_t lbn, nextlbn; 2036 off_t bytesinfile; 2037 long size, xfersize, blkoffset; 2038 int error, orig_resid, seqcount; 2039 int ioflag; 2040 2041 vp = ap->a_vp; 2042 uio = ap->a_uio; 2043 ioflag = ap->a_ioflag; 2044 2045 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 2046 ip = VTOI(vp); 2047 2048 #ifdef INVARIANTS 2049 if (uio->uio_rw != UIO_READ) 2050 panic("%s: mode", "ext2_read"); 2051 2052 if (vp->v_type == VLNK) { 2053 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 2054 panic("%s: short symlink", "ext2_read"); 2055 } else if (vp->v_type != VREG && vp->v_type != VDIR) 2056 panic("%s: type %d", "ext2_read", vp->v_type); 2057 #endif 2058 orig_resid = uio->uio_resid; 2059 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 2060 if (orig_resid == 0) 2061 return (0); 2062 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 2063 fs = ip->i_e2fs; 2064 if (uio->uio_offset < ip->i_size && 2065 uio->uio_offset >= fs->e2fs_maxfilesize) 2066 return (EOVERFLOW); 2067 2068 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 2069 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2070 break; 2071 lbn = lblkno(fs, uio->uio_offset); 2072 nextlbn = lbn + 1; 2073 size = blksize(fs, ip, lbn); 2074 blkoffset = blkoff(fs, uio->uio_offset); 2075 2076 xfersize = fs->e2fs_fsize - blkoffset; 2077 if (uio->uio_resid < xfersize) 2078 xfersize = uio->uio_resid; 2079 if (bytesinfile < xfersize) 2080 xfersize = bytesinfile; 2081 2082 if (lblktosize(fs, nextlbn) >= ip->i_size) 2083 error = bread(vp, lbn, size, NOCRED, &bp); 2084 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 2085 error = cluster_read(vp, ip->i_size, lbn, size, 2086 NOCRED, blkoffset + uio->uio_resid, seqcount, 2087 0, &bp); 2088 } else if (seqcount > 1) { 2089 u_int nextsize = 
blksize(fs, ip, nextlbn); 2090 2091 error = breadn(vp, lbn, 2092 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 2093 } else 2094 error = bread(vp, lbn, size, NOCRED, &bp); 2095 if (error) { 2096 brelse(bp); 2097 bp = NULL; 2098 break; 2099 } 2100 2101 /* 2102 * We should only get non-zero b_resid when an I/O error 2103 * has occurred, which should cause us to break above. 2104 * However, if the short read did not cause an error, 2105 * then we want to ensure that we do not uiomove bad 2106 * or uninitialized data. 2107 */ 2108 size -= bp->b_resid; 2109 if (size < xfersize) { 2110 if (size == 0) 2111 break; 2112 xfersize = size; 2113 } 2114 error = uiomove((char *)bp->b_data + blkoffset, 2115 (int)xfersize, uio); 2116 if (error) 2117 break; 2118 vfs_bio_brelse(bp, ioflag); 2119 } 2120 2121 /* 2122 * This can only happen in the case of an error because the loop 2123 * above resets bp to NULL on each iteration and on normal 2124 * completion has not set a new value into it. so it must have come 2125 * from a 'break' statement 2126 */ 2127 if (bp != NULL) 2128 vfs_bio_brelse(bp, ioflag); 2129 2130 if ((error == 0 || uio->uio_resid != orig_resid) && 2131 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 2132 ip->i_flag |= IN_ACCESS; 2133 return (error); 2134 } 2135 2136 static int 2137 ext2_ioctl(struct vop_ioctl_args *ap) 2138 { 2139 2140 switch (ap->a_command) { 2141 case FIOSEEKDATA: 2142 case FIOSEEKHOLE: 2143 return (vn_bmap_seekhole(ap->a_vp, ap->a_command, 2144 (off_t *)ap->a_data, ap->a_cred)); 2145 default: 2146 return (ENOTTY); 2147 } 2148 } 2149 2150 /* 2151 * this function handles ext4 extents block mapping 2152 */ 2153 static int 2154 ext4_ext_read(struct vop_read_args *ap) 2155 { 2156 static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; 2157 struct vnode *vp; 2158 struct inode *ip; 2159 struct uio *uio; 2160 struct m_ext2fs *fs; 2161 struct buf *bp; 2162 struct ext4_extent nex, *ep; 2163 struct ext4_extent_path path; 2164 daddr_t lbn, newblk; 2165 off_t 
bytesinfile; 2166 int cache_type; 2167 ssize_t orig_resid; 2168 int error; 2169 long size, xfersize, blkoffset; 2170 2171 vp = ap->a_vp; 2172 ip = VTOI(vp); 2173 uio = ap->a_uio; 2174 memset(&path, 0, sizeof(path)); 2175 2176 orig_resid = uio->uio_resid; 2177 KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); 2178 if (orig_resid == 0) 2179 return (0); 2180 KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); 2181 fs = ip->i_e2fs; 2182 if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) 2183 return (EOVERFLOW); 2184 2185 while (uio->uio_resid > 0) { 2186 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2187 break; 2188 lbn = lblkno(fs, uio->uio_offset); 2189 size = blksize(fs, ip, lbn); 2190 blkoffset = blkoff(fs, uio->uio_offset); 2191 2192 xfersize = fs->e2fs_fsize - blkoffset; 2193 xfersize = MIN(xfersize, uio->uio_resid); 2194 xfersize = MIN(xfersize, bytesinfile); 2195 2196 /* get block from ext4 extent cache */ 2197 cache_type = ext4_ext_in_cache(ip, lbn, &nex); 2198 switch (cache_type) { 2199 case EXT4_EXT_CACHE_NO: 2200 ext4_ext_find_extent(fs, ip, lbn, &path); 2201 if (path.ep_is_sparse) 2202 ep = &path.ep_sparse_ext; 2203 else 2204 ep = path.ep_ext; 2205 if (ep == NULL) 2206 return (EIO); 2207 2208 ext4_ext_put_cache(ip, ep, 2209 path.ep_is_sparse ? 
EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); 2210 2211 newblk = lbn - ep->e_blk + (ep->e_start_lo | 2212 (daddr_t)ep->e_start_hi << 32); 2213 2214 if (path.ep_bp != NULL) { 2215 brelse(path.ep_bp); 2216 path.ep_bp = NULL; 2217 } 2218 break; 2219 2220 case EXT4_EXT_CACHE_GAP: 2221 /* block has not been allocated yet */ 2222 break; 2223 2224 case EXT4_EXT_CACHE_IN: 2225 newblk = lbn - nex.e_blk + (nex.e_start_lo | 2226 (daddr_t)nex.e_start_hi << 32); 2227 break; 2228 2229 default: 2230 panic("%s: invalid cache type", __func__); 2231 } 2232 2233 if (cache_type == EXT4_EXT_CACHE_GAP || 2234 (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { 2235 if (xfersize > sizeof(zeroes)) 2236 xfersize = sizeof(zeroes); 2237 error = uiomove(zeroes, xfersize, uio); 2238 if (error) 2239 return (error); 2240 } else { 2241 error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, 2242 NOCRED, &bp); 2243 if (error) { 2244 brelse(bp); 2245 return (error); 2246 } 2247 2248 size -= bp->b_resid; 2249 if (size < xfersize) { 2250 if (size == 0) { 2251 bqrelse(bp); 2252 break; 2253 } 2254 xfersize = size; 2255 } 2256 error = uiomove(bp->b_data + blkoffset, xfersize, uio); 2257 bqrelse(bp); 2258 if (error) 2259 return (error); 2260 } 2261 } 2262 2263 return (0); 2264 } 2265 2266 /* 2267 * Vnode op for writing. 
2268 */ 2269 static int 2270 ext2_write(struct vop_write_args *ap) 2271 { 2272 struct vnode *vp; 2273 struct uio *uio; 2274 struct inode *ip; 2275 struct m_ext2fs *fs; 2276 struct buf *bp; 2277 daddr_t lbn; 2278 off_t osize; 2279 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 2280 2281 ioflag = ap->a_ioflag; 2282 uio = ap->a_uio; 2283 vp = ap->a_vp; 2284 2285 seqcount = ioflag >> IO_SEQSHIFT; 2286 ip = VTOI(vp); 2287 2288 #ifdef INVARIANTS 2289 if (uio->uio_rw != UIO_WRITE) 2290 panic("%s: mode", "ext2_write"); 2291 #endif 2292 2293 switch (vp->v_type) { 2294 case VREG: 2295 if (ioflag & IO_APPEND) 2296 uio->uio_offset = ip->i_size; 2297 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 2298 return (EPERM); 2299 /* FALLTHROUGH */ 2300 case VLNK: 2301 break; 2302 case VDIR: 2303 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 2304 if ((ioflag & IO_SYNC) == 0) 2305 panic("ext2_write: nonsync dir write"); 2306 break; 2307 default: 2308 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 2309 vp->v_type, (intmax_t)uio->uio_offset, 2310 (intmax_t)uio->uio_resid); 2311 } 2312 2313 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 2314 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 2315 fs = ip->i_e2fs; 2316 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 2317 return (EFBIG); 2318 /* 2319 * Maybe this should be above the vnode op call, but so long as 2320 * file servers have no limits, I don't think it matters. 
2321 */ 2322 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 2323 return (EFBIG); 2324 2325 resid = uio->uio_resid; 2326 osize = ip->i_size; 2327 if (seqcount > BA_SEQMAX) 2328 flags = BA_SEQMAX << BA_SEQSHIFT; 2329 else 2330 flags = seqcount << BA_SEQSHIFT; 2331 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 2332 flags |= IO_SYNC; 2333 2334 for (error = 0; uio->uio_resid > 0;) { 2335 lbn = lblkno(fs, uio->uio_offset); 2336 blkoffset = blkoff(fs, uio->uio_offset); 2337 xfersize = fs->e2fs_fsize - blkoffset; 2338 if (uio->uio_resid < xfersize) 2339 xfersize = uio->uio_resid; 2340 if (uio->uio_offset + xfersize > ip->i_size) 2341 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 2342 2343 /* 2344 * We must perform a read-before-write if the transfer size 2345 * does not cover the entire buffer. 2346 */ 2347 if (fs->e2fs_bsize > xfersize) 2348 flags |= BA_CLRBUF; 2349 else 2350 flags &= ~BA_CLRBUF; 2351 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 2352 ap->a_cred, &bp, flags); 2353 if (error != 0) 2354 break; 2355 2356 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL)) 2357 bp->b_flags |= B_NOCACHE; 2358 if (uio->uio_offset + xfersize > ip->i_size) 2359 ip->i_size = uio->uio_offset + xfersize; 2360 size = blksize(fs, ip, lbn) - bp->b_resid; 2361 if (size < xfersize) 2362 xfersize = size; 2363 2364 error = 2365 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 2366 /* 2367 * If the buffer is not already filled and we encounter an 2368 * error while trying to fill it, we have to clear out any 2369 * garbage data from the pages instantiated for the buffer. 2370 * If we do not, a failed uiomove() during a write can leave 2371 * the prior contents of the pages exposed to a userland mmap. 2372 * 2373 * Note that we need only clear buffers with a transfer size 2374 * equal to the block size because buffers with a shorter 2375 * transfer size were cleared above by the call to ext2_balloc() 2376 * with the BA_CLRBUF flag set. 
2377 * 2378 * If the source region for uiomove identically mmaps the 2379 * buffer, uiomove() performed the NOP copy, and the buffer 2380 * content remains valid because the page fault handler 2381 * validated the pages. 2382 */ 2383 if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 2384 fs->e2fs_bsize == xfersize) 2385 vfs_bio_clrbuf(bp); 2386 2387 vfs_bio_set_flags(bp, ioflag); 2388 2389 /* 2390 * If IO_SYNC each buffer is written synchronously. Otherwise 2391 * if we have a severe page deficiency write the buffer 2392 * asynchronously. Otherwise try to cluster, and if that 2393 * doesn't do it then either do an async write (if O_DIRECT), 2394 * or a delayed write (if not). 2395 */ 2396 if (ioflag & IO_SYNC) { 2397 (void)bwrite(bp); 2398 } else if (vm_page_count_severe() || 2399 buf_dirty_count_severe() || 2400 (ioflag & IO_ASYNC)) { 2401 bp->b_flags |= B_CLUSTEROK; 2402 bawrite(bp); 2403 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 2404 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 2405 bp->b_flags |= B_CLUSTEROK; 2406 cluster_write(vp, bp, ip->i_size, seqcount, 0); 2407 } else { 2408 bawrite(bp); 2409 } 2410 } else if (ioflag & IO_DIRECT) { 2411 bp->b_flags |= B_CLUSTEROK; 2412 bawrite(bp); 2413 } else { 2414 bp->b_flags |= B_CLUSTEROK; 2415 bdwrite(bp); 2416 } 2417 if (error || xfersize == 0) 2418 break; 2419 } 2420 /* 2421 * If we successfully wrote any data, and we are not the superuser 2422 * we clear the setuid and setgid bits as a precaution against 2423 * tampering. 
2424 */ 2425 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 2426 ap->a_cred) { 2427 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 2428 ip->i_mode &= ~(ISUID | ISGID); 2429 } 2430 if (error) { 2431 if (ioflag & IO_UNIT) { 2432 (void)ext2_truncate(vp, osize, 2433 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 2434 uio->uio_offset -= resid - uio->uio_resid; 2435 uio->uio_resid = resid; 2436 } 2437 } 2438 if (uio->uio_resid != resid) { 2439 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2440 if (ioflag & IO_SYNC) 2441 error = ext2_update(vp, 1); 2442 } 2443 return (error); 2444 } 2445