1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * (c) UNIX System Laboratories, Inc. 13 * All or some portions of this file are derived from material licensed 14 * to the University of California by American Telephone and Telegraph 15 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 16 * the permission of UNIX System Laboratories, Inc. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 43 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 44 * $FreeBSD$ 45 */ 46 47 #include "opt_suiddir.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/kernel.h> 52 #include <sys/fcntl.h> 53 #include <sys/filio.h> 54 #include <sys/stat.h> 55 #include <sys/bio.h> 56 #include <sys/buf.h> 57 #include <sys/endian.h> 58 #include <sys/priv.h> 59 #include <sys/rwlock.h> 60 #include <sys/mount.h> 61 #include <sys/unistd.h> 62 #include <sys/time.h> 63 #include <sys/vnode.h> 64 #include <sys/namei.h> 65 #include <sys/lockf.h> 66 #include <sys/event.h> 67 #include <sys/conf.h> 68 #include <sys/file.h> 69 #include <sys/extattr.h> 70 #include <sys/vmmeter.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_param.h> 74 #include <vm/vm_extern.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_pager.h> 78 #include <vm/vnode_pager.h> 79 80 #include "opt_directio.h" 81 82 #include <ufs/ufs/dir.h> 83 84 #include <fs/ext2fs/fs.h> 85 #include <fs/ext2fs/inode.h> 86 #include <fs/ext2fs/ext2_acl.h> 87 #include <fs/ext2fs/ext2fs.h> 88 #include <fs/ext2fs/ext2_extern.h> 89 #include <fs/ext2fs/ext2_dinode.h> 90 #include <fs/ext2fs/ext2_dir.h> 91 #include <fs/ext2fs/ext2_mount.h> 92 #include <fs/ext2fs/ext2_extattr.h> 93 94 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 95 static void ext2_itimes_locked(struct vnode *); 96 97 static vop_access_t ext2_access; 98 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 99 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 100 struct thread *); 101 static vop_close_t ext2_close; 102 static vop_create_t ext2_create; 103 static vop_fsync_t ext2_fsync; 104 static vop_getattr_t ext2_getattr; 105 static vop_ioctl_t ext2_ioctl; 106 static vop_link_t ext2_link; 107 static vop_mkdir_t ext2_mkdir; 108 static vop_mknod_t ext2_mknod; 109 static vop_open_t ext2_open; 110 static vop_pathconf_t ext2_pathconf; 111 static vop_print_t ext2_print; 112 static vop_read_t ext2_read; 113 static vop_readlink_t ext2_readlink; 114 static vop_remove_t ext2_remove; 115 static vop_rename_t ext2_rename; 116 static vop_rmdir_t ext2_rmdir; 117 static vop_setattr_t ext2_setattr; 118 static vop_strategy_t ext2_strategy; 119 static vop_symlink_t ext2_symlink; 120 static vop_write_t ext2_write; 121 static vop_deleteextattr_t ext2_deleteextattr; 122 static vop_getextattr_t ext2_getextattr; 123 static vop_listextattr_t ext2_listextattr; 124 static vop_setextattr_t ext2_setextattr; 125 static vop_vptofh_t ext2_vptofh; 126 static vop_close_t ext2fifo_close; 127 static vop_kqfilter_t ext2fifo_kqfilter; 128 129 /* Global vfs data structures for ext2. */ 130 struct vop_vector ext2_vnodeops = { 131 .vop_default = &default_vnodeops, 132 .vop_access = ext2_access, 133 .vop_bmap = ext2_bmap, 134 .vop_cachedlookup = ext2_lookup, 135 .vop_close = ext2_close, 136 .vop_create = ext2_create, 137 .vop_fsync = ext2_fsync, 138 .vop_getpages = vnode_pager_local_getpages, 139 .vop_getpages_async = vnode_pager_local_getpages_async, 140 .vop_getattr = ext2_getattr, 141 .vop_inactive = ext2_inactive, 142 .vop_ioctl = ext2_ioctl, 143 .vop_link = ext2_link, 144 .vop_lookup = vfs_cache_lookup, 145 .vop_mkdir = ext2_mkdir, 146 .vop_mknod = ext2_mknod, 147 .vop_open = ext2_open, 148 .vop_pathconf = ext2_pathconf, 149 .vop_poll = vop_stdpoll, 150 .vop_print = ext2_print, 151 .vop_read = ext2_read, 152 .vop_readdir = ext2_readdir, 153 .vop_readlink = ext2_readlink, 154 .vop_reallocblks = ext2_reallocblks, 155 .vop_reclaim = ext2_reclaim, 156 .vop_remove = ext2_remove, 157 .vop_rename = ext2_rename, 158 .vop_rmdir = ext2_rmdir, 159 .vop_setattr = ext2_setattr, 160 .vop_strategy = ext2_strategy, 161 .vop_symlink = ext2_symlink, 162 .vop_write = ext2_write, 163 .vop_deleteextattr = ext2_deleteextattr, 164 .vop_getextattr = ext2_getextattr, 165 .vop_listextattr = ext2_listextattr, 166 .vop_setextattr = ext2_setextattr, 167 #ifdef UFS_ACL 168 .vop_getacl = ext2_getacl, 169 .vop_setacl = ext2_setacl, 170 .vop_aclcheck = ext2_aclcheck, 171 #endif /* UFS_ACL */ 172 .vop_vptofh = ext2_vptofh, 173 }; 174 175 struct vop_vector ext2_fifoops = { 176 .vop_default = &fifo_specops, 177 .vop_access = ext2_access, 178 .vop_close = ext2fifo_close, 179 .vop_fsync = ext2_fsync, 180 .vop_getattr = ext2_getattr, 181 .vop_inactive = ext2_inactive, 182 .vop_kqfilter = ext2fifo_kqfilter, 183 .vop_pathconf = ext2_pathconf, 184 .vop_print = ext2_print, 185 .vop_read = VOP_PANIC, 186 .vop_reclaim = ext2_reclaim, 187 .vop_setattr = ext2_setattr, 188 .vop_write = VOP_PANIC, 189 .vop_vptofh = ext2_vptofh, 190 }; 191 192 /* 193 * A virgin directory (no blushing please). 194 * Note that the type and namlen fields are reversed relative to ext2. 195 * Also, we don't use `struct odirtemplate', since it would just cause 196 * endianness problems. 197 */ 198 static struct dirtemplate mastertemplate = { 199 0, 12, 1, EXT2_FT_DIR, ".", 200 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 201 }; 202 static struct dirtemplate omastertemplate = { 203 0, 12, 1, EXT2_FT_UNKNOWN, ".", 204 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." 205 }; 206 207 static void 208 ext2_itimes_locked(struct vnode *vp) 209 { 210 struct inode *ip; 211 struct timespec ts; 212 213 ASSERT_VI_LOCKED(vp, __func__); 214 215 ip = VTOI(vp); 216 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 217 return; 218 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 219 ip->i_flag |= IN_LAZYMOD; 220 else 221 ip->i_flag |= IN_MODIFIED; 222 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 223 vfs_timestamp(&ts); 224 if (ip->i_flag & IN_ACCESS) { 225 ip->i_atime = ts.tv_sec; 226 ip->i_atimensec = ts.tv_nsec; 227 } 228 if (ip->i_flag & IN_UPDATE) { 229 ip->i_mtime = ts.tv_sec; 230 ip->i_mtimensec = ts.tv_nsec; 231 ip->i_modrev++; 232 } 233 if (ip->i_flag & IN_CHANGE) { 234 ip->i_ctime = ts.tv_sec; 235 ip->i_ctimensec = ts.tv_nsec; 236 } 237 } 238 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 239 } 240 241 void 242 ext2_itimes(struct vnode *vp) 243 { 244 245 VI_LOCK(vp); 246 ext2_itimes_locked(vp); 247 VI_UNLOCK(vp); 248 } 249 250 /* 251 * Create a regular file 252 */ 253 static int 254 ext2_create(struct vop_create_args *ap) 255 { 256 int error; 257 258 error = 259 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 260 ap->a_dvp, ap->a_vpp, ap->a_cnp); 261 if (error != 0) 262 return (error); 263 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 264 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 265 return (0); 266 } 267 268 static int 269 ext2_open(struct vop_open_args *ap) 270 { 271 272 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 273 return (EOPNOTSUPP); 274 275 /* 276 * Files marked append-only must be opened for appending. 277 */ 278 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 279 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 280 return (EPERM); 281 282 vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); 283 284 return (0); 285 } 286 287 /* 288 * Close called. 289 * 290 * Update the times on the inode. 291 */ 292 static int 293 ext2_close(struct vop_close_args *ap) 294 { 295 struct vnode *vp = ap->a_vp; 296 297 VI_LOCK(vp); 298 if (vp->v_usecount > 1) 299 ext2_itimes_locked(vp); 300 VI_UNLOCK(vp); 301 return (0); 302 } 303 304 static int 305 ext2_access(struct vop_access_args *ap) 306 { 307 struct vnode *vp = ap->a_vp; 308 struct inode *ip = VTOI(vp); 309 accmode_t accmode = ap->a_accmode; 310 int error; 311 312 if (vp->v_type == VBLK || vp->v_type == VCHR) 313 return (EOPNOTSUPP); 314 315 /* 316 * Disallow write attempts on read-only file systems; 317 * unless the file is a socket, fifo, or a block or 318 * character device resident on the file system. 319 */ 320 if (accmode & VWRITE) { 321 switch (vp->v_type) { 322 case VDIR: 323 case VLNK: 324 case VREG: 325 if (vp->v_mount->mnt_flag & MNT_RDONLY) 326 return (EROFS); 327 break; 328 default: 329 break; 330 } 331 } 332 333 /* If immutable bit set, nobody gets to write it. */ 334 if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) 335 return (EPERM); 336 337 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 338 ap->a_accmode, ap->a_cred, NULL); 339 return (error); 340 } 341 342 static int 343 ext2_getattr(struct vop_getattr_args *ap) 344 { 345 struct vnode *vp = ap->a_vp; 346 struct inode *ip = VTOI(vp); 347 struct vattr *vap = ap->a_vap; 348 349 ext2_itimes(vp); 350 /* 351 * Copy from inode table 352 */ 353 vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); 354 vap->va_fileid = ip->i_number; 355 vap->va_mode = ip->i_mode & ~IFMT; 356 vap->va_nlink = ip->i_nlink; 357 vap->va_uid = ip->i_uid; 358 vap->va_gid = ip->i_gid; 359 vap->va_rdev = ip->i_rdev; 360 vap->va_size = ip->i_size; 361 vap->va_atime.tv_sec = ip->i_atime; 362 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 363 vap->va_mtime.tv_sec = ip->i_mtime; 364 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 365 vap->va_ctime.tv_sec = ip->i_ctime; 366 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 367 if E2DI_HAS_XTIME(ip) { 368 vap->va_birthtime.tv_sec = ip->i_birthtime; 369 vap->va_birthtime.tv_nsec = ip->i_birthnsec; 370 } 371 vap->va_flags = ip->i_flags; 372 vap->va_gen = ip->i_gen; 373 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 374 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 375 vap->va_type = IFTOVT(ip->i_mode); 376 vap->va_filerev = ip->i_modrev; 377 return (0); 378 } 379 380 /* 381 * Set attribute vnode op. called from several syscalls 382 */ 383 static int 384 ext2_setattr(struct vop_setattr_args *ap) 385 { 386 struct vattr *vap = ap->a_vap; 387 struct vnode *vp = ap->a_vp; 388 struct inode *ip = VTOI(vp); 389 struct ucred *cred = ap->a_cred; 390 struct thread *td = curthread; 391 int error; 392 393 /* 394 * Check for unsettable attributes. 395 */ 396 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 397 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 398 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 399 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 400 return (EINVAL); 401 } 402 if (vap->va_flags != VNOVAL) { 403 /* Disallow flags not supported by ext2fs. */ 404 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 405 return (EOPNOTSUPP); 406 407 if (vp->v_mount->mnt_flag & MNT_RDONLY) 408 return (EROFS); 409 /* 410 * Callers may only modify the file flags on objects they 411 * have VADMIN rights for. 412 */ 413 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 414 return (error); 415 /* 416 * Unprivileged processes and privileged processes in 417 * jail() are not permitted to unset system flags, or 418 * modify flags if any system flags are set. 419 * Privileged non-jail processes may not modify system flags 420 * if securelevel > 0 and any existing system flags are set. 421 */ 422 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 423 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) { 424 error = securelevel_gt(cred, 0); 425 if (error) 426 return (error); 427 } 428 } else { 429 if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || 430 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 431 return (EPERM); 432 } 433 ip->i_flags = vap->va_flags; 434 ip->i_flag |= IN_CHANGE; 435 if (ip->i_flags & (IMMUTABLE | APPEND)) 436 return (0); 437 } 438 if (ip->i_flags & (IMMUTABLE | APPEND)) 439 return (EPERM); 440 /* 441 * Go through the fields and update iff not VNOVAL. 442 */ 443 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 444 if (vp->v_mount->mnt_flag & MNT_RDONLY) 445 return (EROFS); 446 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 447 td)) != 0) 448 return (error); 449 } 450 if (vap->va_size != VNOVAL) { 451 /* 452 * Disallow write attempts on read-only file systems; 453 * unless the file is a socket, fifo, or a block or 454 * character device resident on the file system. 455 */ 456 switch (vp->v_type) { 457 case VDIR: 458 return (EISDIR); 459 case VLNK: 460 case VREG: 461 if (vp->v_mount->mnt_flag & MNT_RDONLY) 462 return (EROFS); 463 break; 464 default: 465 break; 466 } 467 if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) 468 return (error); 469 } 470 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 471 if (vp->v_mount->mnt_flag & MNT_RDONLY) 472 return (EROFS); 473 /* 474 * From utimes(2): 475 * If times is NULL, ... The caller must be the owner of 476 * the file, have permission to write the file, or be the 477 * super-user. 478 * If times is non-NULL, ... The caller must be the owner of 479 * the file or be the super-user. 480 */ 481 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 482 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 483 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 484 return (error); 485 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 486 if (vap->va_atime.tv_sec != VNOVAL) { 487 ip->i_flag &= ~IN_ACCESS; 488 ip->i_atime = vap->va_atime.tv_sec; 489 ip->i_atimensec = vap->va_atime.tv_nsec; 490 } 491 if (vap->va_mtime.tv_sec != VNOVAL) { 492 ip->i_flag &= ~IN_UPDATE; 493 ip->i_mtime = vap->va_mtime.tv_sec; 494 ip->i_mtimensec = vap->va_mtime.tv_nsec; 495 } 496 ip->i_birthtime = vap->va_birthtime.tv_sec; 497 ip->i_birthnsec = vap->va_birthtime.tv_nsec; 498 error = ext2_update(vp, 0); 499 if (error) 500 return (error); 501 } 502 error = 0; 503 if (vap->va_mode != (mode_t)VNOVAL) { 504 if (vp->v_mount->mnt_flag & MNT_RDONLY) 505 return (EROFS); 506 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 507 } 508 return (error); 509 } 510 511 /* 512 * Change the mode on a file. 513 * Inode must be locked before calling. 514 */ 515 static int 516 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 517 { 518 struct inode *ip = VTOI(vp); 519 int error; 520 521 /* 522 * To modify the permissions on a file, must possess VADMIN 523 * for that file. 524 */ 525 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 526 return (error); 527 /* 528 * Privileged processes may set the sticky bit on non-directories, 529 * as well as set the setgid bit on a file with a group that the 530 * process is not a member of. 531 */ 532 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 533 error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); 534 if (error) 535 return (EFTYPE); 536 } 537 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 538 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 539 if (error) 540 return (error); 541 } 542 ip->i_mode &= ~ALLPERMS; 543 ip->i_mode |= (mode & ALLPERMS); 544 ip->i_flag |= IN_CHANGE; 545 return (0); 546 } 547 548 /* 549 * Perform chown operation on inode ip; 550 * inode must be locked prior to call. 551 */ 552 static int 553 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 554 struct thread *td) 555 { 556 struct inode *ip = VTOI(vp); 557 uid_t ouid; 558 gid_t ogid; 559 int error = 0; 560 561 if (uid == (uid_t)VNOVAL) 562 uid = ip->i_uid; 563 if (gid == (gid_t)VNOVAL) 564 gid = ip->i_gid; 565 /* 566 * To modify the ownership of a file, must possess VADMIN 567 * for that file. 568 */ 569 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 570 return (error); 571 /* 572 * To change the owner of a file, or change the group of a file 573 * to a group of which we are not a member, the caller must 574 * have privilege. 575 */ 576 if (uid != ip->i_uid || (gid != ip->i_gid && 577 !groupmember(gid, cred))) { 578 error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); 579 if (error) 580 return (error); 581 } 582 ogid = ip->i_gid; 583 ouid = ip->i_uid; 584 ip->i_gid = gid; 585 ip->i_uid = uid; 586 ip->i_flag |= IN_CHANGE; 587 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 588 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) 589 ip->i_mode &= ~(ISUID | ISGID); 590 } 591 return (0); 592 } 593 594 /* 595 * Synch an open file. 596 */ 597 /* ARGSUSED */ 598 static int 599 ext2_fsync(struct vop_fsync_args *ap) 600 { 601 /* 602 * Flush all dirty buffers associated with a vnode. 603 */ 604 605 vop_stdfsync(ap); 606 607 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 608 } 609 610 /* 611 * Mknod vnode call 612 */ 613 /* ARGSUSED */ 614 static int 615 ext2_mknod(struct vop_mknod_args *ap) 616 { 617 struct vattr *vap = ap->a_vap; 618 struct vnode **vpp = ap->a_vpp; 619 struct inode *ip; 620 ino_t ino; 621 int error; 622 623 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 624 ap->a_dvp, vpp, ap->a_cnp); 625 if (error) 626 return (error); 627 ip = VTOI(*vpp); 628 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 629 if (vap->va_rdev != VNOVAL) { 630 /* 631 * Want to be able to use this to make badblock 632 * inodes, so don't truncate the dev number. 633 */ 634 if (!(ip->i_flag & IN_E4EXTENTS)) 635 ip->i_rdev = vap->va_rdev; 636 } 637 /* 638 * Remove inode, then reload it through VFS_VGET so it is 639 * checked to see if it is an alias of an existing entry in 640 * the inode cache. XXX I don't believe this is necessary now. 641 */ 642 (*vpp)->v_type = VNON; 643 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 644 vgone(*vpp); 645 vput(*vpp); 646 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 647 if (error) { 648 *vpp = NULL; 649 return (error); 650 } 651 return (0); 652 } 653 654 static int 655 ext2_remove(struct vop_remove_args *ap) 656 { 657 struct inode *ip; 658 struct vnode *vp = ap->a_vp; 659 struct vnode *dvp = ap->a_dvp; 660 int error; 661 662 ip = VTOI(vp); 663 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 664 (VTOI(dvp)->i_flags & APPEND)) { 665 error = EPERM; 666 goto out; 667 } 668 error = ext2_dirremove(dvp, ap->a_cnp); 669 if (error == 0) { 670 ip->i_nlink--; 671 ip->i_flag |= IN_CHANGE; 672 } 673 out: 674 return (error); 675 } 676 677 static unsigned short 678 ext2_max_nlink(struct inode *ip) 679 { 680 struct m_ext2fs *fs; 681 682 fs = ip->i_e2fs; 683 684 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_DIR_NLINK)) 685 return (EXT4_LINK_MAX); 686 else 687 return (EXT2_LINK_MAX); 688 } 689 690 /* 691 * link vnode call 692 */ 693 static int 694 ext2_link(struct vop_link_args *ap) 695 { 696 struct vnode *vp = ap->a_vp; 697 struct vnode *tdvp = ap->a_tdvp; 698 struct componentname *cnp = ap->a_cnp; 699 struct inode *ip; 700 int error; 701 702 #ifdef INVARIANTS 703 if ((cnp->cn_flags & HASBUF) == 0) 704 panic("ext2_link: no name"); 705 #endif 706 ip = VTOI(vp); 707 if ((nlink_t)ip->i_nlink >= ext2_max_nlink(ip)) { 708 error = EMLINK; 709 goto out; 710 } 711 if (ip->i_flags & (IMMUTABLE | APPEND)) { 712 error = EPERM; 713 goto out; 714 } 715 ip->i_nlink++; 716 ip->i_flag |= IN_CHANGE; 717 error = ext2_update(vp, !DOINGASYNC(vp)); 718 if (!error) 719 error = ext2_direnter(ip, tdvp, cnp); 720 if (error) { 721 ip->i_nlink--; 722 ip->i_flag |= IN_CHANGE; 723 } 724 out: 725 return (error); 726 } 727 728 static int 729 ext2_inc_nlink(struct inode *ip) 730 { 731 732 ip->i_nlink++; 733 734 if (ext2_htree_has_idx(ip) && ip->i_nlink > 1) { 735 if (ip->i_nlink >= ext2_max_nlink(ip) || ip->i_nlink == 2) 736 ip->i_nlink = 1; 737 } else if (ip->i_nlink > ext2_max_nlink(ip)) { 738 ip->i_nlink--; 739 return (EMLINK); 740 } 741 742 return (0); 743 } 744 745 static void 746 ext2_dec_nlink(struct inode *ip) 747 { 748 749 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2) 750 ip->i_nlink--; 751 } 752 753 /* 754 * Rename system call. 755 * rename("foo", "bar"); 756 * is essentially 757 * unlink("bar"); 758 * link("foo", "bar"); 759 * unlink("foo"); 760 * but ``atomically''. Can't do full commit without saving state in the 761 * inode on disk which isn't feasible at this time. Best we can do is 762 * always guarantee the target exists. 763 * 764 * Basic algorithm is: 765 * 766 * 1) Bump link count on source while we're linking it to the 767 * target. This also ensure the inode won't be deleted out 768 * from underneath us while we work (it may be truncated by 769 * a concurrent `trunc' or `open' for creation). 770 * 2) Link source to destination. If destination already exists, 771 * delete it first. 772 * 3) Unlink source reference to inode if still around. If a 773 * directory was moved and the parent of the destination 774 * is different from the source, patch the ".." entry in the 775 * directory. 776 */ 777 static int 778 ext2_rename(struct vop_rename_args *ap) 779 { 780 struct vnode *tvp = ap->a_tvp; 781 struct vnode *tdvp = ap->a_tdvp; 782 struct vnode *fvp = ap->a_fvp; 783 struct vnode *fdvp = ap->a_fdvp; 784 struct componentname *tcnp = ap->a_tcnp; 785 struct componentname *fcnp = ap->a_fcnp; 786 struct inode *ip, *xp, *dp; 787 struct dirtemplate *dirbuf; 788 int doingdirectory = 0, oldparent = 0, newparent = 0; 789 int error = 0; 790 u_char namlen; 791 792 #ifdef INVARIANTS 793 if ((tcnp->cn_flags & HASBUF) == 0 || 794 (fcnp->cn_flags & HASBUF) == 0) 795 panic("ext2_rename: no name"); 796 #endif 797 /* 798 * Check for cross-device rename. 799 */ 800 if ((fvp->v_mount != tdvp->v_mount) || 801 (tvp && (fvp->v_mount != tvp->v_mount))) { 802 error = EXDEV; 803 abortit: 804 if (tdvp == tvp) 805 vrele(tdvp); 806 else 807 vput(tdvp); 808 if (tvp) 809 vput(tvp); 810 vrele(fdvp); 811 vrele(fvp); 812 return (error); 813 } 814 815 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 816 (VTOI(tdvp)->i_flags & APPEND))) { 817 error = EPERM; 818 goto abortit; 819 } 820 821 /* 822 * Renaming a file to itself has no effect. The upper layers should 823 * not call us in that case. Temporarily just warn if they do. 824 */ 825 if (fvp == tvp) { 826 printf("ext2_rename: fvp == tvp (can't happen)\n"); 827 error = 0; 828 goto abortit; 829 } 830 831 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 832 goto abortit; 833 dp = VTOI(fdvp); 834 ip = VTOI(fvp); 835 if (ip->i_nlink >= ext2_max_nlink(ip) && !ext2_htree_has_idx(ip)) { 836 VOP_UNLOCK(fvp, 0); 837 error = EMLINK; 838 goto abortit; 839 } 840 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 841 || (dp->i_flags & APPEND)) { 842 VOP_UNLOCK(fvp, 0); 843 error = EPERM; 844 goto abortit; 845 } 846 if ((ip->i_mode & IFMT) == IFDIR) { 847 /* 848 * Avoid ".", "..", and aliases of "." for obvious reasons. 849 */ 850 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 851 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 852 (ip->i_flag & IN_RENAME)) { 853 VOP_UNLOCK(fvp, 0); 854 error = EINVAL; 855 goto abortit; 856 } 857 ip->i_flag |= IN_RENAME; 858 oldparent = dp->i_number; 859 doingdirectory++; 860 } 861 vrele(fdvp); 862 863 /* 864 * When the target exists, both the directory 865 * and target vnodes are returned locked. 866 */ 867 dp = VTOI(tdvp); 868 xp = NULL; 869 if (tvp) 870 xp = VTOI(tvp); 871 872 /* 873 * 1) Bump link count while we're moving stuff 874 * around. If we crash somewhere before 875 * completing our work, the link count 876 * may be wrong, but correctable. 877 */ 878 ext2_inc_nlink(ip); 879 ip->i_flag |= IN_CHANGE; 880 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 881 VOP_UNLOCK(fvp, 0); 882 goto bad; 883 } 884 885 /* 886 * If ".." must be changed (ie the directory gets a new 887 * parent) then the source directory must not be in the 888 * directory hierarchy above the target, as this would 889 * orphan everything below the source directory. Also 890 * the user must have write permission in the source so 891 * as to be able to change "..". We must repeat the call 892 * to namei, as the parent directory is unlocked by the 893 * call to checkpath(). 894 */ 895 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 896 VOP_UNLOCK(fvp, 0); 897 if (oldparent != dp->i_number) 898 newparent = dp->i_number; 899 if (doingdirectory && newparent) { 900 if (error) /* write access check above */ 901 goto bad; 902 if (xp != NULL) 903 vput(tvp); 904 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 905 if (error) 906 goto out; 907 VREF(tdvp); 908 error = relookup(tdvp, &tvp, tcnp); 909 if (error) 910 goto out; 911 vrele(tdvp); 912 dp = VTOI(tdvp); 913 xp = NULL; 914 if (tvp) 915 xp = VTOI(tvp); 916 } 917 /* 918 * 2) If target doesn't exist, link the target 919 * to the source and unlink the source. 920 * Otherwise, rewrite the target directory 921 * entry to reference the source inode and 922 * expunge the original entry's existence. 923 */ 924 if (xp == NULL) { 925 if (dp->i_devvp != ip->i_devvp) 926 panic("ext2_rename: EXDEV"); 927 /* 928 * Account for ".." in new directory. 929 * When source and destination have the same 930 * parent we don't fool with the link count. 931 */ 932 if (doingdirectory && newparent) { 933 error = ext2_inc_nlink(dp); 934 if (error) 935 goto bad; 936 937 dp->i_flag |= IN_CHANGE; 938 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 939 if (error) 940 goto bad; 941 } 942 error = ext2_direnter(ip, tdvp, tcnp); 943 if (error) { 944 if (doingdirectory && newparent) { 945 ext2_dec_nlink(dp); 946 dp->i_flag |= IN_CHANGE; 947 (void)ext2_update(tdvp, 1); 948 } 949 goto bad; 950 } 951 vput(tdvp); 952 } else { 953 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 954 panic("ext2_rename: EXDEV"); 955 /* 956 * Short circuit rename(foo, foo). 957 */ 958 if (xp->i_number == ip->i_number) 959 panic("ext2_rename: same file"); 960 /* 961 * If the parent directory is "sticky", then the user must 962 * own the parent directory, or the destination of the rename, 963 * otherwise the destination may not be changed (except by 964 * root). This implements append-only directories. 965 */ 966 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 967 tcnp->cn_cred->cr_uid != dp->i_uid && 968 xp->i_uid != tcnp->cn_cred->cr_uid) { 969 error = EPERM; 970 goto bad; 971 } 972 /* 973 * Target must be empty if a directory and have no links 974 * to it. Also, ensure source and target are compatible 975 * (both directories, or both not directories). 976 */ 977 if ((xp->i_mode & IFMT) == IFDIR) { 978 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 979 error = ENOTEMPTY; 980 goto bad; 981 } 982 if (!doingdirectory) { 983 error = ENOTDIR; 984 goto bad; 985 } 986 cache_purge(tdvp); 987 } else if (doingdirectory) { 988 error = EISDIR; 989 goto bad; 990 } 991 error = ext2_dirrewrite(dp, ip, tcnp); 992 if (error) 993 goto bad; 994 /* 995 * If the target directory is in the same 996 * directory as the source directory, 997 * decrement the link count on the parent 998 * of the target directory. 999 */ 1000 if (doingdirectory && !newparent) { 1001 ext2_dec_nlink(dp); 1002 dp->i_flag |= IN_CHANGE; 1003 } 1004 vput(tdvp); 1005 /* 1006 * Adjust the link count of the target to 1007 * reflect the dirrewrite above. If this is 1008 * a directory it is empty and there are 1009 * no links to it, so we can squash the inode and 1010 * any space associated with it. We disallowed 1011 * renaming over top of a directory with links to 1012 * it above, as the remaining link would point to 1013 * a directory without "." or ".." entries. 1014 */ 1015 ext2_dec_nlink(xp); 1016 if (doingdirectory) { 1017 if (--xp->i_nlink != 0) 1018 panic("ext2_rename: linked directory"); 1019 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 1020 tcnp->cn_cred, tcnp->cn_thread); 1021 } 1022 xp->i_flag |= IN_CHANGE; 1023 vput(tvp); 1024 xp = NULL; 1025 } 1026 1027 /* 1028 * 3) Unlink the source. 1029 */ 1030 fcnp->cn_flags &= ~MODMASK; 1031 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1032 VREF(fdvp); 1033 error = relookup(fdvp, &fvp, fcnp); 1034 if (error == 0) 1035 vrele(fdvp); 1036 if (fvp != NULL) { 1037 xp = VTOI(fvp); 1038 dp = VTOI(fdvp); 1039 } else { 1040 /* 1041 * From name has disappeared. IN_RENAME is not sufficient 1042 * to protect against directory races due to timing windows, 1043 * so we can't panic here. 1044 */ 1045 vrele(ap->a_fvp); 1046 return (0); 1047 } 1048 /* 1049 * Ensure that the directory entry still exists and has not 1050 * changed while the new name has been entered. If the source is 1051 * a file then the entry may have been unlinked or renamed. In 1052 * either case there is no further work to be done. If the source 1053 * is a directory then it cannot have been rmdir'ed; its link 1054 * count of three would cause a rmdir to fail with ENOTEMPTY. 1055 * The IN_RENAME flag ensures that it cannot be moved by another 1056 * rename. 1057 */ 1058 if (xp != ip) { 1059 /* 1060 * From name resolves to a different inode. IN_RENAME is 1061 * not sufficient protection against timing window races 1062 * so we can't panic here. 1063 */ 1064 } else { 1065 /* 1066 * If the source is a directory with a 1067 * new parent, the link count of the old 1068 * parent directory must be decremented 1069 * and ".." set to point to the new parent. 1070 */ 1071 if (doingdirectory && newparent) { 1072 ext2_dec_nlink(dp); 1073 dp->i_flag |= IN_CHANGE; 1074 dirbuf = malloc(dp->i_e2fs->e2fs_bsize, M_TEMP, M_WAITOK | M_ZERO); 1075 if (!dirbuf) { 1076 error = ENOMEM; 1077 goto bad; 1078 } 1079 error = vn_rdwr(UIO_READ, fvp, (caddr_t)dirbuf, 1080 ip->i_e2fs->e2fs_bsize, (off_t)0, 1081 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1082 tcnp->cn_cred, NOCRED, NULL, NULL); 1083 if (error == 0) { 1084 /* Like ufs little-endian: */ 1085 namlen = dirbuf->dotdot_type; 1086 if (namlen != 2 || 1087 dirbuf->dotdot_name[0] != '.' || 1088 dirbuf->dotdot_name[1] != '.') { 1089 ext2_dirbad(xp, (doff_t)12, 1090 "rename: mangled dir"); 1091 } else { 1092 dirbuf->dotdot_ino = newparent; 1093 ext2_dir_blk_csum_set_mem(ip, 1094 (char *)dirbuf, 1095 ip->i_e2fs->e2fs_bsize); 1096 (void)vn_rdwr(UIO_WRITE, fvp, 1097 (caddr_t)dirbuf, 1098 ip->i_e2fs->e2fs_bsize, 1099 (off_t)0, UIO_SYSSPACE, 1100 IO_NODELOCKED | IO_SYNC | 1101 IO_NOMACCHECK, tcnp->cn_cred, 1102 NOCRED, NULL, NULL); 1103 cache_purge(fdvp); 1104 } 1105 } 1106 free(dirbuf, M_TEMP); 1107 } 1108 error = ext2_dirremove(fdvp, fcnp); 1109 if (!error) { 1110 ext2_dec_nlink(xp); 1111 xp->i_flag |= IN_CHANGE; 1112 } 1113 xp->i_flag &= ~IN_RENAME; 1114 } 1115 if (dp) 1116 vput(fdvp); 1117 if (xp) 1118 vput(fvp); 1119 vrele(ap->a_fvp); 1120 return (error); 1121 1122 bad: 1123 if (xp) 1124 vput(ITOV(xp)); 1125 vput(ITOV(dp)); 1126 out: 1127 if (doingdirectory) 1128 ip->i_flag &= ~IN_RENAME; 1129 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1130 ext2_dec_nlink(ip); 1131 ip->i_flag |= IN_CHANGE; 1132 ip->i_flag &= ~IN_RENAME; 1133 vput(fvp); 1134 } else 1135 vrele(fvp); 1136 return (error); 1137 } 1138 1139 #ifdef UFS_ACL 1140 static int 1141 ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1142 mode_t dmode, struct ucred *cred, struct thread *td) 1143 { 1144 int error; 1145 struct inode *ip = VTOI(tvp); 1146 struct acl *dacl, *acl; 1147 1148 acl = acl_alloc(M_WAITOK); 1149 dacl = acl_alloc(M_WAITOK); 1150 1151 /* 1152 * Retrieve default ACL from parent, if any. 1153 */ 1154 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1155 switch (error) { 1156 case 0: 1157 /* 1158 * Retrieved a default ACL, so merge mode and ACL if 1159 * necessary. If the ACL is empty, fall through to 1160 * the "not defined or available" case. 1161 */ 1162 if (acl->acl_cnt != 0) { 1163 dmode = acl_posix1e_newfilemode(dmode, acl); 1164 ip->i_mode = dmode; 1165 *dacl = *acl; 1166 ext2_sync_acl_from_inode(ip, acl); 1167 break; 1168 } 1169 /* FALLTHROUGH */ 1170 1171 case EOPNOTSUPP: 1172 /* 1173 * Just use the mode as-is. 1174 */ 1175 ip->i_mode = dmode; 1176 error = 0; 1177 goto out; 1178 1179 default: 1180 goto out; 1181 } 1182 1183 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1184 if (error == 0) 1185 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1186 switch (error) { 1187 case 0: 1188 break; 1189 1190 case EOPNOTSUPP: 1191 /* 1192 * XXX: This should not happen, as EOPNOTSUPP above 1193 * was supposed to free acl. 1194 */ 1195 #ifdef DEBUG 1196 printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1197 #endif /* DEBUG */ 1198 break; 1199 1200 default: 1201 goto out; 1202 } 1203 1204 out: 1205 acl_free(acl); 1206 acl_free(dacl); 1207 1208 return (error); 1209 } 1210 1211 static int 1212 ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1213 mode_t mode, struct ucred *cred, struct thread *td) 1214 { 1215 int error; 1216 struct inode *ip = VTOI(tvp); 1217 struct acl *acl; 1218 1219 acl = acl_alloc(M_WAITOK); 1220 1221 /* 1222 * Retrieve default ACL for parent, if any. 1223 */ 1224 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1225 switch (error) { 1226 case 0: 1227 /* 1228 * Retrieved a default ACL, so merge mode and ACL if 1229 * necessary. 1230 */ 1231 if (acl->acl_cnt != 0) { 1232 /* 1233 * Two possible ways for default ACL to not 1234 * be present. First, the EA can be 1235 * undefined, or second, the default ACL can 1236 * be blank. If it's blank, fall through to 1237 * the it's not defined case. 1238 */ 1239 mode = acl_posix1e_newfilemode(mode, acl); 1240 ip->i_mode = mode; 1241 ext2_sync_acl_from_inode(ip, acl); 1242 break; 1243 } 1244 /* FALLTHROUGH */ 1245 1246 case EOPNOTSUPP: 1247 /* 1248 * Just use the mode as-is. 1249 */ 1250 ip->i_mode = mode; 1251 error = 0; 1252 goto out; 1253 1254 default: 1255 goto out; 1256 } 1257 1258 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1259 switch (error) { 1260 case 0: 1261 break; 1262 1263 case EOPNOTSUPP: 1264 /* 1265 * XXX: This should not happen, as EOPNOTSUPP above was 1266 * supposed to free acl. 1267 */ 1268 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1269 "but no VOP_SETACL()\n"); 1270 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1271 "but no VOP_SETACL()"); */ 1272 break; 1273 1274 default: 1275 goto out; 1276 } 1277 1278 out: 1279 acl_free(acl); 1280 1281 return (error); 1282 } 1283 1284 #endif /* UFS_ACL */ 1285 1286 static void 1287 ext2_init_dirent_tail(struct ext2fs_direct_tail *tp) 1288 { 1289 memset(tp, 0, sizeof(struct ext2fs_direct_tail)); 1290 tp->e2dt_rec_len = sizeof(struct ext2fs_direct_tail); 1291 tp->e2dt_reserved_ft = EXT2_FT_DIR_CSUM; 1292 } 1293 1294 /* 1295 * Mkdir system call 1296 */ 1297 static int 1298 ext2_mkdir(struct vop_mkdir_args *ap) 1299 { 1300 struct m_ext2fs *fs; 1301 struct vnode *dvp = ap->a_dvp; 1302 struct vattr *vap = ap->a_vap; 1303 struct componentname *cnp = ap->a_cnp; 1304 struct inode *ip, *dp; 1305 struct vnode *tvp; 1306 struct dirtemplate dirtemplate, *dtp; 1307 char *buf = NULL; 1308 int error, dmode; 1309 1310 #ifdef INVARIANTS 1311 if ((cnp->cn_flags & HASBUF) == 0) 1312 panic("ext2_mkdir: no name"); 1313 #endif 1314 dp = VTOI(dvp); 1315 if ((nlink_t)dp->i_nlink >= ext2_max_nlink(dp) && 1316 !ext2_htree_has_idx(dp)) { 1317 error = EMLINK; 1318 goto out; 1319 } 1320 dmode = vap->va_mode & 0777; 1321 dmode |= IFDIR; 1322 /* 1323 * Must simulate part of ext2_makeinode here to acquire the inode, 1324 * but not have it entered in the parent directory. The entry is 1325 * made later after writing "." and ".." entries. 1326 */ 1327 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1328 if (error) 1329 goto out; 1330 ip = VTOI(tvp); 1331 fs = ip->i_e2fs; 1332 ip->i_gid = dp->i_gid; 1333 #ifdef SUIDDIR 1334 { 1335 /* 1336 * if we are hacking owners here, (only do this where told to) 1337 * and we are not giving it TOO root, (would subvert quotas) 1338 * then go ahead and give it to the other user. 1339 * The new directory also inherits the SUID bit. 1340 * If user's UID and dir UID are the same, 1341 * 'give it away' so that the SUID is still forced on. 1342 */ 1343 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1344 (dp->i_mode & ISUID) && dp->i_uid) { 1345 dmode |= ISUID; 1346 ip->i_uid = dp->i_uid; 1347 } else { 1348 ip->i_uid = cnp->cn_cred->cr_uid; 1349 } 1350 } 1351 #else 1352 ip->i_uid = cnp->cn_cred->cr_uid; 1353 #endif 1354 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1355 ip->i_mode = dmode; 1356 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1357 ip->i_nlink = 2; 1358 if (cnp->cn_flags & ISWHITEOUT) 1359 ip->i_flags |= UF_OPAQUE; 1360 error = ext2_update(tvp, 1); 1361 1362 /* 1363 * Bump link count in parent directory 1364 * to reflect work done below. Should 1365 * be done before reference is created 1366 * so reparation is possible if we crash. 1367 */ 1368 ext2_inc_nlink(dp); 1369 dp->i_flag |= IN_CHANGE; 1370 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1371 if (error) 1372 goto bad; 1373 1374 /* Initialize directory with "." and ".." from static template. */ 1375 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1376 EXT2F_INCOMPAT_FTYPE)) 1377 dtp = &mastertemplate; 1378 else 1379 dtp = &omastertemplate; 1380 dirtemplate = *dtp; 1381 dirtemplate.dot_ino = ip->i_number; 1382 dirtemplate.dotdot_ino = dp->i_number; 1383 /* 1384 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1385 * just redefine it - for this function only 1386 */ 1387 #undef DIRBLKSIZ 1388 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1389 dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; 1390 buf = malloc(DIRBLKSIZ, M_TEMP, M_WAITOK | M_ZERO); 1391 if (!buf) { 1392 error = ENOMEM; 1393 ext2_dec_nlink(dp); 1394 dp->i_flag |= IN_CHANGE; 1395 goto bad; 1396 } 1397 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1398 dirtemplate.dotdot_reclen -= sizeof(struct ext2fs_direct_tail); 1399 ext2_init_dirent_tail(EXT2_DIRENT_TAIL(buf, DIRBLKSIZ)); 1400 } 1401 memcpy(buf, &dirtemplate, sizeof(dirtemplate)); 1402 ext2_dir_blk_csum_set_mem(ip, buf, DIRBLKSIZ); 1403 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)buf, 1404 DIRBLKSIZ, (off_t)0, UIO_SYSSPACE, 1405 IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, 1406 NULL, NULL); 1407 if (error) { 1408 ext2_dec_nlink(dp); 1409 dp->i_flag |= IN_CHANGE; 1410 goto bad; 1411 } 1412 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1413 /* XXX should grow with balloc() */ 1414 panic("ext2_mkdir: blksize"); 1415 else { 1416 ip->i_size = DIRBLKSIZ; 1417 ip->i_flag |= IN_CHANGE; 1418 } 1419 1420 #ifdef UFS_ACL 1421 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1422 error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1423 cnp->cn_cred, cnp->cn_thread); 1424 if (error) 1425 goto bad; 1426 } 1427 1428 #endif /* UFS_ACL */ 1429 1430 /* Directory set up, now install its entry in the parent directory. */ 1431 error = ext2_direnter(ip, dvp, cnp); 1432 if (error) { 1433 ext2_dec_nlink(dp); 1434 dp->i_flag |= IN_CHANGE; 1435 } 1436 bad: 1437 /* 1438 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1439 * for us because we set the link count to 0. 1440 */ 1441 if (error) { 1442 ip->i_nlink = 0; 1443 ip->i_flag |= IN_CHANGE; 1444 vput(tvp); 1445 } else 1446 *ap->a_vpp = tvp; 1447 out: 1448 free(buf, M_TEMP); 1449 return (error); 1450 #undef DIRBLKSIZ 1451 #define DIRBLKSIZ DEV_BSIZE 1452 } 1453 1454 /* 1455 * Rmdir system call. 1456 */ 1457 static int 1458 ext2_rmdir(struct vop_rmdir_args *ap) 1459 { 1460 struct vnode *vp = ap->a_vp; 1461 struct vnode *dvp = ap->a_dvp; 1462 struct componentname *cnp = ap->a_cnp; 1463 struct inode *ip, *dp; 1464 int error; 1465 1466 ip = VTOI(vp); 1467 dp = VTOI(dvp); 1468 1469 /* 1470 * Verify the directory is empty (and valid). 1471 * (Rmdir ".." won't be valid since 1472 * ".." will contain a reference to 1473 * the current directory and thus be 1474 * non-empty.) 1475 */ 1476 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1477 error = ENOTEMPTY; 1478 goto out; 1479 } 1480 if ((dp->i_flags & APPEND) 1481 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1482 error = EPERM; 1483 goto out; 1484 } 1485 /* 1486 * Delete reference to directory before purging 1487 * inode. If we crash in between, the directory 1488 * will be reattached to lost+found, 1489 */ 1490 error = ext2_dirremove(dvp, cnp); 1491 if (error) 1492 goto out; 1493 ext2_dec_nlink(dp); 1494 dp->i_flag |= IN_CHANGE; 1495 cache_purge(dvp); 1496 VOP_UNLOCK(dvp, 0); 1497 /* 1498 * Truncate inode. The only stuff left 1499 * in the directory is "." and "..". 1500 */ 1501 ip->i_nlink = 0; 1502 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, 1503 cnp->cn_thread); 1504 cache_purge(ITOV(ip)); 1505 if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1506 VOP_UNLOCK(vp, 0); 1507 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1508 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1509 } 1510 out: 1511 return (error); 1512 } 1513 1514 /* 1515 * symlink -- make a symbolic link 1516 */ 1517 static int 1518 ext2_symlink(struct vop_symlink_args *ap) 1519 { 1520 struct vnode *vp, **vpp = ap->a_vpp; 1521 struct inode *ip; 1522 int len, error; 1523 1524 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1525 vpp, ap->a_cnp); 1526 if (error) 1527 return (error); 1528 vp = *vpp; 1529 len = strlen(ap->a_target); 1530 if (len < vp->v_mount->mnt_maxsymlinklen) { 1531 ip = VTOI(vp); 1532 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1533 ip->i_size = len; 1534 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1535 } else 1536 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1537 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1538 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 1539 if (error) 1540 vput(vp); 1541 return (error); 1542 } 1543 1544 /* 1545 * Return target name of a symbolic link 1546 */ 1547 static int 1548 ext2_readlink(struct vop_readlink_args *ap) 1549 { 1550 struct vnode *vp = ap->a_vp; 1551 struct inode *ip = VTOI(vp); 1552 int isize; 1553 1554 isize = ip->i_size; 1555 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1556 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1557 return (0); 1558 } 1559 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1560 } 1561 1562 /* 1563 * Calculate the logical to physical mapping if not done already, 1564 * then call the device strategy routine. 1565 * 1566 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1567 * deadlock on memory. See ext2_bmap() for details. 1568 */ 1569 static int 1570 ext2_strategy(struct vop_strategy_args *ap) 1571 { 1572 struct buf *bp = ap->a_bp; 1573 struct vnode *vp = ap->a_vp; 1574 struct bufobj *bo; 1575 daddr_t blkno; 1576 int error; 1577 1578 if (vp->v_type == VBLK || vp->v_type == VCHR) 1579 panic("ext2_strategy: spec"); 1580 if (bp->b_blkno == bp->b_lblkno) { 1581 1582 if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) 1583 error = ext4_bmapext(vp, bp->b_lblkno, &blkno, NULL, NULL); 1584 else 1585 error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); 1586 1587 bp->b_blkno = blkno; 1588 if (error) { 1589 bp->b_error = error; 1590 bp->b_ioflags |= BIO_ERROR; 1591 bufdone(bp); 1592 return (0); 1593 } 1594 if ((long)bp->b_blkno == -1) 1595 vfs_bio_clrbuf(bp); 1596 } 1597 if ((long)bp->b_blkno == -1) { 1598 bufdone(bp); 1599 return (0); 1600 } 1601 bp->b_iooffset = dbtob(bp->b_blkno); 1602 bo = VFSTOEXT2(vp->v_mount)->um_bo; 1603 BO_STRATEGY(bo, bp); 1604 return (0); 1605 } 1606 1607 /* 1608 * Print out the contents of an inode. 1609 */ 1610 static int 1611 ext2_print(struct vop_print_args *ap) 1612 { 1613 struct vnode *vp = ap->a_vp; 1614 struct inode *ip = VTOI(vp); 1615 1616 vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number); 1617 if (vp->v_type == VFIFO) 1618 fifo_printinfo(vp); 1619 printf("\n"); 1620 return (0); 1621 } 1622 1623 /* 1624 * Close wrapper for fifos. 1625 * 1626 * Update the times on the inode then do device close. 1627 */ 1628 static int 1629 ext2fifo_close(struct vop_close_args *ap) 1630 { 1631 struct vnode *vp = ap->a_vp; 1632 1633 VI_LOCK(vp); 1634 if (vp->v_usecount > 1) 1635 ext2_itimes_locked(vp); 1636 VI_UNLOCK(vp); 1637 return (fifo_specops.vop_close(ap)); 1638 } 1639 1640 /* 1641 * Kqfilter wrapper for fifos. 1642 * 1643 * Fall through to ext2 kqfilter routines if needed 1644 */ 1645 static int 1646 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1647 { 1648 int error; 1649 1650 error = fifo_specops.vop_kqfilter(ap); 1651 if (error) 1652 error = vfs_kqfilter(ap); 1653 return (error); 1654 } 1655 1656 /* 1657 * Return POSIX pathconf information applicable to ext2 filesystems. 1658 */ 1659 static int 1660 ext2_pathconf(struct vop_pathconf_args *ap) 1661 { 1662 int error = 0; 1663 1664 switch (ap->a_name) { 1665 case _PC_LINK_MAX: 1666 if (ext2_htree_has_idx(VTOI(ap->a_vp))) 1667 *ap->a_retval = INT_MAX; 1668 else 1669 *ap->a_retval = ext2_max_nlink(VTOI(ap->a_vp)); 1670 break; 1671 case _PC_NAME_MAX: 1672 *ap->a_retval = NAME_MAX; 1673 break; 1674 case _PC_PIPE_BUF: 1675 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 1676 *ap->a_retval = PIPE_BUF; 1677 else 1678 error = EINVAL; 1679 break; 1680 case _PC_CHOWN_RESTRICTED: 1681 *ap->a_retval = 1; 1682 break; 1683 case _PC_NO_TRUNC: 1684 *ap->a_retval = 1; 1685 break; 1686 1687 #ifdef UFS_ACL 1688 case _PC_ACL_EXTENDED: 1689 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1690 *ap->a_retval = 1; 1691 else 1692 *ap->a_retval = 0; 1693 break; 1694 case _PC_ACL_PATH_MAX: 1695 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 1696 *ap->a_retval = ACL_MAX_ENTRIES; 1697 else 1698 *ap->a_retval = 3; 1699 break; 1700 #endif /* UFS_ACL */ 1701 1702 case _PC_MIN_HOLE_SIZE: 1703 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1704 break; 1705 case _PC_PRIO_IO: 1706 *ap->a_retval = 0; 1707 break; 1708 case _PC_SYNC_IO: 1709 *ap->a_retval = 0; 1710 break; 1711 case _PC_ALLOC_SIZE_MIN: 1712 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1713 break; 1714 case _PC_FILESIZEBITS: 1715 *ap->a_retval = 64; 1716 break; 1717 case _PC_REC_INCR_XFER_SIZE: 1718 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1719 break; 1720 case _PC_REC_MAX_XFER_SIZE: 1721 *ap->a_retval = -1; /* means ``unlimited'' */ 1722 break; 1723 case _PC_REC_MIN_XFER_SIZE: 1724 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1725 break; 1726 case _PC_REC_XFER_ALIGN: 1727 *ap->a_retval = PAGE_SIZE; 1728 break; 1729 case _PC_SYMLINK_MAX: 1730 *ap->a_retval = MAXPATHLEN; 1731 break; 1732 1733 default: 1734 error = vop_stdpathconf(ap); 1735 break; 1736 } 1737 return (error); 1738 } 1739 1740 /* 1741 * Vnode operation to remove a named attribute. 1742 */ 1743 static int 1744 ext2_deleteextattr(struct vop_deleteextattr_args *ap) 1745 { 1746 struct inode *ip; 1747 struct m_ext2fs *fs; 1748 int error; 1749 1750 ip = VTOI(ap->a_vp); 1751 fs = ip->i_e2fs; 1752 1753 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1754 return (EOPNOTSUPP); 1755 1756 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1757 return (EOPNOTSUPP); 1758 1759 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1760 ap->a_cred, ap->a_td, VWRITE); 1761 if (error) 1762 return (error); 1763 1764 error = ENOATTR; 1765 1766 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1767 error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name); 1768 if (error != ENOATTR) 1769 return (error); 1770 } 1771 1772 if (ip->i_facl) 1773 error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name); 1774 1775 return (error); 1776 } 1777 1778 /* 1779 * Vnode operation to retrieve a named extended attribute. 1780 */ 1781 static int 1782 ext2_getextattr(struct vop_getextattr_args *ap) 1783 { 1784 struct inode *ip; 1785 struct m_ext2fs *fs; 1786 int error; 1787 1788 ip = VTOI(ap->a_vp); 1789 fs = ip->i_e2fs; 1790 1791 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1792 return (EOPNOTSUPP); 1793 1794 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1795 return (EOPNOTSUPP); 1796 1797 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1798 ap->a_cred, ap->a_td, VREAD); 1799 if (error) 1800 return (error); 1801 1802 if (ap->a_size != NULL) 1803 *ap->a_size = 0; 1804 1805 error = ENOATTR; 1806 1807 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1808 error = ext2_extattr_inode_get(ip, ap->a_attrnamespace, 1809 ap->a_name, ap->a_uio, ap->a_size); 1810 if (error != ENOATTR) 1811 return (error); 1812 } 1813 1814 if (ip->i_facl) 1815 error = ext2_extattr_block_get(ip, ap->a_attrnamespace, 1816 ap->a_name, ap->a_uio, ap->a_size); 1817 1818 return (error); 1819 } 1820 1821 /* 1822 * Vnode operation to retrieve extended attributes on a vnode. 1823 */ 1824 static int 1825 ext2_listextattr(struct vop_listextattr_args *ap) 1826 { 1827 struct inode *ip; 1828 struct m_ext2fs *fs; 1829 int error; 1830 1831 ip = VTOI(ap->a_vp); 1832 fs = ip->i_e2fs; 1833 1834 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1835 return (EOPNOTSUPP); 1836 1837 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1838 return (EOPNOTSUPP); 1839 1840 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1841 ap->a_cred, ap->a_td, VREAD); 1842 if (error) 1843 return (error); 1844 1845 if (ap->a_size != NULL) 1846 *ap->a_size = 0; 1847 1848 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1849 error = ext2_extattr_inode_list(ip, ap->a_attrnamespace, 1850 ap->a_uio, ap->a_size); 1851 if (error) 1852 return (error); 1853 } 1854 1855 if (ip->i_facl) 1856 error = ext2_extattr_block_list(ip, ap->a_attrnamespace, 1857 ap->a_uio, ap->a_size); 1858 1859 return (error); 1860 } 1861 1862 /* 1863 * Vnode operation to set a named attribute. 1864 */ 1865 static int 1866 ext2_setextattr(struct vop_setextattr_args *ap) 1867 { 1868 struct inode *ip; 1869 struct m_ext2fs *fs; 1870 int error; 1871 1872 ip = VTOI(ap->a_vp); 1873 fs = ip->i_e2fs; 1874 1875 if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR)) 1876 return (EOPNOTSUPP); 1877 1878 if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) 1879 return (EOPNOTSUPP); 1880 1881 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1882 ap->a_cred, ap->a_td, VWRITE); 1883 if (error) 1884 return (error); 1885 1886 error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name); 1887 if (error) 1888 return (error); 1889 1890 if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) { 1891 error = ext2_extattr_inode_set(ip, ap->a_attrnamespace, 1892 ap->a_name, ap->a_uio); 1893 if (error != ENOSPC) 1894 return (error); 1895 } 1896 1897 error = ext2_extattr_block_set(ip, ap->a_attrnamespace, 1898 ap->a_name, ap->a_uio); 1899 1900 return (error); 1901 } 1902 1903 /* 1904 * Vnode pointer to File handle 1905 */ 1906 /* ARGSUSED */ 1907 static int 1908 ext2_vptofh(struct vop_vptofh_args *ap) 1909 { 1910 struct inode *ip; 1911 struct ufid *ufhp; 1912 1913 ip = VTOI(ap->a_vp); 1914 ufhp = (struct ufid *)ap->a_fhp; 1915 ufhp->ufid_len = sizeof(struct ufid); 1916 ufhp->ufid_ino = ip->i_number; 1917 ufhp->ufid_gen = ip->i_gen; 1918 return (0); 1919 } 1920 1921 /* 1922 * Initialize the vnode associated with a new inode, handle aliased 1923 * vnodes. 1924 */ 1925 int 1926 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) 1927 { 1928 struct inode *ip; 1929 struct vnode *vp; 1930 1931 vp = *vpp; 1932 ip = VTOI(vp); 1933 vp->v_type = IFTOVT(ip->i_mode); 1934 if (vp->v_type == VFIFO) 1935 vp->v_op = fifoops; 1936 1937 if (ip->i_number == EXT2_ROOTINO) 1938 vp->v_vflag |= VV_ROOT; 1939 ip->i_modrev = init_va_filerev(); 1940 *vpp = vp; 1941 return (0); 1942 } 1943 1944 /* 1945 * Allocate a new inode. 1946 */ 1947 static int 1948 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1949 struct componentname *cnp) 1950 { 1951 struct inode *ip, *pdir; 1952 struct vnode *tvp; 1953 int error; 1954 1955 pdir = VTOI(dvp); 1956 #ifdef INVARIANTS 1957 if ((cnp->cn_flags & HASBUF) == 0) 1958 panic("ext2_makeinode: no name"); 1959 #endif 1960 *vpp = NULL; 1961 if ((mode & IFMT) == 0) 1962 mode |= IFREG; 1963 1964 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1965 if (error) { 1966 return (error); 1967 } 1968 ip = VTOI(tvp); 1969 ip->i_gid = pdir->i_gid; 1970 #ifdef SUIDDIR 1971 { 1972 /* 1973 * if we are 1974 * not the owner of the directory, 1975 * and we are hacking owners here, (only do this where told to) 1976 * and we are not giving it TOO root, (would subvert quotas) 1977 * then go ahead and give it to the other user. 1978 * Note that this drops off the execute bits for security. 1979 */ 1980 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1981 (pdir->i_mode & ISUID) && 1982 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1983 ip->i_uid = pdir->i_uid; 1984 mode &= ~07111; 1985 } else { 1986 ip->i_uid = cnp->cn_cred->cr_uid; 1987 } 1988 } 1989 #else 1990 ip->i_uid = cnp->cn_cred->cr_uid; 1991 #endif 1992 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1993 ip->i_mode = mode; 1994 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1995 ip->i_nlink = 1; 1996 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1997 if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) 1998 ip->i_mode &= ~ISGID; 1999 } 2000 2001 if (cnp->cn_flags & ISWHITEOUT) 2002 ip->i_flags |= UF_OPAQUE; 2003 2004 /* 2005 * Make sure inode goes to disk before directory entry. 2006 */ 2007 error = ext2_update(tvp, !DOINGASYNC(tvp)); 2008 if (error) 2009 goto bad; 2010 2011 #ifdef UFS_ACL 2012 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 2013 error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 2014 cnp->cn_cred, cnp->cn_thread); 2015 if (error) 2016 goto bad; 2017 } 2018 #endif /* UFS_ACL */ 2019 2020 error = ext2_direnter(ip, dvp, cnp); 2021 if (error) 2022 goto bad; 2023 2024 *vpp = tvp; 2025 return (0); 2026 2027 bad: 2028 /* 2029 * Write error occurred trying to update the inode 2030 * or the directory so must deallocate the inode. 2031 */ 2032 ip->i_nlink = 0; 2033 ip->i_flag |= IN_CHANGE; 2034 vput(tvp); 2035 return (error); 2036 } 2037 2038 /* 2039 * Vnode op for reading. 2040 */ 2041 static int 2042 ext2_read(struct vop_read_args *ap) 2043 { 2044 struct vnode *vp; 2045 struct inode *ip; 2046 struct uio *uio; 2047 struct m_ext2fs *fs; 2048 struct buf *bp; 2049 daddr_t lbn, nextlbn; 2050 off_t bytesinfile; 2051 long size, xfersize, blkoffset; 2052 int error, orig_resid, seqcount; 2053 int ioflag; 2054 2055 vp = ap->a_vp; 2056 uio = ap->a_uio; 2057 ioflag = ap->a_ioflag; 2058 2059 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 2060 ip = VTOI(vp); 2061 2062 #ifdef INVARIANTS 2063 if (uio->uio_rw != UIO_READ) 2064 panic("%s: mode", "ext2_read"); 2065 2066 if (vp->v_type == VLNK) { 2067 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 2068 panic("%s: short symlink", "ext2_read"); 2069 } else if (vp->v_type != VREG && vp->v_type != VDIR) 2070 panic("%s: type %d", "ext2_read", vp->v_type); 2071 #endif 2072 orig_resid = uio->uio_resid; 2073 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 2074 if (orig_resid == 0) 2075 return (0); 2076 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 2077 fs = ip->i_e2fs; 2078 if (uio->uio_offset < ip->i_size && 2079 uio->uio_offset >= fs->e2fs_maxfilesize) 2080 return (EOVERFLOW); 2081 2082 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 2083 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 2084 break; 2085 lbn = lblkno(fs, uio->uio_offset); 2086 nextlbn = lbn + 1; 2087 size = blksize(fs, ip, lbn); 2088 blkoffset = blkoff(fs, uio->uio_offset); 2089 2090 xfersize = fs->e2fs_fsize - blkoffset; 2091 if (uio->uio_resid < xfersize) 2092 xfersize = uio->uio_resid; 2093 if (bytesinfile < xfersize) 2094 xfersize = bytesinfile; 2095 2096 if (lblktosize(fs, nextlbn) >= ip->i_size) 2097 error = bread(vp, lbn, size, NOCRED, &bp); 2098 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 2099 error = cluster_read(vp, ip->i_size, lbn, size, 2100 NOCRED, blkoffset + uio->uio_resid, seqcount, 2101 0, &bp); 2102 } else if (seqcount > 1) { 2103 u_int nextsize = blksize(fs, ip, nextlbn); 2104 2105 error = breadn(vp, lbn, 2106 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 2107 } else 2108 error = bread(vp, lbn, size, NOCRED, &bp); 2109 if (error) { 2110 brelse(bp); 2111 bp = NULL; 2112 break; 2113 } 2114 2115 /* 2116 * We should only get non-zero b_resid when an I/O error 2117 * has occurred, which should cause us to break above. 2118 * However, if the short read did not cause an error, 2119 * then we want to ensure that we do not uiomove bad 2120 * or uninitialized data. 2121 */ 2122 size -= bp->b_resid; 2123 if (size < xfersize) { 2124 if (size == 0) 2125 break; 2126 xfersize = size; 2127 } 2128 error = uiomove((char *)bp->b_data + blkoffset, 2129 (int)xfersize, uio); 2130 if (error) 2131 break; 2132 vfs_bio_brelse(bp, ioflag); 2133 } 2134 2135 /* 2136 * This can only happen in the case of an error because the loop 2137 * above resets bp to NULL on each iteration and on normal 2138 * completion has not set a new value into it. so it must have come 2139 * from a 'break' statement 2140 */ 2141 if (bp != NULL) 2142 vfs_bio_brelse(bp, ioflag); 2143 2144 if ((error == 0 || uio->uio_resid != orig_resid) && 2145 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 2146 ip->i_flag |= IN_ACCESS; 2147 return (error); 2148 } 2149 2150 static int 2151 ext2_ioctl(struct vop_ioctl_args *ap) 2152 { 2153 2154 switch (ap->a_command) { 2155 case FIOSEEKDATA: 2156 case FIOSEEKHOLE: 2157 return (vn_bmap_seekhole(ap->a_vp, ap->a_command, 2158 (off_t *)ap->a_data, ap->a_cred)); 2159 default: 2160 return (ENOTTY); 2161 } 2162 } 2163 2164 /* 2165 * Vnode op for writing. 2166 */ 2167 static int 2168 ext2_write(struct vop_write_args *ap) 2169 { 2170 struct vnode *vp; 2171 struct uio *uio; 2172 struct inode *ip; 2173 struct m_ext2fs *fs; 2174 struct buf *bp; 2175 daddr_t lbn; 2176 off_t osize; 2177 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 2178 2179 ioflag = ap->a_ioflag; 2180 uio = ap->a_uio; 2181 vp = ap->a_vp; 2182 2183 seqcount = ioflag >> IO_SEQSHIFT; 2184 ip = VTOI(vp); 2185 2186 #ifdef INVARIANTS 2187 if (uio->uio_rw != UIO_WRITE) 2188 panic("%s: mode", "ext2_write"); 2189 #endif 2190 2191 switch (vp->v_type) { 2192 case VREG: 2193 if (ioflag & IO_APPEND) 2194 uio->uio_offset = ip->i_size; 2195 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 2196 return (EPERM); 2197 /* FALLTHROUGH */ 2198 case VLNK: 2199 break; 2200 case VDIR: 2201 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 2202 if ((ioflag & IO_SYNC) == 0) 2203 panic("ext2_write: nonsync dir write"); 2204 break; 2205 default: 2206 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 2207 vp->v_type, (intmax_t)uio->uio_offset, 2208 (intmax_t)uio->uio_resid); 2209 } 2210 2211 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 2212 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 2213 fs = ip->i_e2fs; 2214 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 2215 return (EFBIG); 2216 /* 2217 * Maybe this should be above the vnode op call, but so long as 2218 * file servers have no limits, I don't think it matters. 2219 */ 2220 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 2221 return (EFBIG); 2222 2223 resid = uio->uio_resid; 2224 osize = ip->i_size; 2225 if (seqcount > BA_SEQMAX) 2226 flags = BA_SEQMAX << BA_SEQSHIFT; 2227 else 2228 flags = seqcount << BA_SEQSHIFT; 2229 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 2230 flags |= IO_SYNC; 2231 2232 for (error = 0; uio->uio_resid > 0;) { 2233 lbn = lblkno(fs, uio->uio_offset); 2234 blkoffset = blkoff(fs, uio->uio_offset); 2235 xfersize = fs->e2fs_fsize - blkoffset; 2236 if (uio->uio_resid < xfersize) 2237 xfersize = uio->uio_resid; 2238 if (uio->uio_offset + xfersize > ip->i_size) 2239 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 2240 2241 /* 2242 * We must perform a read-before-write if the transfer size 2243 * does not cover the entire buffer. 2244 */ 2245 if (fs->e2fs_bsize > xfersize) 2246 flags |= BA_CLRBUF; 2247 else 2248 flags &= ~BA_CLRBUF; 2249 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 2250 ap->a_cred, &bp, flags); 2251 if (error != 0) 2252 break; 2253 2254 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL)) 2255 bp->b_flags |= B_NOCACHE; 2256 if (uio->uio_offset + xfersize > ip->i_size) 2257 ip->i_size = uio->uio_offset + xfersize; 2258 size = blksize(fs, ip, lbn) - bp->b_resid; 2259 if (size < xfersize) 2260 xfersize = size; 2261 2262 error = 2263 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 2264 /* 2265 * If the buffer is not already filled and we encounter an 2266 * error while trying to fill it, we have to clear out any 2267 * garbage data from the pages instantiated for the buffer. 2268 * If we do not, a failed uiomove() during a write can leave 2269 * the prior contents of the pages exposed to a userland mmap. 2270 * 2271 * Note that we need only clear buffers with a transfer size 2272 * equal to the block size because buffers with a shorter 2273 * transfer size were cleared above by the call to ext2_balloc() 2274 * with the BA_CLRBUF flag set. 2275 * 2276 * If the source region for uiomove identically mmaps the 2277 * buffer, uiomove() performed the NOP copy, and the buffer 2278 * content remains valid because the page fault handler 2279 * validated the pages. 2280 */ 2281 if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 2282 fs->e2fs_bsize == xfersize) 2283 vfs_bio_clrbuf(bp); 2284 2285 vfs_bio_set_flags(bp, ioflag); 2286 2287 /* 2288 * If IO_SYNC each buffer is written synchronously. Otherwise 2289 * if we have a severe page deficiency write the buffer 2290 * asynchronously. Otherwise try to cluster, and if that 2291 * doesn't do it then either do an async write (if O_DIRECT), 2292 * or a delayed write (if not). 2293 */ 2294 if (ioflag & IO_SYNC) { 2295 (void)bwrite(bp); 2296 } else if (vm_page_count_severe() || 2297 buf_dirty_count_severe() || 2298 (ioflag & IO_ASYNC)) { 2299 bp->b_flags |= B_CLUSTEROK; 2300 bawrite(bp); 2301 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 2302 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 2303 bp->b_flags |= B_CLUSTEROK; 2304 cluster_write(vp, bp, ip->i_size, seqcount, 0); 2305 } else { 2306 bawrite(bp); 2307 } 2308 } else if (ioflag & IO_DIRECT) { 2309 bp->b_flags |= B_CLUSTEROK; 2310 bawrite(bp); 2311 } else { 2312 bp->b_flags |= B_CLUSTEROK; 2313 bdwrite(bp); 2314 } 2315 if (error || xfersize == 0) 2316 break; 2317 } 2318 /* 2319 * If we successfully wrote any data, and we are not the superuser 2320 * we clear the setuid and setgid bits as a precaution against 2321 * tampering. 2322 */ 2323 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 2324 ap->a_cred) { 2325 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 2326 ip->i_mode &= ~(ISUID | ISGID); 2327 } 2328 if (error) { 2329 if (ioflag & IO_UNIT) { 2330 (void)ext2_truncate(vp, osize, 2331 ioflag & IO_SYNC, ap->a_cred, uio->uio_td); 2332 uio->uio_offset -= resid - uio->uio_resid; 2333 uio->uio_resid = resid; 2334 } 2335 } 2336 if (uio->uio_resid != resid) { 2337 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2338 if (ioflag & IO_SYNC) 2339 error = ext2_update(vp, 1); 2340 } 2341 return (error); 2342 } 2343