1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/fs/open.c 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 7 8 #include <linux/string.h> 9 #include <linux/mm.h> 10 #include <linux/file.h> 11 #include <linux/fdtable.h> 12 #include <linux/fsnotify.h> 13 #include <linux/module.h> 14 #include <linux/tty.h> 15 #include <linux/namei.h> 16 #include <linux/backing-dev.h> 17 #include <linux/capability.h> 18 #include <linux/securebits.h> 19 #include <linux/security.h> 20 #include <linux/mount.h> 21 #include <linux/fcntl.h> 22 #include <linux/slab.h> 23 #include <linux/uaccess.h> 24 #include <linux/fs.h> 25 #include <linux/personality.h> 26 #include <linux/pagemap.h> 27 #include <linux/syscalls.h> 28 #include <linux/rcupdate.h> 29 #include <linux/audit.h> 30 #include <linux/falloc.h> 31 #include <linux/fs_struct.h> 32 #include <linux/dnotify.h> 33 #include <linux/compat.h> 34 #include <linux/mnt_idmapping.h> 35 #include <linux/filelock.h> 36 37 #include "internal.h" 38 39 int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, 40 loff_t length, unsigned int time_attrs, struct file *filp) 41 { 42 int ret; 43 struct iattr newattrs; 44 45 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. 
*/ 46 if (length < 0) 47 return -EINVAL; 48 49 newattrs.ia_size = length; 50 newattrs.ia_valid = ATTR_SIZE | time_attrs; 51 if (filp) { 52 newattrs.ia_file = filp; 53 newattrs.ia_valid |= ATTR_FILE; 54 } 55 56 /* Remove suid, sgid, and file capabilities on truncate too */ 57 ret = dentry_needs_remove_privs(idmap, dentry); 58 if (ret < 0) 59 return ret; 60 if (ret) 61 newattrs.ia_valid |= ret | ATTR_FORCE; 62 63 ret = inode_lock_killable(dentry->d_inode); 64 if (ret) 65 return ret; 66 67 /* Note any delegations or leases have already been broken: */ 68 ret = notify_change(idmap, dentry, &newattrs, NULL); 69 inode_unlock(dentry->d_inode); 70 return ret; 71 } 72 73 int vfs_truncate(const struct path *path, loff_t length) 74 { 75 struct mnt_idmap *idmap; 76 struct inode *inode; 77 int error; 78 79 inode = path->dentry->d_inode; 80 81 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 82 if (S_ISDIR(inode->i_mode)) 83 return -EISDIR; 84 if (!S_ISREG(inode->i_mode)) 85 return -EINVAL; 86 87 idmap = mnt_idmap(path->mnt); 88 error = inode_permission(idmap, inode, MAY_WRITE); 89 if (error) 90 return error; 91 92 error = fsnotify_truncate_perm(path, length); 93 if (error) 94 return error; 95 96 error = mnt_want_write(path->mnt); 97 if (error) 98 return error; 99 100 error = -EPERM; 101 if (IS_APPEND(inode)) 102 goto mnt_drop_write_and_out; 103 104 error = get_write_access(inode); 105 if (error) 106 goto mnt_drop_write_and_out; 107 108 /* 109 * Make sure that there are no leases. get_write_access() protects 110 * against the truncate racing with a lease-granting setlease(). 
111 */ 112 error = break_lease(inode, O_WRONLY); 113 if (error) 114 goto put_write_and_out; 115 116 error = security_path_truncate(path); 117 if (!error) 118 error = do_truncate(idmap, path->dentry, length, 0, NULL); 119 120 put_write_and_out: 121 put_write_access(inode); 122 mnt_drop_write_and_out: 123 mnt_drop_write(path->mnt); 124 125 return error; 126 } 127 EXPORT_SYMBOL_GPL(vfs_truncate); 128 129 int ksys_truncate(const char __user *pathname, loff_t length) 130 { 131 unsigned int lookup_flags = LOOKUP_FOLLOW; 132 struct path path; 133 int error; 134 135 if (length < 0) /* sorry, but loff_t says... */ 136 return -EINVAL; 137 138 CLASS(filename, name)(pathname); 139 retry: 140 error = filename_lookup(AT_FDCWD, name, lookup_flags, &path, NULL); 141 if (!error) { 142 error = vfs_truncate(&path, length); 143 path_put(&path); 144 if (retry_estale(error, lookup_flags)) { 145 lookup_flags |= LOOKUP_REVAL; 146 goto retry; 147 } 148 } 149 return error; 150 } 151 152 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) 153 { 154 return ksys_truncate(path, length); 155 } 156 157 #ifdef CONFIG_COMPAT 158 COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length) 159 { 160 return ksys_truncate(path, length); 161 } 162 #endif 163 164 int do_ftruncate(struct file *file, loff_t length, unsigned int flags) 165 { 166 struct dentry *dentry = file->f_path.dentry; 167 struct inode *inode = dentry->d_inode; 168 int error; 169 170 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 171 return -EINVAL; 172 173 /* 174 * Cannot ftruncate over 2^31 bytes without large file support, either 175 * through opening with O_LARGEFILE or by using ftruncate64(). 
176 */ 177 if (length > MAX_NON_LFS && 178 !(file->f_flags & O_LARGEFILE) && !(flags & FTRUNCATE_LFS)) 179 return -EINVAL; 180 181 /* Check IS_APPEND on real upper inode */ 182 if (IS_APPEND(file_inode(file))) 183 return -EPERM; 184 185 error = security_file_truncate(file); 186 if (error) 187 return error; 188 189 error = fsnotify_truncate_perm(&file->f_path, length); 190 if (error) 191 return error; 192 193 scoped_guard(super_write, inode->i_sb) 194 return do_truncate(file_mnt_idmap(file), dentry, length, 195 ATTR_MTIME | ATTR_CTIME, file); 196 } 197 198 int ksys_ftruncate(unsigned int fd, loff_t length, unsigned int flags) 199 { 200 if (length < 0) 201 return -EINVAL; 202 CLASS(fd, f)(fd); 203 if (fd_empty(f)) 204 return -EBADF; 205 206 return do_ftruncate(fd_file(f), length, flags); 207 } 208 209 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length) 210 { 211 return ksys_ftruncate(fd, length, 0); 212 } 213 214 #ifdef CONFIG_COMPAT 215 COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length) 216 { 217 return ksys_ftruncate(fd, length, 0); 218 } 219 #endif 220 221 /* LFS versions of truncate are only needed on 32 bit machines */ 222 #if BITS_PER_LONG == 32 223 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) 224 { 225 return ksys_truncate(path, length); 226 } 227 228 SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length) 229 { 230 return ksys_ftruncate(fd, length, FTRUNCATE_LFS); 231 } 232 #endif /* BITS_PER_LONG == 32 */ 233 234 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_TRUNCATE64) 235 COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname, 236 compat_arg_u64_dual(length)) 237 { 238 return ksys_truncate(pathname, compat_arg_u64_glue(length)); 239 } 240 #endif 241 242 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FTRUNCATE64) 243 COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, 244 compat_arg_u64_dual(length)) 245 { 246 return ksys_ftruncate(fd, 
compat_arg_u64_glue(length), FTRUNCATE_LFS); 247 } 248 #endif 249 250 int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 251 { 252 struct inode *inode = file_inode(file); 253 int ret; 254 loff_t sum; 255 256 if (offset < 0 || len <= 0) 257 return -EINVAL; 258 259 if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE)) 260 return -EOPNOTSUPP; 261 262 /* 263 * Modes are exclusive, even if that is not obvious from the encoding 264 * as bit masks and the mix with the flag in the same namespace. 265 * 266 * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is 267 * encoded as no bit set. 268 */ 269 switch (mode & FALLOC_FL_MODE_MASK) { 270 case FALLOC_FL_ALLOCATE_RANGE: 271 case FALLOC_FL_UNSHARE_RANGE: 272 case FALLOC_FL_ZERO_RANGE: 273 break; 274 case FALLOC_FL_PUNCH_HOLE: 275 if (!(mode & FALLOC_FL_KEEP_SIZE)) 276 return -EOPNOTSUPP; 277 break; 278 case FALLOC_FL_COLLAPSE_RANGE: 279 case FALLOC_FL_INSERT_RANGE: 280 case FALLOC_FL_WRITE_ZEROES: 281 if (mode & FALLOC_FL_KEEP_SIZE) 282 return -EOPNOTSUPP; 283 break; 284 default: 285 return -EOPNOTSUPP; 286 } 287 288 if (!(file->f_mode & FMODE_WRITE)) 289 return -EBADF; 290 291 /* 292 * On append-only files only space preallocation is supported. 293 */ 294 if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) 295 return -EPERM; 296 297 if (IS_IMMUTABLE(inode)) 298 return -EPERM; 299 300 /* 301 * We cannot allow any fallocate operation on an active swapfile 302 */ 303 if (IS_SWAPFILE(inode)) 304 return -ETXTBSY; 305 306 /* 307 * Revalidate the write permissions, in case security policy has 308 * changed since the files were opened. 
309 */ 310 ret = security_file_permission(file, MAY_WRITE); 311 if (ret) 312 return ret; 313 314 ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len); 315 if (ret) 316 return ret; 317 318 if (S_ISFIFO(inode->i_mode)) 319 return -ESPIPE; 320 321 if (S_ISDIR(inode->i_mode)) 322 return -EISDIR; 323 324 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 325 return -ENODEV; 326 327 /* Check for wraparound */ 328 if (check_add_overflow(offset, len, &sum)) 329 return -EFBIG; 330 331 if (sum > inode->i_sb->s_maxbytes) 332 return -EFBIG; 333 334 if (!file->f_op->fallocate) 335 return -EOPNOTSUPP; 336 337 file_start_write(file); 338 ret = file->f_op->fallocate(file, mode, offset, len); 339 340 /* 341 * Create inotify and fanotify events. 342 * 343 * To keep the logic simple always create events if fallocate succeeds. 344 * This implies that events are even created if the file size remains 345 * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE. 346 */ 347 if (ret == 0) 348 fsnotify_modify(file); 349 350 file_end_write(file); 351 return ret; 352 } 353 EXPORT_SYMBOL_GPL(vfs_fallocate); 354 355 int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len) 356 { 357 CLASS(fd, f)(fd); 358 359 if (fd_empty(f)) 360 return -EBADF; 361 362 return vfs_fallocate(fd_file(f), mode, offset, len); 363 } 364 365 SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) 366 { 367 return ksys_fallocate(fd, mode, offset, len); 368 } 369 370 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FALLOCATE) 371 COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset), 372 compat_arg_u64_dual(len)) 373 { 374 return ksys_fallocate(fd, mode, compat_arg_u64_glue(offset), 375 compat_arg_u64_glue(len)); 376 } 377 #endif 378 379 /* 380 * access() needs to use the real uid/gid, not the effective uid/gid. 381 * We do this by temporarily clearing all FS-related capabilities and 382 * switching the fsuid/fsgid around to the real ones. 
383 * 384 * Creating new credentials is expensive, so we try to skip doing it, 385 * which we can if the result would match what we already got. 386 */ 387 static bool access_need_override_creds(int flags) 388 { 389 const struct cred *cred; 390 391 if (flags & AT_EACCESS) 392 return false; 393 394 cred = current_cred(); 395 if (!uid_eq(cred->fsuid, cred->uid) || 396 !gid_eq(cred->fsgid, cred->gid)) 397 return true; 398 399 if (!issecure(SECURE_NO_SETUID_FIXUP)) { 400 kuid_t root_uid = make_kuid(cred->user_ns, 0); 401 if (!uid_eq(cred->uid, root_uid)) { 402 if (!cap_isclear(cred->cap_effective)) 403 return true; 404 } else { 405 if (!cap_isidentical(cred->cap_effective, 406 cred->cap_permitted)) 407 return true; 408 } 409 } 410 411 return false; 412 } 413 414 static const struct cred *access_override_creds(void) 415 { 416 struct cred *override_cred; 417 418 override_cred = prepare_creds(); 419 if (!override_cred) 420 return NULL; 421 422 /* 423 * XXX access_need_override_creds performs checks in hopes of skipping 424 * this work. Make sure it stays in sync if making any changes in this 425 * routine. 426 */ 427 428 override_cred->fsuid = override_cred->uid; 429 override_cred->fsgid = override_cred->gid; 430 431 if (!issecure(SECURE_NO_SETUID_FIXUP)) { 432 /* Clear the capabilities if we switch to a non-root user */ 433 kuid_t root_uid = make_kuid(override_cred->user_ns, 0); 434 if (!uid_eq(override_cred->uid, root_uid)) 435 cap_clear(override_cred->cap_effective); 436 else 437 override_cred->cap_effective = 438 override_cred->cap_permitted; 439 } 440 441 /* 442 * The new set of credentials can *only* be used in 443 * task-synchronous circumstances, and does not need 444 * RCU freeing, unless somebody then takes a separate 445 * reference to it. 446 * 447 * NOTE! This is _only_ true because this credential 448 * is used purely for override_creds() that installs 449 * it as the subjective cred. 
Other threads will be 450 * accessing ->real_cred, not the subjective cred. 451 * 452 * If somebody _does_ make a copy of this (using the 453 * 'get_current_cred()' function), that will clear the 454 * non_rcu field, because now that other user may be 455 * expecting RCU freeing. But normal thread-synchronous 456 * cred accesses will keep things non-racy to avoid RCU 457 * freeing. 458 */ 459 override_cred->non_rcu = 1; 460 return override_creds(override_cred); 461 } 462 463 static int do_faccessat(int dfd, const char __user *filename, int mode, int flags) 464 { 465 struct path path; 466 struct inode *inode; 467 int res; 468 unsigned int lookup_flags = LOOKUP_FOLLOW; 469 const struct cred *old_cred = NULL; 470 471 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 472 return -EINVAL; 473 474 if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) 475 return -EINVAL; 476 477 if (flags & AT_SYMLINK_NOFOLLOW) 478 lookup_flags &= ~LOOKUP_FOLLOW; 479 480 if (access_need_override_creds(flags)) { 481 old_cred = access_override_creds(); 482 if (!old_cred) 483 return -ENOMEM; 484 } 485 486 CLASS(filename_uflags, name)(filename, flags); 487 retry: 488 res = filename_lookup(dfd, name, lookup_flags, &path, NULL); 489 if (res) 490 goto out; 491 492 inode = d_backing_inode(path.dentry); 493 494 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { 495 /* 496 * MAY_EXEC on regular files is denied if the fs is mounted 497 * with the "noexec" flag. 498 */ 499 res = -EACCES; 500 if (path_noexec(&path)) 501 goto out_path_release; 502 } 503 504 res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS); 505 /* SuS v2 requires we report a read only fs too */ 506 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) 507 goto out_path_release; 508 /* 509 * This is a rare case where using __mnt_is_readonly() 510 * is OK without a mnt_want/drop_write() pair. 
Since 511 * no actual write to the fs is performed here, we do 512 * not need to telegraph to that to anyone. 513 * 514 * By doing this, we accept that this access is 515 * inherently racy and know that the fs may change 516 * state before we even see this result. 517 */ 518 if (__mnt_is_readonly(path.mnt)) 519 res = -EROFS; 520 521 out_path_release: 522 path_put(&path); 523 if (retry_estale(res, lookup_flags)) { 524 lookup_flags |= LOOKUP_REVAL; 525 goto retry; 526 } 527 out: 528 if (old_cred) 529 put_cred(revert_creds(old_cred)); 530 531 return res; 532 } 533 534 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) 535 { 536 return do_faccessat(dfd, filename, mode, 0); 537 } 538 539 SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, 540 int, flags) 541 { 542 return do_faccessat(dfd, filename, mode, flags); 543 } 544 545 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) 546 { 547 return do_faccessat(AT_FDCWD, filename, mode, 0); 548 } 549 550 SYSCALL_DEFINE1(chdir, const char __user *, filename) 551 { 552 struct path path; 553 int error; 554 unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 555 CLASS(filename, name)(filename); 556 retry: 557 error = filename_lookup(AT_FDCWD, name, lookup_flags, &path, NULL); 558 if (!error) { 559 error = path_permission(&path, MAY_EXEC | MAY_CHDIR); 560 if (!error) 561 set_fs_pwd(current->fs, &path); 562 path_put(&path); 563 if (retry_estale(error, lookup_flags)) { 564 lookup_flags |= LOOKUP_REVAL; 565 goto retry; 566 } 567 } 568 return error; 569 } 570 571 SYSCALL_DEFINE1(fchdir, unsigned int, fd) 572 { 573 CLASS(fd_raw, f)(fd); 574 int error; 575 576 if (fd_empty(f)) 577 return -EBADF; 578 579 if (!d_can_lookup(fd_file(f)->f_path.dentry)) 580 return -ENOTDIR; 581 582 error = file_permission(fd_file(f), MAY_EXEC | MAY_CHDIR); 583 if (!error) 584 set_fs_pwd(current->fs, &fd_file(f)->f_path); 585 return error; 586 } 587 588 SYSCALL_DEFINE1(chroot, 
const char __user *, filename) 589 { 590 struct path path; 591 int error; 592 unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 593 CLASS(filename, name)(filename); 594 retry: 595 error = filename_lookup(AT_FDCWD, name, lookup_flags, &path, NULL); 596 if (error) 597 return error; 598 599 error = path_permission(&path, MAY_EXEC | MAY_CHDIR); 600 if (error) 601 goto dput_and_out; 602 603 error = -EPERM; 604 if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) 605 goto dput_and_out; 606 error = security_path_chroot(&path); 607 if (!error) 608 set_fs_root(current->fs, &path); 609 dput_and_out: 610 path_put(&path); 611 if (retry_estale(error, lookup_flags)) { 612 lookup_flags |= LOOKUP_REVAL; 613 goto retry; 614 } 615 return error; 616 } 617 618 int chmod_common(const struct path *path, umode_t mode) 619 { 620 struct inode *inode = path->dentry->d_inode; 621 struct delegated_inode delegated_inode = { }; 622 struct iattr newattrs; 623 int error; 624 625 error = mnt_want_write(path->mnt); 626 if (error) 627 return error; 628 retry_deleg: 629 error = inode_lock_killable(inode); 630 if (error) 631 goto out_mnt_unlock; 632 error = security_path_chmod(path, mode); 633 if (error) 634 goto out_unlock; 635 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 636 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 637 error = notify_change(mnt_idmap(path->mnt), path->dentry, 638 &newattrs, &delegated_inode); 639 out_unlock: 640 inode_unlock(inode); 641 if (is_delegated(&delegated_inode)) { 642 error = break_deleg_wait(&delegated_inode); 643 if (!error) 644 goto retry_deleg; 645 } 646 out_mnt_unlock: 647 mnt_drop_write(path->mnt); 648 return error; 649 } 650 651 int vfs_fchmod(struct file *file, umode_t mode) 652 { 653 audit_file(file); 654 return chmod_common(&file->f_path, mode); 655 } 656 657 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) 658 { 659 CLASS(fd, f)(fd); 660 661 if (fd_empty(f)) 662 return -EBADF; 663 664 return vfs_fchmod(fd_file(f), 
mode); 665 } 666 667 static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, 668 unsigned int flags) 669 { 670 struct path path; 671 int error; 672 unsigned int lookup_flags; 673 674 if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))) 675 return -EINVAL; 676 677 lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 678 CLASS(filename_uflags, name)(filename, flags); 679 retry: 680 error = filename_lookup(dfd, name, lookup_flags, &path, NULL); 681 if (!error) { 682 error = chmod_common(&path, mode); 683 path_put(&path); 684 if (retry_estale(error, lookup_flags)) { 685 lookup_flags |= LOOKUP_REVAL; 686 goto retry; 687 } 688 } 689 return error; 690 } 691 692 SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename, 693 umode_t, mode, unsigned int, flags) 694 { 695 return do_fchmodat(dfd, filename, mode, flags); 696 } 697 698 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, 699 umode_t, mode) 700 { 701 return do_fchmodat(dfd, filename, mode, 0); 702 } 703 704 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) 705 { 706 return do_fchmodat(AT_FDCWD, filename, mode, 0); 707 } 708 709 /* 710 * Check whether @kuid is valid and if so generate and set vfsuid_t in 711 * ia_vfsuid. 712 * 713 * Return: true if @kuid is valid, false if not. 714 */ 715 static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid) 716 { 717 if (!uid_valid(kuid)) 718 return false; 719 attr->ia_valid |= ATTR_UID; 720 attr->ia_vfsuid = VFSUIDT_INIT(kuid); 721 return true; 722 } 723 724 /* 725 * Check whether @kgid is valid and if so generate and set vfsgid_t in 726 * ia_vfsgid. 727 * 728 * Return: true if @kgid is valid, false if not. 
*/
static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
{
	/* An invalid gid leaves @attr untouched. */
	if (!gid_valid(kgid))
		return false;
	attr->ia_valid |= ATTR_GID;
	attr->ia_vfsgid = VFSGIDT_INIT(kgid);
	return true;
}

/*
 * chown_common - change owner and/or group of the inode behind @path
 * @path:	path whose inode is changed
 * @user:	new owner, or (uid_t)-1 to leave the owner alone
 * @group:	new group, or (gid_t)-1 to leave the group alone
 *
 * @user and @group are translated through current_user_ns().  An id that
 * was requested but does not map to a valid kuid/kgid yields -EINVAL.
 * This function does not take mount write access itself; the callers
 * visible in this file (do_fchownat() and vfs_fchown()) do so around the
 * call.
 *
 * Return: -EINVAL as described above, an error from inode_lock_killable(),
 * security_path_chown() or break_deleg_wait(), otherwise the result of
 * notify_change().
 */
int chown_common(const struct path *path, uid_t user, gid_t group)
{
	struct mnt_idmap *idmap;
	struct user_namespace *fs_userns;
	struct inode *inode = path->dentry->d_inode;
	struct delegated_inode delegated_inode = { };
	int error;
	struct iattr newattrs;
	kuid_t uid;
	kgid_t gid;

	uid = make_kuid(current_user_ns(), user);
	gid = make_kgid(current_user_ns(), group);

	idmap = mnt_idmap(path->mnt);
	fs_userns = i_user_ns(inode);

retry_deleg:
	/* Rebuilt from scratch on every pass, including delegation retries. */
	newattrs.ia_vfsuid = INVALID_VFSUID;
	newattrs.ia_vfsgid = INVALID_VFSGID;
	newattrs.ia_valid = ATTR_CTIME;
	if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
		return -EINVAL;
	if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
		return -EINVAL;
	error = inode_lock_killable(inode);
	if (error)
		return error;
	/* For anything but a directory, also request the privilege-dropping bits. */
	if (!S_ISDIR(inode->i_mode))
		newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
				     setattr_should_drop_sgid(idmap, inode);
	/* Continue to send actual fs values, not the mount values. */
	error = security_path_chown(
		path,
		from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid),
		from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid));
	if (!error)
		error = notify_change(idmap, path->dentry, &newattrs,
				      &delegated_inode);
	inode_unlock(inode);
	/* A delegation was reported: wait for it to be broken, then retry. */
	if (is_delegated(&delegated_inode)) {
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry_deleg;
	}
	return error;
}

/*
 * Common implementation of chown(), lchown() and fchownat().
 *
 * Only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted in @flag
 * (-EINVAL otherwise).  Symlinks are followed unless AT_SYMLINK_NOFOLLOW
 * is set.  Mount write access is held around chown_common(), and the
 * lookup is repeated with LOOKUP_REVAL when retry_estale() asks for it.
 */
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
		int flag)
{
	struct path path;
	int error;
	int lookup_flags;

	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return -EINVAL;

	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
	CLASS(filename_uflags, name)(filename, flag);
retry:
	error = filename_lookup(dfd, name, lookup_flags, &path, NULL);
	if (!error) {
		error = mnt_want_write(path.mnt);
		if (!error) {
			error = chown_common(&path, user, group);
			mnt_drop_write(path.mnt);
		}
		path_put(&path);
		if (retry_estale(error, lookup_flags)) {
			lookup_flags |= LOOKUP_REVAL;
			goto retry;
		}
	}
	return error;
}

/* fchownat(2): thin wrapper around do_fchownat(). */
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
		gid_t, group, int, flag)
{
	return do_fchownat(dfd, filename, user, group, flag);
}

/* chown(2): relative to AT_FDCWD, follows symlinks. */
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
	return do_fchownat(AT_FDCWD, filename, user, group, 0);
}

/* lchown(2): like chown() but passes AT_SYMLINK_NOFOLLOW. */
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
	return do_fchownat(AT_FDCWD, filename, user, group,
			   AT_SYMLINK_NOFOLLOW);
}

/*
 * chown on an open file: takes mount write access for @file, audits the
 * file and goes through chown_common() on its path.
 */
int vfs_fchown(struct file *file, uid_t user, gid_t group)
{
	int error;

	error = mnt_want_write_file(file);
	if (error)
		return error;
	audit_file(file);
	error = chown_common(&file->f_path, user, group);
	mnt_drop_write_file(file);
	return error;
}

/*
 * Descriptor-based chown helper: returns -EBADF when @fd does not resolve
 * to a file, otherwise the result of vfs_fchown().
 */
int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
	CLASS(fd, f)(fd);

	if (fd_empty(f))
		return -EBADF;

	return vfs_fchown(fd_file(f), user, group);
}

/* fchown(2): thin wrapper around ksys_fchown(). */
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
	return ksys_fchown(fd, user, group);
}

/*
 * Acquire everything a file opened for writing needs: write access on the
 * inode, then on the mount of f->f_path and, for FMODE_BACKING files, on
 * the mount of backing_file_user_path(f) as well.  On failure whatever was
 * already taken is released in reverse order.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static inline int file_get_write_access(struct file *f)
{
	int error;

	error = get_write_access(f->f_inode);
	if (unlikely(error))
		return error;
	error = mnt_get_write_access(f->f_path.mnt);
	if (unlikely(error))
		goto cleanup_inode;
	if (unlikely(f->f_mode & FMODE_BACKING)) {
		error = mnt_get_write_access(backing_file_user_path(f)->mnt);
		if (unlikely(error))
			goto cleanup_mnt;
	}
	return 0;

cleanup_mnt:
	mnt_put_write_access(f->f_path.mnt);
cleanup_inode:
	put_write_access(f->f_inode);
	return error;
}

/*
 * do_dentry_open - turn a file with f_path already set into an open file
 * @f:		file to open; f->f_path, f->f_flags and f->f_mode are read
 * @open:	open callback to use, or NULL to use f->f_op->open
 *
 * Takes a reference on f->f_path, wires up the inode and mapping, takes
 * read/write accounting, runs the LSM and fsnotify permission hooks,
 * breaks leases and finally calls the open callback.  O_PATH opens
 * short-circuit after the basic setup with an empty f_op.
 *
 * Return: 0 on success or a negative errno.  On the cleanup_all and
 * cleanup_file paths the path reference is dropped and the path/inode
 * pointers in @f are cleared again.
 */
static int do_dentry_open(struct file *f,
			  int (*open)(struct inode *, struct file *))
{
	static const struct file_operations empty_fops = {};
	struct inode *inode = f->f_path.dentry->d_inode;
	int error;

	path_get(&f->f_path);
	f->f_inode = inode;
	f->f_mapping = inode->i_mapping;
	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
	f->f_sb_err = file_sample_sb_err(f);

	/* O_PATH: no access accounting, no hooks, no ->open(); just a handle. */
	if (unlikely(f->f_flags & O_PATH)) {
		f->f_mode = FMODE_PATH | FMODE_OPENED;
		file_set_fsnotify_mode(f, FMODE_NONOTIFY);
		f->f_op = &empty_fops;
		return 0;
	}

	/*
	 * Read-only opens bump the inode's read count; writable opens of
	 * non-special files take write access and are marked FMODE_WRITER.
	 */
	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
		i_readcount_inc(inode);
	} else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
		error = file_get_write_access(f);
		if (unlikely(error))
			goto cleanup_file;
		f->f_mode |= FMODE_WRITER;
	}

	/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
		f->f_mode |= FMODE_ATOMIC_POS;

	f->f_op = fops_get(inode->i_fop);
	if (WARN_ON(!f->f_op)) {
		error = -ENODEV;
		goto cleanup_all;
	}

	error = security_file_open(f);
	if (unlikely(error))
		goto cleanup_all;

	/*
	 * Call fsnotify open permission hook and set FMODE_NONOTIFY_* bits
	 * according to existing permission watches.
	 * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a
	 * pseudo file, this call will not change the mode.
	 */
	error = fsnotify_open_perm_and_set_mode(f);
	if (unlikely(error))
		goto cleanup_all;

	error = break_lease(file_inode(f), f->f_flags);
	if (unlikely(error))
		goto cleanup_all;

	/* normally all 3 are set; ->open() can clear them if needed */
	f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
	/* A caller-supplied callback takes precedence over f_op->open. */
	if (!open)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}
	f->f_mode |= FMODE_OPENED;
	/* Derive the capability bits from the methods f_op actually provides. */
	if ((f->f_mode & FMODE_READ) &&
	     likely(f->f_op->read || f->f_op->read_iter))
		f->f_mode |= FMODE_CAN_READ;
	if ((f->f_mode & FMODE_WRITE) &&
	     likely(f->f_op->write || f->f_op->write_iter))
		f->f_mode |= FMODE_CAN_WRITE;
	if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek)
		f->f_mode &= ~FMODE_LSEEK;
	if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
		f->f_mode |= FMODE_CAN_ODIRECT;

	/* These flags are cleared from f_flags once the open has gone through. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | __O_REGULAR);
	f->f_iocb_flags = iocb_flags(f);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/*
	 * NOTE(review): this returns directly, without going through the
	 * cleanup labels, after FMODE_OPENED has been set - presumably the
	 * caller's fput() does the teardown; confirm against callers.
	 */
	if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/*
	 * XXX: Huge page cache doesn't support writing yet. Drop all page
	 * cache for this file before processing writes.
	 */
	if (f->f_mode & FMODE_WRITE) {
		/*
		 * Depends on full fence from get_write_access() to synchronize
		 * against collapse_file() regarding i_writecount and nr_thps
		 * updates. Ensures subsequent insertion of THPs into the page
		 * cache will fail.
		 */
		if (filemap_nr_thps(inode->i_mapping)) {
			struct address_space *mapping = inode->i_mapping;

			filemap_invalidate_lock(inode->i_mapping);
			/*
			 * unmap_mapping_range() only needs to be called once
			 * here, because private pages do not need to be
			 * unmapped (e.g. the data segment of dynamic shared
			 * libraries).
			 */
			unmap_mapping_range(mapping, 0, 0, 0);
			truncate_inode_pages(mapping, 0);
			filemap_invalidate_unlock(inode->i_mapping);
		}
	}

	return 0;

cleanup_all:
	/* A positive value is not a valid error code; warn once and normalise it. */
	if (WARN_ON_ONCE(error > 0))
		error = -EINVAL;
	fops_put(f->f_op);
	put_file_access(f);
cleanup_file:
	/* Undo path_get() and detach @f from the path and inode. */
	path_put(&f->f_path);
	f->__f_path.mnt = NULL;
	f->__f_path.dentry = NULL;
	f->f_inode = NULL;
	return error;
}

/**
 * finish_open - finish opening a file
 * @file: file pointer
 * @dentry: pointer to dentry
 * @open: open callback
 *
 * This can be used to finish opening a file passed to i_op->atomic_open().
 *
 * If the open callback is set to NULL, then the standard f_op->open()
 * filesystem callback is substituted.
 *
 * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
 * the return value of d_splice_alias(), then the caller needs to perform dput()
 * on it after finish_open().
 *
 * Returns zero on success or -errno if the open failed.
1029 */ 1030 int finish_open(struct file *file, struct dentry *dentry, 1031 int (*open)(struct inode *, struct file *)) 1032 { 1033 BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ 1034 1035 file->__f_path.dentry = dentry; 1036 return do_dentry_open(file, open); 1037 } 1038 EXPORT_SYMBOL(finish_open); 1039 1040 /** 1041 * finish_no_open - finish ->atomic_open() without opening the file 1042 * 1043 * @file: file pointer 1044 * @dentry: dentry, ERR_PTR(-E...) or NULL (as returned from ->lookup()) 1045 * 1046 * This can be used to set the result of a lookup in ->atomic_open(). 1047 * 1048 * NB: unlike finish_open() this function does consume the dentry reference and 1049 * the caller need not dput() it. 1050 * 1051 * Returns 0 or -E..., which must be the return value of ->atomic_open() after 1052 * having called this function. 1053 */ 1054 int finish_no_open(struct file *file, struct dentry *dentry) 1055 { 1056 if (IS_ERR(dentry)) 1057 return PTR_ERR(dentry); 1058 file->__f_path.dentry = dentry; 1059 return 0; 1060 } 1061 EXPORT_SYMBOL(finish_no_open); 1062 1063 char *file_path(struct file *filp, char *buf, int buflen) 1064 { 1065 return d_path(&filp->f_path, buf, buflen); 1066 } 1067 EXPORT_SYMBOL(file_path); 1068 1069 /** 1070 * vfs_open - open the file at the given path 1071 * @path: path to open 1072 * @file: newly allocated file with f_flag initialized 1073 */ 1074 int vfs_open(const struct path *path, struct file *file) 1075 { 1076 int ret; 1077 1078 file->__f_path = *path; 1079 ret = do_dentry_open(file, NULL); 1080 if (!ret) { 1081 /* 1082 * Once we return a file with FMODE_OPENED, __fput() will call 1083 * fsnotify_close(), so we need fsnotify_open() here for 1084 * symmetry. 
1085 */ 1086 fsnotify_open(file); 1087 } 1088 return ret; 1089 } 1090 1091 struct file *dentry_open(const struct path *path, int flags, 1092 const struct cred *cred) 1093 { 1094 int error; 1095 struct file *f; 1096 1097 /* We must always pass in a valid mount pointer. */ 1098 BUG_ON(!path->mnt); 1099 1100 f = alloc_empty_file(flags, cred); 1101 if (!IS_ERR(f)) { 1102 error = vfs_open(path, f); 1103 if (error) { 1104 fput(f); 1105 f = ERR_PTR(error); 1106 } 1107 } 1108 return f; 1109 } 1110 EXPORT_SYMBOL(dentry_open); 1111 1112 struct file *dentry_open_nonotify(const struct path *path, int flags, 1113 const struct cred *cred) 1114 { 1115 struct file *f = alloc_empty_file(flags, cred); 1116 if (!IS_ERR(f)) { 1117 int error; 1118 1119 file_set_fsnotify_mode(f, FMODE_NONOTIFY); 1120 error = vfs_open(path, f); 1121 if (error) { 1122 fput(f); 1123 f = ERR_PTR(error); 1124 } 1125 } 1126 return f; 1127 } 1128 1129 /** 1130 * kernel_file_open - open a file for kernel internal use 1131 * @path: path of the file to open 1132 * @flags: open flags 1133 * @cred: credentials for open 1134 * 1135 * Open a file for use by in-kernel consumers. The file is not accounted 1136 * against nr_files and must not be installed into the file descriptor 1137 * table. 1138 * 1139 * Return: Opened file on success, an error pointer on failure. 
*/
struct file *kernel_file_open(const struct path *path, int flags,
				const struct cred *cred)
{
	struct file *f;
	int error;

	/* Unaccounted allocation, as opposed to alloc_empty_file(). */
	f = alloc_empty_file_noaccount(flags, cred);
	if (IS_ERR(f))
		return f;

	error = vfs_open(path, f);
	if (error) {
		/* Drop the file allocated above before reporting the error. */
		fput(f);
		return ERR_PTR(error);
	}
	return f;
}
EXPORT_SYMBOL_GPL(kernel_file_open);

/*
 * WILL_CREATE() is non-zero when @flags contains O_CREAT or __O_TMPFILE.
 * NOTE(review): @flags is not parenthesised in the expansion; the uses
 * visible in this file pass a plain identifier or member access only.
 *
 * O_PATH_FLAGS is the set of flags kept when O_PATH is present.
 */
#define WILL_CREATE(flags)	(flags & (O_CREAT | __O_TMPFILE))
#define O_PATH_FLAGS		(O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC | O_EMPTYPATH)

/*
 * Build a struct open_how from legacy open()-style arguments.
 *
 * @flags is masked with VALID_OPEN_FLAGS and @mode with S_IALLUGO.  With
 * O_PATH only the O_PATH_FLAGS bits are kept, and the mode is zeroed
 * unless the resulting flags are create-like (see WILL_CREATE()).
 *
 * Return: the populated open_how, by value.
 */
inline struct open_how build_open_how(int flags, umode_t mode)
{
	struct open_how how = {
		.flags = flags & VALID_OPEN_FLAGS,
		.mode = mode & S_IALLUGO,
	};

	/* O_PATH beats everything else. */
	if (how.flags & O_PATH)
		how.flags &= O_PATH_FLAGS;
	/* Modes should only be set for create-like flags. */
	if (!WILL_CREATE(how.flags))
		how.mode = 0;
	return how;
}

inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{
	u64 flags = how->flags;
	u64 strip = O_CLOEXEC;
	int lookup_flags = 0;
	int acc_mode = ACC_MODE(flags);

	BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
			 "VALID_OPEN_FLAGS must fit in 32 bits");
	/* The whole point: OPENAT2_REGULAR must be unrepresentable in int. */
	BUILD_BUG_ON_MSG(!upper_32_bits(OPENAT2_REGULAR),
			 "OPENAT2_REGULAR must live in the upper 32 bits of open_how::flags");
	/* Prevent a future bit collision between UAPI and internal carrier. */
	BUILD_BUG_ON_MSG(OPENAT2_REGULAR & VALID_OPEN_FLAGS,
			 "OPENAT2_REGULAR must not alias any open()/openat() flag");
	BUILD_BUG_ON_MSG(__O_REGULAR & VALID_OPENAT2_FLAGS,
			 "__O_REGULAR must not alias any user-visible flag");

	/*
	 * Strip flags that aren't relevant in determining struct open_flags.
1199 */ 1200 flags &= ~strip; 1201 1202 /* 1203 * Older syscalls implicitly clear all of the invalid flags or argument 1204 * values before calling build_open_flags(), but openat2(2) checks all 1205 * of its arguments. 1206 */ 1207 if (flags & ~VALID_OPENAT2_FLAGS) 1208 return -EINVAL; 1209 if (how->resolve & ~VALID_RESOLVE_FLAGS) 1210 return -EINVAL; 1211 1212 /* Scoping flags are mutually exclusive. */ 1213 if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT)) 1214 return -EINVAL; 1215 1216 /* Deal with the mode. */ 1217 if (WILL_CREATE(flags)) { 1218 if (how->mode & ~S_IALLUGO) 1219 return -EINVAL; 1220 op->mode = how->mode | S_IFREG; 1221 } else { 1222 if (how->mode != 0) 1223 return -EINVAL; 1224 op->mode = 0; 1225 } 1226 1227 /* 1228 * Block bugs where O_DIRECTORY | O_CREAT created regular files. 1229 * Note, that blocking O_DIRECTORY | O_CREAT here also protects 1230 * O_TMPFILE below which requires O_DIRECTORY being raised. 1231 */ 1232 if ((flags & (O_DIRECTORY | O_CREAT)) == (O_DIRECTORY | O_CREAT)) 1233 return -EINVAL; 1234 1235 /* Now handle the creative implementation of O_TMPFILE. */ 1236 if (flags & __O_TMPFILE) { 1237 /* 1238 * In order to ensure programs get explicit errors when trying 1239 * to use O_TMPFILE on old kernels we enforce that O_DIRECTORY 1240 * is raised alongside __O_TMPFILE. 1241 */ 1242 if (!(flags & O_DIRECTORY)) 1243 return -EINVAL; 1244 if (!(acc_mode & MAY_WRITE)) 1245 return -EINVAL; 1246 } 1247 /* 1248 * Asking to open a directory and a regular file at the same time is 1249 * contradictory. 1250 */ 1251 if ((flags & (O_DIRECTORY | OPENAT2_REGULAR)) == 1252 (O_DIRECTORY | OPENAT2_REGULAR)) 1253 return -EINVAL; 1254 1255 if (flags & O_PATH) { 1256 /* O_PATH only permits certain other flags to be set. */ 1257 if (flags & ~O_PATH_FLAGS) 1258 return -EINVAL; 1259 acc_mode = 0; 1260 } 1261 1262 /* 1263 * O_SYNC is implemented as __O_SYNC|O_DSYNC. 
As many places only 1264 * check for O_DSYNC if the need any syncing at all we enforce it's 1265 * always set instead of having to deal with possibly weird behaviour 1266 * for malicious applications setting only __O_SYNC. 1267 */ 1268 if (flags & __O_SYNC) 1269 flags |= O_DSYNC; 1270 1271 /* 1272 * Translate the upper-32-bit UAPI bit OPENAT2_REGULAR into the 1273 * kernel-internal lower-32-bit __O_REGULAR carrier so the bit 1274 * survives the assignment to op->open_flag (an int) below and the 1275 * subsequent flow through f->f_flags (unsigned int) and the 1276 * i_op->atomic_open() callback (unsigned). do_dentry_open() strips 1277 * __O_REGULAR before the file becomes visible to userspace. 1278 */ 1279 if (flags & OPENAT2_REGULAR) { 1280 flags &= ~OPENAT2_REGULAR; 1281 flags |= __O_REGULAR; 1282 } 1283 1284 op->open_flag = flags; 1285 1286 /* O_TRUNC implies we need access checks for write permissions */ 1287 if (flags & O_TRUNC) 1288 acc_mode |= MAY_WRITE; 1289 1290 /* Allow the LSM permission hook to distinguish append 1291 access from general write access. */ 1292 if (flags & O_APPEND) 1293 acc_mode |= MAY_APPEND; 1294 1295 op->acc_mode = acc_mode; 1296 1297 op->intent = flags & O_PATH ? 
0 : LOOKUP_OPEN; 1298 1299 if (flags & O_CREAT) { 1300 op->intent |= LOOKUP_CREATE; 1301 if (flags & O_EXCL) { 1302 op->intent |= LOOKUP_EXCL; 1303 flags |= O_NOFOLLOW; 1304 } 1305 } 1306 1307 if (flags & O_DIRECTORY) 1308 lookup_flags |= LOOKUP_DIRECTORY; 1309 if (!(flags & O_NOFOLLOW)) 1310 lookup_flags |= LOOKUP_FOLLOW; 1311 if (flags & O_EMPTYPATH) 1312 lookup_flags |= LOOKUP_EMPTY; 1313 1314 if (how->resolve & RESOLVE_NO_XDEV) 1315 lookup_flags |= LOOKUP_NO_XDEV; 1316 if (how->resolve & RESOLVE_NO_MAGICLINKS) 1317 lookup_flags |= LOOKUP_NO_MAGICLINKS; 1318 if (how->resolve & RESOLVE_NO_SYMLINKS) 1319 lookup_flags |= LOOKUP_NO_SYMLINKS; 1320 if (how->resolve & RESOLVE_BENEATH) 1321 lookup_flags |= LOOKUP_BENEATH; 1322 if (how->resolve & RESOLVE_IN_ROOT) 1323 lookup_flags |= LOOKUP_IN_ROOT; 1324 if (how->resolve & RESOLVE_CACHED) { 1325 /* Don't bother even trying for create/truncate/tmpfile open */ 1326 if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) 1327 return -EAGAIN; 1328 lookup_flags |= LOOKUP_CACHED; 1329 } 1330 1331 op->lookup_flags = lookup_flags; 1332 return 0; 1333 } 1334 1335 /** 1336 * file_open_name - open file and return file pointer 1337 * 1338 * @name: struct filename containing path to open 1339 * @flags: open flags as per the open(2) second argument 1340 * @mode: mode for the new file if O_CREAT is set, else ignored 1341 * 1342 * This is the helper to open a file from kernelspace if you really 1343 * have to. But in generally you should not do this, so please move 1344 * along, nothing to see here.. 
1345 */ 1346 struct file *file_open_name(struct filename *name, int flags, umode_t mode) 1347 { 1348 struct open_flags op; 1349 struct open_how how = build_open_how(flags, mode); 1350 int err = build_open_flags(&how, &op); 1351 if (err) 1352 return ERR_PTR(err); 1353 return do_file_open(AT_FDCWD, name, &op); 1354 } 1355 1356 /** 1357 * filp_open - open file and return file pointer 1358 * 1359 * @filename: path to open 1360 * @flags: open flags as per the open(2) second argument 1361 * @mode: mode for the new file if O_CREAT is set, else ignored 1362 * 1363 * This is the helper to open a file from kernelspace if you really 1364 * have to. But in generally you should not do this, so please move 1365 * along, nothing to see here.. 1366 */ 1367 struct file *filp_open(const char *filename, int flags, umode_t mode) 1368 { 1369 CLASS(filename_kernel, name)(filename); 1370 return file_open_name(name, flags, mode); 1371 } 1372 EXPORT_SYMBOL(filp_open); 1373 1374 struct file *file_open_root(const struct path *root, 1375 const char *filename, int flags, umode_t mode) 1376 { 1377 struct open_flags op; 1378 struct open_how how = build_open_how(flags, mode); 1379 int err = build_open_flags(&how, &op); 1380 if (err) 1381 return ERR_PTR(err); 1382 return do_file_open_root(root, filename, &op); 1383 } 1384 EXPORT_SYMBOL(file_open_root); 1385 1386 static int do_sys_openat2(int dfd, const char __user *filename, 1387 struct open_how *how) 1388 { 1389 struct open_flags op; 1390 int err = build_open_flags(how, &op); 1391 if (unlikely(err)) 1392 return err; 1393 1394 CLASS(filename_flags, name)(filename, op.lookup_flags); 1395 return FD_ADD(how->flags, do_file_open(dfd, name, &op)); 1396 } 1397 1398 int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) 1399 { 1400 struct open_how how = build_open_how(flags, mode); 1401 return do_sys_openat2(dfd, filename, &how); 1402 } 1403 1404 1405 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) 
1406 { 1407 if (force_o_largefile()) 1408 flags |= O_LARGEFILE; 1409 return do_sys_open(AT_FDCWD, filename, flags, mode); 1410 } 1411 1412 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, 1413 umode_t, mode) 1414 { 1415 if (force_o_largefile()) 1416 flags |= O_LARGEFILE; 1417 return do_sys_open(dfd, filename, flags, mode); 1418 } 1419 1420 SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, 1421 struct open_how __user *, how, size_t, usize) 1422 { 1423 int err; 1424 struct open_how tmp; 1425 1426 BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0); 1427 BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST); 1428 1429 if (unlikely(usize < OPEN_HOW_SIZE_VER0)) 1430 return -EINVAL; 1431 if (unlikely(usize > PAGE_SIZE)) 1432 return -E2BIG; 1433 1434 err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); 1435 if (err) 1436 return err; 1437 1438 audit_openat2_how(&tmp); 1439 1440 /* O_LARGEFILE is only allowed for non-O_PATH. */ 1441 if (!(tmp.flags & O_PATH) && force_o_largefile()) 1442 tmp.flags |= O_LARGEFILE; 1443 1444 return do_sys_openat2(dfd, filename, &tmp); 1445 } 1446 1447 #ifdef CONFIG_COMPAT 1448 /* 1449 * Exactly like sys_open(), except that it doesn't set the 1450 * O_LARGEFILE flag. 1451 */ 1452 COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) 1453 { 1454 return do_sys_open(AT_FDCWD, filename, flags, mode); 1455 } 1456 1457 /* 1458 * Exactly like sys_openat(), except that it doesn't set the 1459 * O_LARGEFILE flag. 1460 */ 1461 COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) 1462 { 1463 return do_sys_open(dfd, filename, flags, mode); 1464 } 1465 #endif 1466 1467 #ifndef __alpha__ 1468 1469 /* 1470 * For backward compatibility? Maybe this should be moved 1471 * into arch/i386 instead? 
1472 */ 1473 SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) 1474 { 1475 int flags = O_CREAT | O_WRONLY | O_TRUNC; 1476 1477 if (force_o_largefile()) 1478 flags |= O_LARGEFILE; 1479 return do_sys_open(AT_FDCWD, pathname, flags, mode); 1480 } 1481 #endif 1482 1483 /* 1484 * "id" is the POSIX thread ID. We use the 1485 * files pointer for this.. 1486 */ 1487 static int filp_flush(struct file *filp, fl_owner_t id) 1488 { 1489 int retval = 0; 1490 1491 if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp, 1492 "VFS: Close: file count is 0 (f_op=%ps)", 1493 filp->f_op)) { 1494 return 0; 1495 } 1496 1497 if (filp->f_op->flush) 1498 retval = filp->f_op->flush(filp, id); 1499 1500 if (likely(!(filp->f_mode & FMODE_PATH))) { 1501 dnotify_flush(filp, id); 1502 locks_remove_posix(filp, id); 1503 } 1504 return retval; 1505 } 1506 1507 int filp_close(struct file *filp, fl_owner_t id) 1508 { 1509 int retval; 1510 1511 retval = filp_flush(filp, id); 1512 fput_close(filp); 1513 1514 return retval; 1515 } 1516 EXPORT_SYMBOL(filp_close); 1517 1518 /* 1519 * Careful here! We test whether the file pointer is NULL before 1520 * releasing the fd. This ensures that one clone task can't release 1521 * an fd while another clone is opening it. 1522 */ 1523 SYSCALL_DEFINE1(close, unsigned int, fd) 1524 { 1525 int retval; 1526 struct file *file; 1527 1528 file = file_close_fd(fd); 1529 if (!file) 1530 return -EBADF; 1531 1532 retval = filp_flush(file, current->files); 1533 1534 /* 1535 * We're returning to user space. Don't bother 1536 * with any delayed fput() cases. 
1537 */ 1538 fput_close_sync(file); 1539 1540 if (likely(retval == 0)) 1541 return 0; 1542 1543 /* can't restart close syscall because file table entry was cleared */ 1544 if (retval == -ERESTARTSYS || 1545 retval == -ERESTARTNOINTR || 1546 retval == -ERESTARTNOHAND || 1547 retval == -ERESTART_RESTARTBLOCK) 1548 retval = -EINTR; 1549 1550 return retval; 1551 } 1552 1553 /* 1554 * This routine simulates a hangup on the tty, to arrange that users 1555 * are given clean terminals at login time. 1556 */ 1557 SYSCALL_DEFINE0(vhangup) 1558 { 1559 if (capable(CAP_SYS_TTY_CONFIG)) { 1560 tty_vhangup_self(); 1561 return 0; 1562 } 1563 return -EPERM; 1564 } 1565 1566 /* 1567 * Called when an inode is about to be open. 1568 * We use this to disallow opening large files on 32bit systems if 1569 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1570 * on this flag in sys_open. 1571 */ 1572 int generic_file_open(struct inode * inode, struct file * filp) 1573 { 1574 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1575 return -EOVERFLOW; 1576 return 0; 1577 } 1578 1579 EXPORT_SYMBOL(generic_file_open); 1580 1581 /* 1582 * This is used by subsystems that don't want seekable 1583 * file descriptors. The function is not supposed to ever fail, the only 1584 * reason it returns an 'int' and not 'void' is so that it can be plugged 1585 * directly into file_operations structure. 1586 */ 1587 int nonseekable_open(struct inode *inode, struct file *filp) 1588 { 1589 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1590 return 0; 1591 } 1592 1593 EXPORT_SYMBOL(nonseekable_open); 1594 1595 /* 1596 * stream_open is used by subsystems that want stream-like file descriptors. 1597 * Such file descriptors are not seekable and don't have notion of position 1598 * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL). 
1599 * Contrary to file descriptors of other regular files, .read() and .write() 1600 * can run simultaneously. 1601 * 1602 * stream_open never fails and is marked to return int so that it could be 1603 * directly used as file_operations.open . 1604 */ 1605 int stream_open(struct inode *inode, struct file *filp) 1606 { 1607 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); 1608 filp->f_mode |= FMODE_STREAM; 1609 return 0; 1610 } 1611 1612 EXPORT_SYMBOL(stream_open); 1613