1 /* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/string.h> 8 #include <linux/mm.h> 9 #include <linux/file.h> 10 #include <linux/quotaops.h> 11 #include <linux/fsnotify.h> 12 #include <linux/module.h> 13 #include <linux/slab.h> 14 #include <linux/tty.h> 15 #include <linux/namei.h> 16 #include <linux/backing-dev.h> 17 #include <linux/capability.h> 18 #include <linux/security.h> 19 #include <linux/mount.h> 20 #include <linux/vfs.h> 21 #include <linux/fcntl.h> 22 #include <asm/uaccess.h> 23 #include <linux/fs.h> 24 #include <linux/personality.h> 25 #include <linux/pagemap.h> 26 #include <linux/syscalls.h> 27 #include <linux/rcupdate.h> 28 #include <linux/audit.h> 29 #include <linux/falloc.h> 30 31 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 32 { 33 int retval = -ENODEV; 34 35 if (dentry) { 36 retval = -ENOSYS; 37 if (dentry->d_sb->s_op->statfs) { 38 memset(buf, 0, sizeof(*buf)); 39 retval = security_sb_statfs(dentry); 40 if (retval) 41 return retval; 42 retval = dentry->d_sb->s_op->statfs(dentry, buf); 43 if (retval == 0 && buf->f_frsize == 0) 44 buf->f_frsize = buf->f_bsize; 45 } 46 } 47 return retval; 48 } 49 50 EXPORT_SYMBOL(vfs_statfs); 51 52 static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 53 { 54 struct kstatfs st; 55 int retval; 56 57 retval = vfs_statfs(dentry, &st); 58 if (retval) 59 return retval; 60 61 if (sizeof(*buf) == sizeof(st)) 62 memcpy(buf, &st, sizeof(st)); 63 else { 64 if (sizeof buf->f_blocks == 4) { 65 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 66 0xffffffff00000000ULL) 67 return -EOVERFLOW; 68 /* 69 * f_files and f_ffree may be -1; it's okay to stuff 70 * that into 32 bits 71 */ 72 if (st.f_files != -1 && 73 (st.f_files & 0xffffffff00000000ULL)) 74 return -EOVERFLOW; 75 if (st.f_ffree != -1 && 76 (st.f_ffree & 0xffffffff00000000ULL)) 77 return -EOVERFLOW; 78 } 79 80 buf->f_type = st.f_type; 81 buf->f_bsize = st.f_bsize; 82 buf->f_blocks = st.f_blocks; 83 
buf->f_bfree = st.f_bfree; 84 buf->f_bavail = st.f_bavail; 85 buf->f_files = st.f_files; 86 buf->f_ffree = st.f_ffree; 87 buf->f_fsid = st.f_fsid; 88 buf->f_namelen = st.f_namelen; 89 buf->f_frsize = st.f_frsize; 90 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 91 } 92 return 0; 93 } 94 95 static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 96 { 97 struct kstatfs st; 98 int retval; 99 100 retval = vfs_statfs(dentry, &st); 101 if (retval) 102 return retval; 103 104 if (sizeof(*buf) == sizeof(st)) 105 memcpy(buf, &st, sizeof(st)); 106 else { 107 buf->f_type = st.f_type; 108 buf->f_bsize = st.f_bsize; 109 buf->f_blocks = st.f_blocks; 110 buf->f_bfree = st.f_bfree; 111 buf->f_bavail = st.f_bavail; 112 buf->f_files = st.f_files; 113 buf->f_ffree = st.f_ffree; 114 buf->f_fsid = st.f_fsid; 115 buf->f_namelen = st.f_namelen; 116 buf->f_frsize = st.f_frsize; 117 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 118 } 119 return 0; 120 } 121 122 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 123 { 124 struct nameidata nd; 125 int error; 126 127 error = user_path_walk(path, &nd); 128 if (!error) { 129 struct statfs tmp; 130 error = vfs_statfs_native(nd.path.dentry, &tmp); 131 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 132 error = -EFAULT; 133 path_put(&nd.path); 134 } 135 return error; 136 } 137 138 139 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 140 { 141 struct nameidata nd; 142 long error; 143 144 if (sz != sizeof(*buf)) 145 return -EINVAL; 146 error = user_path_walk(path, &nd); 147 if (!error) { 148 struct statfs64 tmp; 149 error = vfs_statfs64(nd.path.dentry, &tmp); 150 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 151 error = -EFAULT; 152 path_put(&nd.path); 153 } 154 return error; 155 } 156 157 158 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 159 { 160 struct file * file; 161 struct statfs tmp; 162 int error; 163 164 error = 
-EBADF; 165 file = fget(fd); 166 if (!file) 167 goto out; 168 error = vfs_statfs_native(file->f_path.dentry, &tmp); 169 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 170 error = -EFAULT; 171 fput(file); 172 out: 173 return error; 174 } 175 176 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 177 { 178 struct file * file; 179 struct statfs64 tmp; 180 int error; 181 182 if (sz != sizeof(*buf)) 183 return -EINVAL; 184 185 error = -EBADF; 186 file = fget(fd); 187 if (!file) 188 goto out; 189 error = vfs_statfs64(file->f_path.dentry, &tmp); 190 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 191 error = -EFAULT; 192 fput(file); 193 out: 194 return error; 195 } 196 197 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 198 struct file *filp) 199 { 200 int err; 201 struct iattr newattrs; 202 203 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 204 if (length < 0) 205 return -EINVAL; 206 207 newattrs.ia_size = length; 208 newattrs.ia_valid = ATTR_SIZE | time_attrs; 209 if (filp) { 210 newattrs.ia_file = filp; 211 newattrs.ia_valid |= ATTR_FILE; 212 } 213 214 /* Remove suid/sgid on truncate too */ 215 newattrs.ia_valid |= should_remove_suid(dentry); 216 217 mutex_lock(&dentry->d_inode->i_mutex); 218 err = notify_change(dentry, &newattrs); 219 mutex_unlock(&dentry->d_inode->i_mutex); 220 return err; 221 } 222 223 static long do_sys_truncate(const char __user * path, loff_t length) 224 { 225 struct nameidata nd; 226 struct inode * inode; 227 int error; 228 229 error = -EINVAL; 230 if (length < 0) /* sorry, but loff_t says... 
*/ 231 goto out; 232 233 error = user_path_walk(path, &nd); 234 if (error) 235 goto out; 236 inode = nd.path.dentry->d_inode; 237 238 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 239 error = -EISDIR; 240 if (S_ISDIR(inode->i_mode)) 241 goto dput_and_out; 242 243 error = -EINVAL; 244 if (!S_ISREG(inode->i_mode)) 245 goto dput_and_out; 246 247 error = mnt_want_write(nd.path.mnt); 248 if (error) 249 goto dput_and_out; 250 251 error = vfs_permission(&nd, MAY_WRITE); 252 if (error) 253 goto mnt_drop_write_and_out; 254 255 error = -EPERM; 256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 257 goto mnt_drop_write_and_out; 258 259 error = get_write_access(inode); 260 if (error) 261 goto mnt_drop_write_and_out; 262 263 /* 264 * Make sure that there are no leases. get_write_access() protects 265 * against the truncate racing with a lease-granting setlease(). 266 */ 267 error = break_lease(inode, FMODE_WRITE); 268 if (error) 269 goto put_write_and_out; 270 271 error = locks_verify_truncate(inode, NULL, length); 272 if (!error) { 273 DQUOT_INIT(inode); 274 error = do_truncate(nd.path.dentry, length, 0, NULL); 275 } 276 277 put_write_and_out: 278 put_write_access(inode); 279 mnt_drop_write_and_out: 280 mnt_drop_write(nd.path.mnt); 281 dput_and_out: 282 path_put(&nd.path); 283 out: 284 return error; 285 } 286 287 asmlinkage long sys_truncate(const char __user * path, unsigned long length) 288 { 289 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 290 return do_sys_truncate(path, (long)length); 291 } 292 293 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 294 { 295 struct inode * inode; 296 struct dentry *dentry; 297 struct file * file; 298 int error; 299 300 error = -EINVAL; 301 if (length < 0) 302 goto out; 303 error = -EBADF; 304 file = fget(fd); 305 if (!file) 306 goto out; 307 308 /* explicitly opened as large or we are on 64-bit box */ 309 if (file->f_flags & O_LARGEFILE) 310 small = 0; 311 312 dentry = 
file->f_path.dentry; 313 inode = dentry->d_inode; 314 error = -EINVAL; 315 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 316 goto out_putf; 317 318 error = -EINVAL; 319 /* Cannot ftruncate over 2^31 bytes without large file support */ 320 if (small && length > MAX_NON_LFS) 321 goto out_putf; 322 323 error = -EPERM; 324 if (IS_APPEND(inode)) 325 goto out_putf; 326 327 error = locks_verify_truncate(inode, file, length); 328 if (!error) 329 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 330 out_putf: 331 fput(file); 332 out: 333 return error; 334 } 335 336 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 337 { 338 long ret = do_sys_ftruncate(fd, length, 1); 339 /* avoid REGPARM breakage on x86: */ 340 asmlinkage_protect(2, ret, fd, length); 341 return ret; 342 } 343 344 /* LFS versions of truncate are only needed on 32 bit machines */ 345 #if BITS_PER_LONG == 32 346 asmlinkage long sys_truncate64(const char __user * path, loff_t length) 347 { 348 return do_sys_truncate(path, length); 349 } 350 351 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 352 { 353 long ret = do_sys_ftruncate(fd, length, 0); 354 /* avoid REGPARM breakage on x86: */ 355 asmlinkage_protect(2, ret, fd, length); 356 return ret; 357 } 358 #endif 359 360 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) 361 { 362 struct file *file; 363 struct inode *inode; 364 long ret = -EINVAL; 365 366 if (offset < 0 || len <= 0) 367 goto out; 368 369 /* Return error if mode is not supported */ 370 ret = -EOPNOTSUPP; 371 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 372 goto out; 373 374 ret = -EBADF; 375 file = fget(fd); 376 if (!file) 377 goto out; 378 if (!(file->f_mode & FMODE_WRITE)) 379 goto out_fput; 380 /* 381 * Revalidate the write permissions, in case security policy has 382 * changed since the files were opened. 
383 */ 384 ret = security_file_permission(file, MAY_WRITE); 385 if (ret) 386 goto out_fput; 387 388 inode = file->f_path.dentry->d_inode; 389 390 ret = -ESPIPE; 391 if (S_ISFIFO(inode->i_mode)) 392 goto out_fput; 393 394 ret = -ENODEV; 395 /* 396 * Let individual file system decide if it supports preallocation 397 * for directories or not. 398 */ 399 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 400 goto out_fput; 401 402 ret = -EFBIG; 403 /* Check for wrap through zero too */ 404 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 405 goto out_fput; 406 407 if (inode->i_op && inode->i_op->fallocate) 408 ret = inode->i_op->fallocate(inode, mode, offset, len); 409 else 410 ret = -EOPNOTSUPP; 411 412 out_fput: 413 fput(file); 414 out: 415 return ret; 416 } 417 418 /* 419 * access() needs to use the real uid/gid, not the effective uid/gid. 420 * We do this by temporarily clearing all FS-related capabilities and 421 * switching the fsuid/fsgid around to the real ones. 422 */ 423 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 424 { 425 struct nameidata nd; 426 int old_fsuid, old_fsgid; 427 kernel_cap_t old_cap; 428 int res; 429 430 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 431 return -EINVAL; 432 433 old_fsuid = current->fsuid; 434 old_fsgid = current->fsgid; 435 old_cap = current->cap_effective; 436 437 current->fsuid = current->uid; 438 current->fsgid = current->gid; 439 440 /* 441 * Clear the capabilities if we switch to a non-root user 442 * 443 * FIXME: There is a race here against sys_capset. The 444 * capabilities can change yet we will restore the old 445 * value below. We should hold task_capabilities_lock, 446 * but we cannot because user_path_walk can sleep. 
447 */ 448 if (current->uid) 449 cap_clear(current->cap_effective); 450 else 451 current->cap_effective = current->cap_permitted; 452 453 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 454 if (res) 455 goto out; 456 457 res = vfs_permission(&nd, mode); 458 /* SuS v2 requires we report a read only fs too */ 459 if(res || !(mode & S_IWOTH) || 460 special_file(nd.path.dentry->d_inode->i_mode)) 461 goto out_path_release; 462 /* 463 * This is a rare case where using __mnt_is_readonly() 464 * is OK without a mnt_want/drop_write() pair. Since 465 * no actual write to the fs is performed here, we do 466 * not need to telegraph to that to anyone. 467 * 468 * By doing this, we accept that this access is 469 * inherently racy and know that the fs may change 470 * state before we even see this result. 471 */ 472 if (__mnt_is_readonly(nd.path.mnt)) 473 res = -EROFS; 474 475 out_path_release: 476 path_put(&nd.path); 477 out: 478 current->fsuid = old_fsuid; 479 current->fsgid = old_fsgid; 480 current->cap_effective = old_cap; 481 482 return res; 483 } 484 485 asmlinkage long sys_access(const char __user *filename, int mode) 486 { 487 return sys_faccessat(AT_FDCWD, filename, mode); 488 } 489 490 asmlinkage long sys_chdir(const char __user * filename) 491 { 492 struct nameidata nd; 493 int error; 494 495 error = __user_walk(filename, 496 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); 497 if (error) 498 goto out; 499 500 error = vfs_permission(&nd, MAY_EXEC); 501 if (error) 502 goto dput_and_out; 503 504 set_fs_pwd(current->fs, &nd.path); 505 506 dput_and_out: 507 path_put(&nd.path); 508 out: 509 return error; 510 } 511 512 asmlinkage long sys_fchdir(unsigned int fd) 513 { 514 struct file *file; 515 struct inode *inode; 516 int error; 517 518 error = -EBADF; 519 file = fget(fd); 520 if (!file) 521 goto out; 522 523 inode = file->f_path.dentry->d_inode; 524 525 error = -ENOTDIR; 526 if (!S_ISDIR(inode->i_mode)) 527 goto out_putf; 528 529 error = 
file_permission(file, MAY_EXEC); 530 if (!error) 531 set_fs_pwd(current->fs, &file->f_path); 532 out_putf: 533 fput(file); 534 out: 535 return error; 536 } 537 538 asmlinkage long sys_chroot(const char __user * filename) 539 { 540 struct nameidata nd; 541 int error; 542 543 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 544 if (error) 545 goto out; 546 547 error = vfs_permission(&nd, MAY_EXEC); 548 if (error) 549 goto dput_and_out; 550 551 error = -EPERM; 552 if (!capable(CAP_SYS_CHROOT)) 553 goto dput_and_out; 554 555 set_fs_root(current->fs, &nd.path); 556 set_fs_altroot(); 557 error = 0; 558 dput_and_out: 559 path_put(&nd.path); 560 out: 561 return error; 562 } 563 564 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 565 { 566 struct inode * inode; 567 struct dentry * dentry; 568 struct file * file; 569 int err = -EBADF; 570 struct iattr newattrs; 571 572 file = fget(fd); 573 if (!file) 574 goto out; 575 576 dentry = file->f_path.dentry; 577 inode = dentry->d_inode; 578 579 audit_inode(NULL, dentry); 580 581 err = mnt_want_write(file->f_path.mnt); 582 if (err) 583 goto out_putf; 584 err = -EPERM; 585 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 586 goto out_drop_write; 587 mutex_lock(&inode->i_mutex); 588 if (mode == (mode_t) -1) 589 mode = inode->i_mode; 590 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 591 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 592 err = notify_change(dentry, &newattrs); 593 mutex_unlock(&inode->i_mutex); 594 595 out_drop_write: 596 mnt_drop_write(file->f_path.mnt); 597 out_putf: 598 fput(file); 599 out: 600 return err; 601 } 602 603 asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 604 mode_t mode) 605 { 606 struct nameidata nd; 607 struct inode * inode; 608 int error; 609 struct iattr newattrs; 610 611 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 612 if (error) 613 goto out; 614 inode = nd.path.dentry->d_inode; 615 616 error = 
mnt_want_write(nd.path.mnt); 617 if (error) 618 goto dput_and_out; 619 620 error = -EPERM; 621 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 622 goto out_drop_write; 623 624 mutex_lock(&inode->i_mutex); 625 if (mode == (mode_t) -1) 626 mode = inode->i_mode; 627 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 628 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 629 error = notify_change(nd.path.dentry, &newattrs); 630 mutex_unlock(&inode->i_mutex); 631 632 out_drop_write: 633 mnt_drop_write(nd.path.mnt); 634 dput_and_out: 635 path_put(&nd.path); 636 out: 637 return error; 638 } 639 640 asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 641 { 642 return sys_fchmodat(AT_FDCWD, filename, mode); 643 } 644 645 static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 646 { 647 struct inode * inode; 648 int error; 649 struct iattr newattrs; 650 651 error = -ENOENT; 652 if (!(inode = dentry->d_inode)) { 653 printk(KERN_ERR "chown_common: NULL inode\n"); 654 goto out; 655 } 656 error = -EPERM; 657 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 658 goto out; 659 newattrs.ia_valid = ATTR_CTIME; 660 if (user != (uid_t) -1) { 661 newattrs.ia_valid |= ATTR_UID; 662 newattrs.ia_uid = user; 663 } 664 if (group != (gid_t) -1) { 665 newattrs.ia_valid |= ATTR_GID; 666 newattrs.ia_gid = group; 667 } 668 if (!S_ISDIR(inode->i_mode)) 669 newattrs.ia_valid |= 670 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 671 mutex_lock(&inode->i_mutex); 672 error = notify_change(dentry, &newattrs); 673 mutex_unlock(&inode->i_mutex); 674 out: 675 return error; 676 } 677 678 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 679 { 680 struct nameidata nd; 681 int error; 682 683 error = user_path_walk(filename, &nd); 684 if (error) 685 goto out; 686 error = mnt_want_write(nd.path.mnt); 687 if (error) 688 goto out_release; 689 error = chown_common(nd.path.dentry, user, group); 690 mnt_drop_write(nd.path.mnt); 691 out_release: 
692 path_put(&nd.path); 693 out: 694 return error; 695 } 696 697 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 698 gid_t group, int flag) 699 { 700 struct nameidata nd; 701 int error = -EINVAL; 702 int follow; 703 704 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 705 goto out; 706 707 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 708 error = __user_walk_fd(dfd, filename, follow, &nd); 709 if (error) 710 goto out; 711 error = mnt_want_write(nd.path.mnt); 712 if (error) 713 goto out_release; 714 error = chown_common(nd.path.dentry, user, group); 715 mnt_drop_write(nd.path.mnt); 716 out_release: 717 path_put(&nd.path); 718 out: 719 return error; 720 } 721 722 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 723 { 724 struct nameidata nd; 725 int error; 726 727 error = user_path_walk_link(filename, &nd); 728 if (error) 729 goto out; 730 error = mnt_want_write(nd.path.mnt); 731 if (error) 732 goto out_release; 733 error = chown_common(nd.path.dentry, user, group); 734 mnt_drop_write(nd.path.mnt); 735 out_release: 736 path_put(&nd.path); 737 out: 738 return error; 739 } 740 741 742 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 743 { 744 struct file * file; 745 int error = -EBADF; 746 struct dentry * dentry; 747 748 file = fget(fd); 749 if (!file) 750 goto out; 751 752 error = mnt_want_write(file->f_path.mnt); 753 if (error) 754 goto out_fput; 755 dentry = file->f_path.dentry; 756 audit_inode(NULL, dentry); 757 error = chown_common(dentry, user, group); 758 mnt_drop_write(file->f_path.mnt); 759 out_fput: 760 fput(file); 761 out: 762 return error; 763 } 764 765 /* 766 * You have to be very careful that these write 767 * counts get cleaned up in error cases and 768 * upon __fput(). This should probably never 769 * be called outside of __dentry_open(). 
*/
/*
 * Take write access on @inode and, for anything that is not a special
 * file, a writer count on @mnt as well.
 *
 * Returns 0 on success, or the error from get_write_access() or
 * mnt_want_write().  If mnt_want_write() fails, the inode write access
 * taken just before is released again, so nothing is held on error.
 */
static inline int __get_file_write_access(struct inode *inode,
					   struct vfsmount *mnt)
{
	int error;
	error = get_write_access(inode);
	if (error)
		return error;
	/*
	 * Do not take mount writer counts on
	 * special files since no writes to
	 * the mount itself will occur.
	 */
	if (!special_file(inode->i_mode)) {
		/*
		 * Balanced in __fput()
		 */
		error = mnt_want_write(mnt);
		if (error)
			put_write_access(inode);
	}
	return error;
}

/*
 * Turn the empty struct file @f into an open file on @dentry/@mnt.
 *
 * @flags: stored in f->f_flags and used to derive f->f_mode.
 * @open:  open callback to run; when NULL, f->f_op->open is used if the
 *         inode supplies file operations with an ->open method.
 *
 * On success @f is returned and owns the @dentry and @mnt references that
 * were passed in.  On failure the references are dropped here (dput() and
 * mntput()), @f is released, and an ERR_PTR() is returned:
 *   - errors from __get_file_write_access(), security_dentry_open() or the
 *     open callback are passed through;
 *   - -EINVAL if O_DIRECT was requested but the mapping provides neither
 *     ->direct_IO nor ->get_xip_mem (this path releases @f with fput()).
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
					int flags, struct file *f,
					int (*open)(struct inode *, struct file *))
{
	struct inode *inode;
	int error;

	f->f_flags = flags;
	/*
	 * (flags + 1) & O_ACCMODE converts the access-mode field of @flags
	 * into mode bits; presumably this maps O_RDONLY/O_WRONLY/O_RDWR onto
	 * the FMODE_READ/FMODE_WRITE bitmask -- confirm against <linux/fs.h>.
	 */
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = __get_file_write_access(inode, mnt);
		if (error)
			goto cleanup_file;
		if (!special_file(inode->i_mode))
			file_take_write(f);
	}

	f->f_mapping = inode->i_mapping;
	f->f_path.dentry = dentry;
	f->f_path.mnt = mnt;
	f->f_pos = 0;
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);

	error = security_dentry_open(f);
	if (error)
		goto cleanup_all;

	if (!open && f->f_op)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}

	/* These flags only matter while opening; do not keep them in f_flags. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
		    (!f->f_mapping->a_ops->get_xip_mem))) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

	/*
	 * Undo everything done after the write access was taken: file
	 * operations reference, inode write access, mount writer count,
	 * and the file's links to the dentry and mount.
	 */
cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE) {
		put_write_access(inode);
		if (!special_file(inode->i_mode)) {
			/*
			 * We don't consider this a real
			 * mnt_want/drop_write() pair
			 * because it all happened right
			 * here, so just reset the state.
			 */
			file_reset_write(f);
			mnt_drop_write(mnt);
		}
	}
	file_kill(f);
	f->f_path.dentry = NULL;
	f->f_path.mnt = NULL;
	/* Release the file and the references the caller handed to us. */
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

/**
 * lookup_instantiate_filp - instantiates the open intent filp
 * @nd: pointer to nameidata
 * @dentry: pointer to dentry
 * @open: open callback
 *
 * Helper for filesystems that want to use lookup open intents and pass back
 * a fully instantiated struct file to the caller.
 * This function is meant to be called from within a filesystem's
 * lookup method.
 * Beware of calling it for non-regular files! Those ->open methods might block
 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
 * leading to a deadlock, as nobody can open that fifo anymore, because
 * another process to open fifo will block on locked parent when doing lookup).
 * Note that in case of error, nd->intent.open.file is destroyed, but the
 * path information remains valid.
 * If the open callback is set to NULL, then the standard f_op->open()
 * filesystem callback is substituted.
891 */ 892 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 893 int (*open)(struct inode *, struct file *)) 894 { 895 if (IS_ERR(nd->intent.open.file)) 896 goto out; 897 if (IS_ERR(dentry)) 898 goto out_err; 899 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 900 nd->intent.open.flags - 1, 901 nd->intent.open.file, 902 open); 903 out: 904 return nd->intent.open.file; 905 out_err: 906 release_open_intent(nd); 907 nd->intent.open.file = (struct file *)dentry; 908 goto out; 909 } 910 EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 911 912 /** 913 * nameidata_to_filp - convert a nameidata to an open filp. 914 * @nd: pointer to nameidata 915 * @flags: open flags 916 * 917 * Note that this function destroys the original nameidata 918 */ 919 struct file *nameidata_to_filp(struct nameidata *nd, int flags) 920 { 921 struct file *filp; 922 923 /* Pick up the filp from the open intent */ 924 filp = nd->intent.open.file; 925 /* Has the filesystem initialised the file for us? */ 926 if (filp->f_path.dentry == NULL) 927 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 928 NULL); 929 else 930 path_put(&nd->path); 931 return filp; 932 } 933 934 /* 935 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 936 * error. 937 */ 938 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 939 { 940 int error; 941 struct file *f; 942 943 /* 944 * We must always pass in a valid mount pointer. Historically 945 * callers got away with not passing it, but we must enforce this at 946 * the earliest possible point now to avoid strange problems deep in the 947 * filesystem stack. 
948 */ 949 if (!mnt) { 950 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__); 951 dump_stack(); 952 return ERR_PTR(-EINVAL); 953 } 954 955 error = -ENFILE; 956 f = get_empty_filp(); 957 if (f == NULL) { 958 dput(dentry); 959 mntput(mnt); 960 return ERR_PTR(error); 961 } 962 963 return __dentry_open(dentry, mnt, flags, f, NULL); 964 } 965 EXPORT_SYMBOL(dentry_open); 966 967 /* 968 * Find an empty file descriptor entry, and mark it busy. 969 */ 970 int get_unused_fd_flags(int flags) 971 { 972 struct files_struct * files = current->files; 973 int fd, error; 974 struct fdtable *fdt; 975 976 error = -EMFILE; 977 spin_lock(&files->file_lock); 978 979 repeat: 980 fdt = files_fdtable(files); 981 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, 982 files->next_fd); 983 984 /* 985 * N.B. For clone tasks sharing a files structure, this test 986 * will limit the total number of files that can be opened. 987 */ 988 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 989 goto out; 990 991 /* Do we need to expand the fd array or fd set? */ 992 error = expand_files(files, fd); 993 if (error < 0) 994 goto out; 995 996 if (error) { 997 /* 998 * If we needed to expand the fs array we 999 * might have blocked - try again. 
1000 */ 1001 error = -EMFILE; 1002 goto repeat; 1003 } 1004 1005 FD_SET(fd, fdt->open_fds); 1006 if (flags & O_CLOEXEC) 1007 FD_SET(fd, fdt->close_on_exec); 1008 else 1009 FD_CLR(fd, fdt->close_on_exec); 1010 files->next_fd = fd + 1; 1011 #if 1 1012 /* Sanity check */ 1013 if (fdt->fd[fd] != NULL) { 1014 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 1015 fdt->fd[fd] = NULL; 1016 } 1017 #endif 1018 error = fd; 1019 1020 out: 1021 spin_unlock(&files->file_lock); 1022 return error; 1023 } 1024 1025 int get_unused_fd(void) 1026 { 1027 return get_unused_fd_flags(0); 1028 } 1029 1030 EXPORT_SYMBOL(get_unused_fd); 1031 1032 static void __put_unused_fd(struct files_struct *files, unsigned int fd) 1033 { 1034 struct fdtable *fdt = files_fdtable(files); 1035 __FD_CLR(fd, fdt->open_fds); 1036 if (fd < files->next_fd) 1037 files->next_fd = fd; 1038 } 1039 1040 void put_unused_fd(unsigned int fd) 1041 { 1042 struct files_struct *files = current->files; 1043 spin_lock(&files->file_lock); 1044 __put_unused_fd(files, fd); 1045 spin_unlock(&files->file_lock); 1046 } 1047 1048 EXPORT_SYMBOL(put_unused_fd); 1049 1050 /* 1051 * Install a file pointer in the fd array. 1052 * 1053 * The VFS is full of places where we drop the files lock between 1054 * setting the open_fds bitmap and installing the file in the file 1055 * array. At any such point, we are vulnerable to a dup2() race 1056 * installing a file in the array before us. We need to detect this and 1057 * fput() the struct file we are about to overwrite in this case. 1058 * 1059 * It should never happen - if we allow dup2() do it, _really_ bad things 1060 * will follow. 
1061 */ 1062 1063 void fd_install(unsigned int fd, struct file *file) 1064 { 1065 struct files_struct *files = current->files; 1066 struct fdtable *fdt; 1067 spin_lock(&files->file_lock); 1068 fdt = files_fdtable(files); 1069 BUG_ON(fdt->fd[fd] != NULL); 1070 rcu_assign_pointer(fdt->fd[fd], file); 1071 spin_unlock(&files->file_lock); 1072 } 1073 1074 EXPORT_SYMBOL(fd_install); 1075 1076 long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1077 { 1078 char *tmp = getname(filename); 1079 int fd = PTR_ERR(tmp); 1080 1081 if (!IS_ERR(tmp)) { 1082 fd = get_unused_fd_flags(flags); 1083 if (fd >= 0) { 1084 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1085 if (IS_ERR(f)) { 1086 put_unused_fd(fd); 1087 fd = PTR_ERR(f); 1088 } else { 1089 fsnotify_open(f->f_path.dentry); 1090 fd_install(fd, f); 1091 } 1092 } 1093 putname(tmp); 1094 } 1095 return fd; 1096 } 1097 1098 asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1099 { 1100 long ret; 1101 1102 if (force_o_largefile()) 1103 flags |= O_LARGEFILE; 1104 1105 ret = do_sys_open(AT_FDCWD, filename, flags, mode); 1106 /* avoid REGPARM breakage on x86: */ 1107 asmlinkage_protect(3, ret, filename, flags, mode); 1108 return ret; 1109 } 1110 1111 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1112 int mode) 1113 { 1114 long ret; 1115 1116 if (force_o_largefile()) 1117 flags |= O_LARGEFILE; 1118 1119 ret = do_sys_open(dfd, filename, flags, mode); 1120 /* avoid REGPARM breakage on x86: */ 1121 asmlinkage_protect(4, ret, dfd, filename, flags, mode); 1122 return ret; 1123 } 1124 1125 #ifndef __alpha__ 1126 1127 /* 1128 * For backward compatibility? Maybe this should be moved 1129 * into arch/i386 instead? 1130 */ 1131 asmlinkage long sys_creat(const char __user * pathname, int mode) 1132 { 1133 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1134 } 1135 1136 #endif 1137 1138 /* 1139 * "id" is the POSIX thread ID. 
We use the 1140 * files pointer for this.. 1141 */ 1142 int filp_close(struct file *filp, fl_owner_t id) 1143 { 1144 int retval = 0; 1145 1146 if (!file_count(filp)) { 1147 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1148 return 0; 1149 } 1150 1151 if (filp->f_op && filp->f_op->flush) 1152 retval = filp->f_op->flush(filp, id); 1153 1154 dnotify_flush(filp, id); 1155 locks_remove_posix(filp, id); 1156 fput(filp); 1157 return retval; 1158 } 1159 1160 EXPORT_SYMBOL(filp_close); 1161 1162 /* 1163 * Careful here! We test whether the file pointer is NULL before 1164 * releasing the fd. This ensures that one clone task can't release 1165 * an fd while another clone is opening it. 1166 */ 1167 asmlinkage long sys_close(unsigned int fd) 1168 { 1169 struct file * filp; 1170 struct files_struct *files = current->files; 1171 struct fdtable *fdt; 1172 int retval; 1173 1174 spin_lock(&files->file_lock); 1175 fdt = files_fdtable(files); 1176 if (fd >= fdt->max_fds) 1177 goto out_unlock; 1178 filp = fdt->fd[fd]; 1179 if (!filp) 1180 goto out_unlock; 1181 rcu_assign_pointer(fdt->fd[fd], NULL); 1182 FD_CLR(fd, fdt->close_on_exec); 1183 __put_unused_fd(files, fd); 1184 spin_unlock(&files->file_lock); 1185 retval = filp_close(filp, files); 1186 1187 /* can't restart close syscall because file table entry was cleared */ 1188 if (unlikely(retval == -ERESTARTSYS || 1189 retval == -ERESTARTNOINTR || 1190 retval == -ERESTARTNOHAND || 1191 retval == -ERESTART_RESTARTBLOCK)) 1192 retval = -EINTR; 1193 1194 return retval; 1195 1196 out_unlock: 1197 spin_unlock(&files->file_lock); 1198 return -EBADF; 1199 } 1200 1201 EXPORT_SYMBOL(sys_close); 1202 1203 /* 1204 * This routine simulates a hangup on the tty, to arrange that users 1205 * are given clean terminals at login time. 
1206 */ 1207 asmlinkage long sys_vhangup(void) 1208 { 1209 if (capable(CAP_SYS_TTY_CONFIG)) { 1210 /* XXX: this needs locking */ 1211 tty_vhangup(current->signal->tty); 1212 return 0; 1213 } 1214 return -EPERM; 1215 } 1216 1217 /* 1218 * Called when an inode is about to be open. 1219 * We use this to disallow opening large files on 32bit systems if 1220 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1221 * on this flag in sys_open. 1222 */ 1223 int generic_file_open(struct inode * inode, struct file * filp) 1224 { 1225 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1226 return -EOVERFLOW; 1227 return 0; 1228 } 1229 1230 EXPORT_SYMBOL(generic_file_open); 1231 1232 /* 1233 * This is used by subsystems that don't want seekable 1234 * file descriptors 1235 */ 1236 int nonseekable_open(struct inode *inode, struct file *filp) 1237 { 1238 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1239 return 0; 1240 } 1241 1242 EXPORT_SYMBOL(nonseekable_open); 1243