1 /* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/string.h> 8 #include <linux/mm.h> 9 #include <linux/utime.h> 10 #include <linux/file.h> 11 #include <linux/smp_lock.h> 12 #include <linux/quotaops.h> 13 #include <linux/fsnotify.h> 14 #include <linux/module.h> 15 #include <linux/slab.h> 16 #include <linux/tty.h> 17 #include <linux/namei.h> 18 #include <linux/backing-dev.h> 19 #include <linux/capability.h> 20 #include <linux/security.h> 21 #include <linux/mount.h> 22 #include <linux/vfs.h> 23 #include <linux/fcntl.h> 24 #include <asm/uaccess.h> 25 #include <linux/fs.h> 26 #include <linux/personality.h> 27 #include <linux/pagemap.h> 28 #include <linux/syscalls.h> 29 #include <linux/rcupdate.h> 30 31 #include <asm/unistd.h> 32 33 int vfs_statfs(struct super_block *sb, struct kstatfs *buf) 34 { 35 int retval = -ENODEV; 36 37 if (sb) { 38 retval = -ENOSYS; 39 if (sb->s_op->statfs) { 40 memset(buf, 0, sizeof(*buf)); 41 retval = security_sb_statfs(sb); 42 if (retval) 43 return retval; 44 retval = sb->s_op->statfs(sb, buf); 45 if (retval == 0 && buf->f_frsize == 0) 46 buf->f_frsize = buf->f_bsize; 47 } 48 } 49 return retval; 50 } 51 52 EXPORT_SYMBOL(vfs_statfs); 53 54 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) 55 { 56 struct kstatfs st; 57 int retval; 58 59 retval = vfs_statfs(sb, &st); 60 if (retval) 61 return retval; 62 63 if (sizeof(*buf) == sizeof(st)) 64 memcpy(buf, &st, sizeof(st)); 65 else { 66 if (sizeof buf->f_blocks == 4) { 67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 68 0xffffffff00000000ULL) 69 return -EOVERFLOW; 70 /* 71 * f_files and f_ffree may be -1; it's okay to stuff 72 * that into 32 bits 73 */ 74 if (st.f_files != -1 && 75 (st.f_files & 0xffffffff00000000ULL)) 76 return -EOVERFLOW; 77 if (st.f_ffree != -1 && 78 (st.f_ffree & 0xffffffff00000000ULL)) 79 return -EOVERFLOW; 80 } 81 82 buf->f_type = st.f_type; 83 buf->f_bsize = st.f_bsize; 84 buf->f_blocks = st.f_blocks; 85 buf->f_bfree = st.f_bfree; 86 buf->f_bavail = st.f_bavail; 87 buf->f_files = st.f_files; 88 buf->f_ffree = st.f_ffree; 89 buf->f_fsid = st.f_fsid; 90 buf->f_namelen = st.f_namelen; 91 buf->f_frsize = st.f_frsize; 92 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 93 } 94 return 0; 95 } 96 97 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) 98 { 99 struct kstatfs st; 100 int retval; 101 102 retval = vfs_statfs(sb, &st); 103 if (retval) 104 return retval; 105 106 if (sizeof(*buf) == sizeof(st)) 107 memcpy(buf, &st, sizeof(st)); 108 else { 109 buf->f_type = st.f_type; 110 buf->f_bsize = st.f_bsize; 111 buf->f_blocks = st.f_blocks; 112 buf->f_bfree = st.f_bfree; 113 buf->f_bavail = st.f_bavail; 114 buf->f_files = st.f_files; 115 buf->f_ffree = st.f_ffree; 116 buf->f_fsid = st.f_fsid; 117 buf->f_namelen = st.f_namelen; 118 buf->f_frsize = st.f_frsize; 119 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 120 } 121 return 0; 122 } 123 124 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 125 { 126 struct nameidata nd; 127 int error; 128 129 error = user_path_walk(path, &nd); 130 if (!error) { 131 struct statfs tmp; 132 error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); 133 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 error = -EFAULT; 135 path_release(&nd); 136 } 137 return error; 138 } 139 140 141 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 142 { 143 struct nameidata nd; 144 long error; 145 146 if (sz != sizeof(*buf)) 147 return -EINVAL; 148 error = user_path_walk(path, &nd); 149 if (!error) { 150 struct statfs64 tmp; 151 error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); 152 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 error = -EFAULT; 154 path_release(&nd); 155 } 156 return error; 157 } 158 159 160 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 161 { 162 struct file * file; 163 struct statfs tmp; 164 int error; 165 166 error = -EBADF; 167 file = fget(fd); 168 if (!file) 169 goto out; 170 error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); 171 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 172 error = -EFAULT; 173 fput(file); 174 out: 175 return error; 176 } 177 178 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 179 { 180 struct file * file; 181 struct statfs64 tmp; 182 int error; 183 184 if (sz != sizeof(*buf)) 185 return -EINVAL; 186 187 error = -EBADF; 188 file = fget(fd); 189 if (!file) 190 goto out; 191 error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); 192 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 193 error = -EFAULT; 194 fput(file); 195 out: 196 return error; 197 } 198 199 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 200 struct file *filp) 201 { 202 int err; 203 struct iattr newattrs; 204 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 206 if (length < 0) 207 return -EINVAL; 208 209 newattrs.ia_size = length; 210 newattrs.ia_valid = ATTR_SIZE | time_attrs; 211 if (filp) { 212 newattrs.ia_file = filp; 213 newattrs.ia_valid |= ATTR_FILE; 214 } 215 216 mutex_lock(&dentry->d_inode->i_mutex); 217 err = notify_change(dentry, &newattrs); 218 mutex_unlock(&dentry->d_inode->i_mutex); 219 return err; 220 } 221 222 static long do_sys_truncate(const char __user * path, loff_t length) 223 { 224 struct nameidata nd; 225 struct inode * inode; 226 int error; 227 228 error = -EINVAL; 229 if (length < 0) /* sorry, but loff_t says... */ 230 goto out; 231 232 error = user_path_walk(path, &nd); 233 if (error) 234 goto out; 235 inode = nd.dentry->d_inode; 236 237 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 238 error = -EISDIR; 239 if (S_ISDIR(inode->i_mode)) 240 goto dput_and_out; 241 242 error = -EINVAL; 243 if (!S_ISREG(inode->i_mode)) 244 goto dput_and_out; 245 246 error = vfs_permission(&nd, MAY_WRITE); 247 if (error) 248 goto dput_and_out; 249 250 error = -EROFS; 251 if (IS_RDONLY(inode)) 252 goto dput_and_out; 253 254 error = -EPERM; 255 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 goto dput_and_out; 257 258 /* 259 * Make sure that there are no leases. 260 */ 261 error = break_lease(inode, FMODE_WRITE); 262 if (error) 263 goto dput_and_out; 264 265 error = get_write_access(inode); 266 if (error) 267 goto dput_and_out; 268 269 error = locks_verify_truncate(inode, NULL, length); 270 if (!error) { 271 DQUOT_INIT(inode); 272 error = do_truncate(nd.dentry, length, 0, NULL); 273 } 274 put_write_access(inode); 275 276 dput_and_out: 277 path_release(&nd); 278 out: 279 return error; 280 } 281 282 asmlinkage long sys_truncate(const char __user * path, unsigned long length) 283 { 284 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 285 return do_sys_truncate(path, (long)length); 286 } 287 288 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 289 { 290 struct inode * inode; 291 struct dentry *dentry; 292 struct file * file; 293 int error; 294 295 error = -EINVAL; 296 if (length < 0) 297 goto out; 298 error = -EBADF; 299 file = fget(fd); 300 if (!file) 301 goto out; 302 303 /* explicitly opened as large or we are on 64-bit box */ 304 if (file->f_flags & O_LARGEFILE) 305 small = 0; 306 307 dentry = file->f_dentry; 308 inode = dentry->d_inode; 309 error = -EINVAL; 310 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 311 goto out_putf; 312 313 error = -EINVAL; 314 /* Cannot ftruncate over 2^31 bytes without large file support */ 315 if (small && length > MAX_NON_LFS) 316 goto out_putf; 317 318 error = -EPERM; 319 if (IS_APPEND(inode)) 320 goto out_putf; 321 322 error = locks_verify_truncate(inode, file, length); 323 if (!error) 324 error = do_truncate(dentry, length, 0, file); 325 out_putf: 326 fput(file); 327 out: 328 return error; 329 } 330 331 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 332 { 333 return do_sys_ftruncate(fd, length, 1); 334 } 335 336 /* LFS versions of truncate are only needed on 32 bit machines */ 337 #if BITS_PER_LONG == 32 338 asmlinkage long sys_truncate64(const char __user * path, loff_t length) 339 { 340 return do_sys_truncate(path, length); 341 } 342 343 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 344 { 345 return do_sys_ftruncate(fd, length, 0); 346 } 347 #endif 348 349 #ifdef __ARCH_WANT_SYS_UTIME 350 351 /* 352 * sys_utime() can be implemented in user-level using sys_utimes(). 353 * Is this for backwards compatibility? If so, why not move it 354 * into the appropriate arch directory (for those architectures that 355 * need it). 356 */ 357 358 /* If times==NULL, set access and modification to current time, 359 * must be owner or have write permission. 360 * Else, update from *times, must be owner or super user. 361 */ 362 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) 363 { 364 int error; 365 struct nameidata nd; 366 struct inode * inode; 367 struct iattr newattrs; 368 369 error = user_path_walk(filename, &nd); 370 if (error) 371 goto out; 372 inode = nd.dentry->d_inode; 373 374 error = -EROFS; 375 if (IS_RDONLY(inode)) 376 goto dput_and_out; 377 378 /* Don't worry, the checks are done in inode_change_ok() */ 379 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 380 if (times) { 381 error = -EPERM; 382 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 383 goto dput_and_out; 384 385 error = get_user(newattrs.ia_atime.tv_sec, ×->actime); 386 newattrs.ia_atime.tv_nsec = 0; 387 if (!error) 388 error = get_user(newattrs.ia_mtime.tv_sec, ×->modtime); 389 newattrs.ia_mtime.tv_nsec = 0; 390 if (error) 391 goto dput_and_out; 392 393 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; 394 } else { 395 error = -EACCES; 396 if (IS_IMMUTABLE(inode)) 397 goto dput_and_out; 398 399 if (current->fsuid != inode->i_uid && 400 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 401 goto dput_and_out; 402 } 403 mutex_lock(&inode->i_mutex); 404 error = notify_change(nd.dentry, &newattrs); 405 mutex_unlock(&inode->i_mutex); 406 dput_and_out: 407 path_release(&nd); 408 out: 409 return error; 410 } 411 412 #endif 413 414 /* If times==NULL, set access and modification to current time, 415 * must be owner or have write permission. 416 * Else, update from *times, must be owner or super user. 417 */ 418 long do_utimes(int dfd, char __user *filename, struct timeval *times) 419 { 420 int error; 421 struct nameidata nd; 422 struct inode * inode; 423 struct iattr newattrs; 424 425 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 426 427 if (error) 428 goto out; 429 inode = nd.dentry->d_inode; 430 431 error = -EROFS; 432 if (IS_RDONLY(inode)) 433 goto dput_and_out; 434 435 /* Don't worry, the checks are done in inode_change_ok() */ 436 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 437 if (times) { 438 error = -EPERM; 439 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 440 goto dput_and_out; 441 442 newattrs.ia_atime.tv_sec = times[0].tv_sec; 443 newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; 444 newattrs.ia_mtime.tv_sec = times[1].tv_sec; 445 newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; 446 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; 447 } else { 448 error = -EACCES; 449 if (IS_IMMUTABLE(inode)) 450 goto dput_and_out; 451 452 if (current->fsuid != inode->i_uid && 453 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 454 goto dput_and_out; 455 } 456 mutex_lock(&inode->i_mutex); 457 error = notify_change(nd.dentry, &newattrs); 458 mutex_unlock(&inode->i_mutex); 459 dput_and_out: 460 path_release(&nd); 461 out: 462 return error; 463 } 464 465 asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) 466 { 467 struct timeval times[2]; 468 469 if (utimes && copy_from_user(×, utimes, sizeof(times))) 470 return -EFAULT; 471 return do_utimes(dfd, filename, utimes ? times : NULL); 472 } 473 474 asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) 475 { 476 return sys_futimesat(AT_FDCWD, filename, utimes); 477 } 478 479 480 /* 481 * access() needs to use the real uid/gid, not the effective uid/gid. 482 * We do this by temporarily clearing all FS-related capabilities and 483 * switching the fsuid/fsgid around to the real ones. 484 */ 485 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 486 { 487 struct nameidata nd; 488 int old_fsuid, old_fsgid; 489 kernel_cap_t old_cap; 490 int res; 491 492 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 493 return -EINVAL; 494 495 old_fsuid = current->fsuid; 496 old_fsgid = current->fsgid; 497 old_cap = current->cap_effective; 498 499 current->fsuid = current->uid; 500 current->fsgid = current->gid; 501 502 /* 503 * Clear the capabilities if we switch to a non-root user 504 * 505 * FIXME: There is a race here against sys_capset. The 506 * capabilities can change yet we will restore the old 507 * value below. We should hold task_capabilities_lock, 508 * but we cannot because user_path_walk can sleep. 509 */ 510 if (current->uid) 511 cap_clear(current->cap_effective); 512 else 513 current->cap_effective = current->cap_permitted; 514 515 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 516 if (!res) { 517 res = vfs_permission(&nd, mode); 518 /* SuS v2 requires we report a read only fs too */ 519 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) 520 && !special_file(nd.dentry->d_inode->i_mode)) 521 res = -EROFS; 522 path_release(&nd); 523 } 524 525 current->fsuid = old_fsuid; 526 current->fsgid = old_fsgid; 527 current->cap_effective = old_cap; 528 529 return res; 530 } 531 532 asmlinkage long sys_access(const char __user *filename, int mode) 533 { 534 return sys_faccessat(AT_FDCWD, filename, mode); 535 } 536 537 asmlinkage long sys_chdir(const char __user * filename) 538 { 539 struct nameidata nd; 540 int error; 541 542 error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); 543 if (error) 544 goto out; 545 546 error = vfs_permission(&nd, MAY_EXEC); 547 if (error) 548 goto dput_and_out; 549 550 set_fs_pwd(current->fs, nd.mnt, nd.dentry); 551 552 dput_and_out: 553 path_release(&nd); 554 out: 555 return error; 556 } 557 558 asmlinkage long sys_fchdir(unsigned int fd) 559 { 560 struct file *file; 561 struct dentry *dentry; 562 struct inode *inode; 563 struct vfsmount *mnt; 564 int error; 565 566 error = -EBADF; 567 file = fget(fd); 568 if (!file) 569 goto out; 570 571 dentry = file->f_dentry; 572 mnt = file->f_vfsmnt; 573 inode = dentry->d_inode; 574 575 error = -ENOTDIR; 576 if (!S_ISDIR(inode->i_mode)) 577 goto out_putf; 578 579 error = file_permission(file, MAY_EXEC); 580 if (!error) 581 set_fs_pwd(current->fs, mnt, dentry); 582 out_putf: 583 fput(file); 584 out: 585 return error; 586 } 587 588 asmlinkage long sys_chroot(const char __user * filename) 589 { 590 struct nameidata nd; 591 int error; 592 593 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 594 if (error) 595 goto out; 596 597 error = vfs_permission(&nd, MAY_EXEC); 598 if (error) 599 goto dput_and_out; 600 601 error = -EPERM; 602 if (!capable(CAP_SYS_CHROOT)) 603 goto dput_and_out; 604 605 set_fs_root(current->fs, nd.mnt, nd.dentry); 606 set_fs_altroot(); 607 error = 0; 608 dput_and_out: 609 path_release(&nd); 610 out: 611 return error; 612 } 613 614 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 615 { 616 struct inode * inode; 617 struct dentry * dentry; 618 struct file * file; 619 int err = -EBADF; 620 struct iattr newattrs; 621 622 file = fget(fd); 623 if (!file) 624 goto out; 625 626 dentry = file->f_dentry; 627 inode = dentry->d_inode; 628 629 err = -EROFS; 630 if (IS_RDONLY(inode)) 631 goto out_putf; 632 err = -EPERM; 633 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 634 goto out_putf; 635 mutex_lock(&inode->i_mutex); 636 if (mode == (mode_t) -1) 637 mode = inode->i_mode; 638 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 639 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 640 err = notify_change(dentry, &newattrs); 641 mutex_unlock(&inode->i_mutex); 642 643 out_putf: 644 fput(file); 645 out: 646 return err; 647 } 648 649 asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 650 mode_t mode) 651 { 652 struct nameidata nd; 653 struct inode * inode; 654 int error; 655 struct iattr newattrs; 656 657 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 658 if (error) 659 goto out; 660 inode = nd.dentry->d_inode; 661 662 error = -EROFS; 663 if (IS_RDONLY(inode)) 664 goto dput_and_out; 665 666 error = -EPERM; 667 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 668 goto dput_and_out; 669 670 mutex_lock(&inode->i_mutex); 671 if (mode == (mode_t) -1) 672 mode = inode->i_mode; 673 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 674 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 675 error = notify_change(nd.dentry, &newattrs); 676 mutex_unlock(&inode->i_mutex); 677 678 dput_and_out: 679 path_release(&nd); 680 out: 681 return error; 682 } 683 684 asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 685 { 686 return sys_fchmodat(AT_FDCWD, filename, mode); 687 } 688 689 static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 690 { 691 struct inode * inode; 692 int error; 693 struct iattr newattrs; 694 695 error = -ENOENT; 696 if (!(inode = dentry->d_inode)) { 697 printk(KERN_ERR "chown_common: NULL inode\n"); 698 goto out; 699 } 700 error = -EROFS; 701 if (IS_RDONLY(inode)) 702 goto out; 703 error = -EPERM; 704 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 705 goto out; 706 newattrs.ia_valid = ATTR_CTIME; 707 if (user != (uid_t) -1) { 708 newattrs.ia_valid |= ATTR_UID; 709 newattrs.ia_uid = user; 710 } 711 if (group != (gid_t) -1) { 712 newattrs.ia_valid |= ATTR_GID; 713 newattrs.ia_gid = group; 714 } 715 if (!S_ISDIR(inode->i_mode)) 716 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; 717 mutex_lock(&inode->i_mutex); 718 error = notify_change(dentry, &newattrs); 719 mutex_unlock(&inode->i_mutex); 720 out: 721 return error; 722 } 723 724 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 725 { 726 struct nameidata nd; 727 int error; 728 729 error = user_path_walk(filename, &nd); 730 if (!error) { 731 error = chown_common(nd.dentry, user, group); 732 path_release(&nd); 733 } 734 return error; 735 } 736 737 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 738 gid_t group, int flag) 739 { 740 struct nameidata nd; 741 int error = -EINVAL; 742 int follow; 743 744 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 745 goto out; 746 747 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 748 error = __user_walk_fd(dfd, filename, follow, &nd); 749 if (!error) { 750 error = chown_common(nd.dentry, user, group); 751 path_release(&nd); 752 } 753 out: 754 return error; 755 } 756 757 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 758 { 759 struct nameidata nd; 760 int error; 761 762 error = user_path_walk_link(filename, &nd); 763 if (!error) { 764 error = chown_common(nd.dentry, user, group); 765 path_release(&nd); 766 } 767 return error; 768 } 769 770 771 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 772 { 773 struct file * file; 774 int error = -EBADF; 775 776 file = fget(fd); 777 if (file) { 778 error = chown_common(file->f_dentry, user, group); 779 fput(file); 780 } 781 return error; 782 } 783 784 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 785 int flags, struct file *f, 786 int (*open)(struct inode *, struct file *)) 787 { 788 struct inode *inode; 789 int error; 790 791 f->f_flags = flags; 792 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 793 FMODE_PREAD | FMODE_PWRITE; 794 inode = dentry->d_inode; 795 if (f->f_mode & FMODE_WRITE) { 796 error = get_write_access(inode); 797 if (error) 798 goto cleanup_file; 799 } 800 801 f->f_mapping = inode->i_mapping; 802 f->f_dentry = dentry; 803 f->f_vfsmnt = mnt; 804 f->f_pos = 0; 805 f->f_op = fops_get(inode->i_fop); 806 file_move(f, &inode->i_sb->s_files); 807 808 if (!open && f->f_op) 809 open = f->f_op->open; 810 if (open) { 811 error = open(inode, f); 812 if (error) 813 goto cleanup_all; 814 } 815 816 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 817 818 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 819 820 /* NB: we're sure to have correct a_ops only after f_op->open */ 821 if (f->f_flags & O_DIRECT) { 822 if (!f->f_mapping->a_ops || 823 ((!f->f_mapping->a_ops->direct_IO) && 824 (!f->f_mapping->a_ops->get_xip_page))) { 825 fput(f); 826 f = ERR_PTR(-EINVAL); 827 } 828 } 829 830 return f; 831 832 cleanup_all: 833 fops_put(f->f_op); 834 if (f->f_mode & FMODE_WRITE) 835 put_write_access(inode); 836 file_kill(f); 837 f->f_dentry = NULL; 838 f->f_vfsmnt = NULL; 839 cleanup_file: 840 put_filp(f); 841 dput(dentry); 842 mntput(mnt); 843 return ERR_PTR(error); 844 } 845 846 /* 847 * Note that while the flag value (low two bits) for sys_open means: 848 * 00 - read-only 849 * 01 - write-only 850 * 10 - read-write 851 * 11 - special 852 * it is changed into 853 * 00 - no permissions needed 854 * 01 - read-permission 855 * 10 - write-permission 856 * 11 - read-write 857 * for the internal routines (ie open_namei()/follow_link() etc). 00 is 858 * used by symlinks. 859 */ 860 static struct file *do_filp_open(int dfd, const char *filename, int flags, 861 int mode) 862 { 863 int namei_flags, error; 864 struct nameidata nd; 865 866 namei_flags = flags; 867 if ((namei_flags+1) & O_ACCMODE) 868 namei_flags++; 869 870 error = open_namei(dfd, filename, namei_flags, mode, &nd); 871 if (!error) 872 return nameidata_to_filp(&nd, flags); 873 874 return ERR_PTR(error); 875 } 876 877 struct file *filp_open(const char *filename, int flags, int mode) 878 { 879 return do_filp_open(AT_FDCWD, filename, flags, mode); 880 } 881 EXPORT_SYMBOL(filp_open); 882 883 /** 884 * lookup_instantiate_filp - instantiates the open intent filp 885 * @nd: pointer to nameidata 886 * @dentry: pointer to dentry 887 * @open: open callback 888 * 889 * Helper for filesystems that want to use lookup open intents and pass back 890 * a fully instantiated struct file to the caller. 891 * This function is meant to be called from within a filesystem's 892 * lookup method. 893 * Note that in case of error, nd->intent.open.file is destroyed, but the 894 * path information remains valid. 895 * If the open callback is set to NULL, then the standard f_op->open() 896 * filesystem callback is substituted. 897 */ 898 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 899 int (*open)(struct inode *, struct file *)) 900 { 901 if (IS_ERR(nd->intent.open.file)) 902 goto out; 903 if (IS_ERR(dentry)) 904 goto out_err; 905 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), 906 nd->intent.open.flags - 1, 907 nd->intent.open.file, 908 open); 909 out: 910 return nd->intent.open.file; 911 out_err: 912 release_open_intent(nd); 913 nd->intent.open.file = (struct file *)dentry; 914 goto out; 915 } 916 EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 917 918 /** 919 * nameidata_to_filp - convert a nameidata to an open filp. 920 * @nd: pointer to nameidata 921 * @flags: open flags 922 * 923 * Note that this function destroys the original nameidata 924 */ 925 struct file *nameidata_to_filp(struct nameidata *nd, int flags) 926 { 927 struct file *filp; 928 929 /* Pick up the filp from the open intent */ 930 filp = nd->intent.open.file; 931 /* Has the filesystem initialised the file for us? */ 932 if (filp->f_dentry == NULL) 933 filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); 934 else 935 path_release(nd); 936 return filp; 937 } 938 939 /* 940 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 941 * error. 942 */ 943 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 944 { 945 int error; 946 struct file *f; 947 948 error = -ENFILE; 949 f = get_empty_filp(); 950 if (f == NULL) { 951 dput(dentry); 952 mntput(mnt); 953 return ERR_PTR(error); 954 } 955 956 return __dentry_open(dentry, mnt, flags, f, NULL); 957 } 958 EXPORT_SYMBOL(dentry_open); 959 960 /* 961 * Find an empty file descriptor entry, and mark it busy. 962 */ 963 int get_unused_fd(void) 964 { 965 struct files_struct * files = current->files; 966 int fd, error; 967 struct fdtable *fdt; 968 969 error = -EMFILE; 970 spin_lock(&files->file_lock); 971 972 repeat: 973 fdt = files_fdtable(files); 974 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 975 fdt->max_fdset, 976 fdt->next_fd); 977 978 /* 979 * N.B. For clone tasks sharing a files structure, this test 980 * will limit the total number of files that can be opened. 981 */ 982 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 983 goto out; 984 985 /* Do we need to expand the fd array or fd set? */ 986 error = expand_files(files, fd); 987 if (error < 0) 988 goto out; 989 990 if (error) { 991 /* 992 * If we needed to expand the fs array we 993 * might have blocked - try again. 994 */ 995 error = -EMFILE; 996 goto repeat; 997 } 998 999 FD_SET(fd, fdt->open_fds); 1000 FD_CLR(fd, fdt->close_on_exec); 1001 fdt->next_fd = fd + 1; 1002 #if 1 1003 /* Sanity check */ 1004 if (fdt->fd[fd] != NULL) { 1005 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 1006 fdt->fd[fd] = NULL; 1007 } 1008 #endif 1009 error = fd; 1010 1011 out: 1012 spin_unlock(&files->file_lock); 1013 return error; 1014 } 1015 1016 EXPORT_SYMBOL(get_unused_fd); 1017 1018 static void __put_unused_fd(struct files_struct *files, unsigned int fd) 1019 { 1020 struct fdtable *fdt = files_fdtable(files); 1021 __FD_CLR(fd, fdt->open_fds); 1022 if (fd < fdt->next_fd) 1023 fdt->next_fd = fd; 1024 } 1025 1026 void fastcall put_unused_fd(unsigned int fd) 1027 { 1028 struct files_struct *files = current->files; 1029 spin_lock(&files->file_lock); 1030 __put_unused_fd(files, fd); 1031 spin_unlock(&files->file_lock); 1032 } 1033 1034 EXPORT_SYMBOL(put_unused_fd); 1035 1036 /* 1037 * Install a file pointer in the fd array. 1038 * 1039 * The VFS is full of places where we drop the files lock between 1040 * setting the open_fds bitmap and installing the file in the file 1041 * array. At any such point, we are vulnerable to a dup2() race 1042 * installing a file in the array before us. We need to detect this and 1043 * fput() the struct file we are about to overwrite in this case. 1044 * 1045 * It should never happen - if we allow dup2() do it, _really_ bad things 1046 * will follow. 1047 */ 1048 1049 void fastcall fd_install(unsigned int fd, struct file * file) 1050 { 1051 struct files_struct *files = current->files; 1052 struct fdtable *fdt; 1053 spin_lock(&files->file_lock); 1054 fdt = files_fdtable(files); 1055 BUG_ON(fdt->fd[fd] != NULL); 1056 rcu_assign_pointer(fdt->fd[fd], file); 1057 spin_unlock(&files->file_lock); 1058 } 1059 1060 EXPORT_SYMBOL(fd_install); 1061 1062 long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1063 { 1064 char *tmp = getname(filename); 1065 int fd = PTR_ERR(tmp); 1066 1067 if (!IS_ERR(tmp)) { 1068 fd = get_unused_fd(); 1069 if (fd >= 0) { 1070 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1071 if (IS_ERR(f)) { 1072 put_unused_fd(fd); 1073 fd = PTR_ERR(f); 1074 } else { 1075 fsnotify_open(f->f_dentry); 1076 fd_install(fd, f); 1077 } 1078 } 1079 putname(tmp); 1080 } 1081 return fd; 1082 } 1083 1084 asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1085 { 1086 if (force_o_largefile()) 1087 flags |= O_LARGEFILE; 1088 1089 return do_sys_open(AT_FDCWD, filename, flags, mode); 1090 } 1091 EXPORT_SYMBOL_GPL(sys_open); 1092 1093 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1094 int mode) 1095 { 1096 if (force_o_largefile()) 1097 flags |= O_LARGEFILE; 1098 1099 return do_sys_open(dfd, filename, flags, mode); 1100 } 1101 EXPORT_SYMBOL_GPL(sys_openat); 1102 1103 #ifndef __alpha__ 1104 1105 /* 1106 * For backward compatibility? Maybe this should be moved 1107 * into arch/i386 instead? 1108 */ 1109 asmlinkage long sys_creat(const char __user * pathname, int mode) 1110 { 1111 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1112 } 1113 1114 #endif 1115 1116 /* 1117 * "id" is the POSIX thread ID. We use the 1118 * files pointer for this.. 1119 */ 1120 int filp_close(struct file *filp, fl_owner_t id) 1121 { 1122 int retval = 0; 1123 1124 if (!file_count(filp)) { 1125 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1126 return 0; 1127 } 1128 1129 if (filp->f_op && filp->f_op->flush) 1130 retval = filp->f_op->flush(filp); 1131 1132 dnotify_flush(filp, id); 1133 locks_remove_posix(filp, id); 1134 fput(filp); 1135 return retval; 1136 } 1137 1138 EXPORT_SYMBOL(filp_close); 1139 1140 /* 1141 * Careful here! We test whether the file pointer is NULL before 1142 * releasing the fd. This ensures that one clone task can't release 1143 * an fd while another clone is opening it. 1144 */ 1145 asmlinkage long sys_close(unsigned int fd) 1146 { 1147 struct file * filp; 1148 struct files_struct *files = current->files; 1149 struct fdtable *fdt; 1150 1151 spin_lock(&files->file_lock); 1152 fdt = files_fdtable(files); 1153 if (fd >= fdt->max_fds) 1154 goto out_unlock; 1155 filp = fdt->fd[fd]; 1156 if (!filp) 1157 goto out_unlock; 1158 rcu_assign_pointer(fdt->fd[fd], NULL); 1159 FD_CLR(fd, fdt->close_on_exec); 1160 __put_unused_fd(files, fd); 1161 spin_unlock(&files->file_lock); 1162 return filp_close(filp, files); 1163 1164 out_unlock: 1165 spin_unlock(&files->file_lock); 1166 return -EBADF; 1167 } 1168 1169 EXPORT_SYMBOL(sys_close); 1170 1171 /* 1172 * This routine simulates a hangup on the tty, to arrange that users 1173 * are given clean terminals at login time. 1174 */ 1175 asmlinkage long sys_vhangup(void) 1176 { 1177 if (capable(CAP_SYS_TTY_CONFIG)) { 1178 tty_vhangup(current->signal->tty); 1179 return 0; 1180 } 1181 return -EPERM; 1182 } 1183 1184 /* 1185 * Called when an inode is about to be open. 1186 * We use this to disallow opening large files on 32bit systems if 1187 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1188 * on this flag in sys_open. 1189 */ 1190 int generic_file_open(struct inode * inode, struct file * filp) 1191 { 1192 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1193 return -EFBIG; 1194 return 0; 1195 } 1196 1197 EXPORT_SYMBOL(generic_file_open); 1198 1199 /* 1200 * This is used by subsystems that don't want seekable 1201 * file descriptors 1202 */ 1203 int nonseekable_open(struct inode *inode, struct file *filp) 1204 { 1205 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1206 return 0; 1207 } 1208 1209 EXPORT_SYMBOL(nonseekable_open); 1210