// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/rw_hint.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#include "internal.h"

/* The only f_flags bits that F_SETFL is allowed to change. */
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

/*
 * Implement F_SETFL: validate the requested status flags in @arg and then
 * replace the SETFL_MASK bits of filp->f_flags with the ones from @arg.
 *
 * @fd is only passed through to the ->fasync() hook.
 *
 * Returns 0 on success, -EPERM or -EINVAL when a flag change is refused,
 * or whatever error ->check_flags() / ->fasync() report.
 */
static int setfl(int fd, struct file * filp, unsigned int arg)
{
	struct inode * inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/* Pipe packetized mode is controlled by O_DIRECT flag */
	if (!S_ISFIFO(inode->i_mode) &&
	    (arg & O_DIRECT) &&
	    !(filp->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/* Give the file's own operations a chance to veto the new flags. */
	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		/* A positive return is not an error; report plain success. */
		if (error > 0)
			error = 0;
	}
	/*
	 * Only the SETFL_MASK bits are taken from @arg; every other bit of
	 * f_flags (FASYNC included, it is not in the mask) is preserved.
	 */
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	filp->f_iocb_flags = iocb_flags(filp);
	spin_unlock(&filp->f_lock);

 out:
	return error;
}

/*
 * Allocate a file->f_owner struct if it doesn't exist, handling racing
 * allocations correctly.
 *
 * Returns 0 if the file has an owner struct on return (pre-existing,
 * ours, or a racing allocator's), -ENOMEM if the allocation failed.
 */
int file_f_owner_allocate(struct file *file)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(file);
	if (f_owner)
		return 0;

	f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL);
	if (!f_owner)
		return -ENOMEM;

	rwlock_init(&f_owner->lock);
	f_owner->file = file;
	/* If someone else raced us, drop our allocation. */
	if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner)))
		kfree(f_owner);
	return 0;
}
EXPORT_SYMBOL(file_f_owner_allocate);

/*
 * Tear down the file's owner struct, if it has one: drop the reference on
 * the recorded pid and free the struct.  file->f_owner itself is not
 * cleared here.
 */
void file_f_owner_release(struct file *file)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(file);
	if (f_owner) {
		put_pid(f_owner->pid);
		kfree(f_owner);
	}
}

/*
 * Record @pid / @type as the owner of @filp.
 *
 * The owner struct must already have been allocated (see
 * file_f_owner_allocate()); if it is missing this warns once and returns.
 * The owner is replaced only when @force is set or no owner pid is
 * currently recorded.  A NULL @pid clears the owner; a non-NULL @pid also
 * records the caller's uid/euid and notifies the security layer.
 */
void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(filp);
	if (WARN_ON_ONCE(!f_owner))
		return;

	write_lock_irq(&f_owner->lock);
	if (force || !f_owner->pid) {
		/* Swap the pid reference: drop the old one, take a new one. */
		put_pid(f_owner->pid);
		f_owner->pid = get_pid(pid);
		f_owner->pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			security_file_set_fowner(filp);
			f_owner->uid = cred->uid;
			f_owner->euid = cred->euid;
		}
	}
	write_unlock_irq(&f_owner->lock);
}
EXPORT_SYMBOL(__f_setown);

/*
 * Set the owner of @filp from the integer @who as used by F_SETOWN:
 * a positive value names a process (PIDTYPE_TGID), a negative value names
 * the process group -@who, and 0 clears the owner.
 *
 * Returns 0 on success, -EINVAL for INT_MIN (which cannot be negated),
 * -ESRCH if no matching pid is found, or the error from allocating the
 * owner struct.
 */
int f_setown(struct file *filp, int who, int force)
{
	enum pid_type type;
	struct pid *pid = NULL;
	int ret = 0;

	might_sleep();

	type = PIDTYPE_TGID;
	if (who < 0) {
		/* avoid overflow below */
		if (who == INT_MIN)
			return -EINVAL;

		type = PIDTYPE_PGID;
		who = -who;
	}

	ret = file_f_owner_allocate(filp);
	if (ret)
		return ret;

	/* The pid lookup and its use in __f_setown() share one RCU section. */
	rcu_read_lock();
	if (who) {
		pid = find_vpid(who);
		if (!pid)
			ret = -ESRCH;
	}

	if (!ret)
		__f_setown(filp, pid, type, force);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(f_setown);

/* Unconditionally clear the owner of @filp. */
void f_delown(struct file *filp)
{
	__f_setown(filp, NULL, PIDTYPE_TGID, 1);
}

/*
 * Return the owner of @filp in F_GETOWN form: the pid number as seen by
 * the caller, negated when the owner is a process group.  Returns 0 when
 * the file has no owner struct or pid_task() finds no task for the
 * recorded pid.
 */
pid_t f_getown(struct file *filp)
{
	pid_t pid = 0;
	struct fown_struct *f_owner;

	f_owner = file_f_owner(filp);
	if (!f_owner)
		return pid;

	read_lock_irq(&f_owner->lock);
	rcu_read_lock();
	if (pid_task(f_owner->pid, f_owner->pid_type)) {
		pid = pid_vnr(f_owner->pid);
		if (f_owner->pid_type == PIDTYPE_PGID)
			pid = -pid;
	}
	rcu_read_unlock();
	read_unlock_irq(&f_owner->lock);
	return pid;
}

/*
 * Implement F_SETOWN_EX: read a struct f_owner_ex from user address @arg
 * and force-set the owner to the thread, process or process group it
 * names.  A pid of 0 clears the owner.
 *
 * Returns 0 on success, -EFAULT if the struct cannot be read, -EINVAL for
 * an unknown owner type, -ESRCH if the pid does not exist, or the error
 * from allocating the owner struct.
 */
static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_TGID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	ret = file_f_owner_allocate(filp);
	if (ret)
		return ret;

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	/* Only a non-zero pid that fails to resolve is an error. */
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

/*
 * Implement F_GETOWN_EX: describe the owner of @filp as a struct
 * f_owner_ex and copy it to user address @arg.
 *
 * When the file has no owner struct the result is a zeroed pid with type
 * F_OWNER_TID.  Returns 0 on success, -EINVAL if the recorded pid type is
 * not one of the three known ones, -EFAULT if the copy to user fails.
 */
static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner = {};
	int ret = 0;
	struct fown_struct *f_owner;
	enum pid_type pid_type = PIDTYPE_PID;

	f_owner = file_f_owner(filp);
	if (f_owner) {
		/* This read lock stays held until after the switch below. */
		read_lock_irq(&f_owner->lock);
		rcu_read_lock();
		if (pid_task(f_owner->pid, f_owner->pid_type))
			owner.pid = pid_vnr(f_owner->pid);
		rcu_read_unlock();
		pid_type = f_owner->pid_type;
	}

	switch (pid_type) {
	case PIDTYPE_PID:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_TGID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	if (f_owner)
		read_unlock_irq(&f_owner->lock);

	/* The copy to user space happens only after the lock is dropped. */
	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * Implement F_GETOWNER_UIDS: store the owner's uid and euid, translated
 * into the caller's user namespace, into the two-element uid_t array at
 * user address @arg.  Both values are 0 when the file has no owner struct.
 *
 * Returns 0 on success or the OR of the two put_user() results.
 */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	struct fown_struct *f_owner;
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2] = {0, 0};
	int err;

	f_owner = file_f_owner(filp);
	if (f_owner) {
		read_lock_irq(&f_owner->lock);
		src[0] = from_kuid(user_ns, f_owner->uid);
		src[1] = from_kuid(user_ns, f_owner->euid);
		read_unlock_irq(&f_owner->lock);
	}

	/* Both stores are attempted even if the first one fails. */
	err  = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
/* Without CONFIG_CHECKPOINT_RESTORE the command is rejected. */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

/*
 * Return true if @hint is one of the RWH_WRITE_LIFE_* values.
 *
 * The BUILD_BUG_ON()s make the build fail if the WRITE_LIFE_* constants
 * ever stop being numerically equal to their RWH_WRITE_LIFE_*
 * counterparts.
 */
static bool rw_hint_valid(u64 hint)
{
	BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
	BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
	BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
	BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
	BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
	BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);

	switch (hint) {
	case RWH_WRITE_LIFE_NOT_SET:
	case RWH_WRITE_LIFE_NONE:
	case RWH_WRITE_LIFE_SHORT:
	case RWH_WRITE_LIFE_MEDIUM:
	case RWH_WRITE_LIFE_LONG:
	case RWH_WRITE_LIFE_EXTREME:
		return true;
	default:
		return false;
	}
}

/*
 * Implement F_GET_RW_HINT: copy the inode's write hint, widened to u64,
 * to user address @arg.  @cmd is unused.
 *
 * Returns 0 on success or -EFAULT.
 */
static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	u64 hint = READ_ONCE(inode->i_write_hint);

	if (copy_to_user(argp, &hint, sizeof(*argp)))
		return -EFAULT;
	return 0;
}

/*
 * Implement F_SET_RW_HINT: read a u64 hint from user address @arg,
 * validate it and store it in the inode.  @cmd is unused.
 *
 * Returns 0 on success, -EFAULT if the hint cannot be read, -EINVAL if it
 * is not a known RWH_WRITE_LIFE_* value.
 */
static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	u64 hint;

	if (copy_from_user(&hint, argp, sizeof(hint)))
		return -EFAULT;
	if (!rw_hint_valid(hint))
		return -EINVAL;

	WRITE_ONCE(inode->i_write_hint, hint);

	/*
	 * file->f_mapping->host may differ from inode. As an example,
	 * blkdev_open() modifies file->f_mapping.
	 */
	if (file->f_mapping->host != inode)
		WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);

	return 0;
}

/*
 * Is the file descriptor a dup of the file?
 *
 * Returns 1 if @fd currently refers to the same struct file as @filp,
 * 0 otherwise.
 */
static long f_dupfd_query(int fd, struct file *filp)
{
	CLASS(fd_raw, f)(fd);

	/*
	 * We can do the 'fdput()' immediately, as the only thing that
	 * matters is the pointer value which isn't changed by the fdput.
	 *
	 * Technically we didn't need a ref at all, and 'fdget()' was
	 * overkill, but given our lockless file pointer lookup, the
	 * alternatives are complicated.
	 *
	 * NOTE(review): no explicit fdput() is visible in this function;
	 * presumably CLASS(fd_raw, ...) drops the reference when 'f' goes
	 * out of scope - confirm against the CLASS() definition.
	 */
	return fd_file(f) == filp;
}

/* Let the caller figure out whether a given file was just created.
*/ 411 static long f_created_query(const struct file *filp) 412 { 413 return !!(filp->f_mode & FMODE_CREATED); 414 } 415 416 static int f_owner_sig(struct file *filp, int signum, bool setsig) 417 { 418 int ret = 0; 419 struct fown_struct *f_owner; 420 421 might_sleep(); 422 423 if (setsig) { 424 if (!valid_signal(signum)) 425 return -EINVAL; 426 427 ret = file_f_owner_allocate(filp); 428 if (ret) 429 return ret; 430 } 431 432 f_owner = file_f_owner(filp); 433 if (setsig) 434 f_owner->signum = signum; 435 else if (f_owner) 436 ret = f_owner->signum; 437 return ret; 438 } 439 440 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 441 struct file *filp) 442 { 443 void __user *argp = (void __user *)arg; 444 int argi = (int)arg; 445 struct flock flock; 446 long err = -EINVAL; 447 448 switch (cmd) { 449 case F_CREATED_QUERY: 450 err = f_created_query(filp); 451 break; 452 case F_DUPFD: 453 err = f_dupfd(argi, filp, 0); 454 break; 455 case F_DUPFD_CLOEXEC: 456 err = f_dupfd(argi, filp, O_CLOEXEC); 457 break; 458 case F_DUPFD_QUERY: 459 err = f_dupfd_query(argi, filp); 460 break; 461 case F_GETFD: 462 err = get_close_on_exec(fd) ? 
FD_CLOEXEC : 0; 463 break; 464 case F_SETFD: 465 err = 0; 466 set_close_on_exec(fd, argi & FD_CLOEXEC); 467 break; 468 case F_GETFL: 469 err = filp->f_flags; 470 break; 471 case F_SETFL: 472 err = setfl(fd, filp, argi); 473 break; 474 #if BITS_PER_LONG != 32 475 /* 32-bit arches must use fcntl64() */ 476 case F_OFD_GETLK: 477 #endif 478 case F_GETLK: 479 if (copy_from_user(&flock, argp, sizeof(flock))) 480 return -EFAULT; 481 err = fcntl_getlk(filp, cmd, &flock); 482 if (!err && copy_to_user(argp, &flock, sizeof(flock))) 483 return -EFAULT; 484 break; 485 #if BITS_PER_LONG != 32 486 /* 32-bit arches must use fcntl64() */ 487 case F_OFD_SETLK: 488 case F_OFD_SETLKW: 489 fallthrough; 490 #endif 491 case F_SETLK: 492 case F_SETLKW: 493 if (copy_from_user(&flock, argp, sizeof(flock))) 494 return -EFAULT; 495 err = fcntl_setlk(fd, filp, cmd, &flock); 496 break; 497 case F_GETOWN: 498 /* 499 * XXX If f_owner is a process group, the 500 * negative return value will get converted 501 * into an error. Oops. If we keep the 502 * current syscall conventions, the only way 503 * to fix this will be in libc. 
504 */ 505 err = f_getown(filp); 506 force_successful_syscall_return(); 507 break; 508 case F_SETOWN: 509 err = f_setown(filp, argi, 1); 510 break; 511 case F_GETOWN_EX: 512 err = f_getown_ex(filp, arg); 513 break; 514 case F_SETOWN_EX: 515 err = f_setown_ex(filp, arg); 516 break; 517 case F_GETOWNER_UIDS: 518 err = f_getowner_uids(filp, arg); 519 break; 520 case F_GETSIG: 521 err = f_owner_sig(filp, 0, false); 522 break; 523 case F_SETSIG: 524 err = f_owner_sig(filp, argi, true); 525 break; 526 case F_GETLEASE: 527 err = fcntl_getlease(filp); 528 break; 529 case F_SETLEASE: 530 err = fcntl_setlease(fd, filp, argi); 531 break; 532 case F_NOTIFY: 533 err = fcntl_dirnotify(fd, filp, argi); 534 break; 535 case F_SETPIPE_SZ: 536 case F_GETPIPE_SZ: 537 err = pipe_fcntl(filp, cmd, argi); 538 break; 539 case F_ADD_SEALS: 540 case F_GET_SEALS: 541 err = memfd_fcntl(filp, cmd, argi); 542 break; 543 case F_GET_RW_HINT: 544 err = fcntl_get_rw_hint(filp, cmd, arg); 545 break; 546 case F_SET_RW_HINT: 547 err = fcntl_set_rw_hint(filp, cmd, arg); 548 break; 549 default: 550 break; 551 } 552 return err; 553 } 554 555 static int check_fcntl_cmd(unsigned cmd) 556 { 557 switch (cmd) { 558 case F_CREATED_QUERY: 559 case F_DUPFD: 560 case F_DUPFD_CLOEXEC: 561 case F_DUPFD_QUERY: 562 case F_GETFD: 563 case F_SETFD: 564 case F_GETFL: 565 return 1; 566 } 567 return 0; 568 } 569 570 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 571 { 572 struct fd f = fdget_raw(fd); 573 long err = -EBADF; 574 575 if (!fd_file(f)) 576 goto out; 577 578 if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { 579 if (!check_fcntl_cmd(cmd)) 580 goto out1; 581 } 582 583 err = security_file_fcntl(fd_file(f), cmd, arg); 584 if (!err) 585 err = do_fcntl(fd, cmd, arg, fd_file(f)); 586 587 out1: 588 fdput(f); 589 out: 590 return err; 591 } 592 593 #if BITS_PER_LONG == 32 594 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 595 unsigned long, arg) 596 { 597 void __user *argp = 
(void __user *)arg; 598 struct fd f = fdget_raw(fd); 599 struct flock64 flock; 600 long err = -EBADF; 601 602 if (!fd_file(f)) 603 goto out; 604 605 if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { 606 if (!check_fcntl_cmd(cmd)) 607 goto out1; 608 } 609 610 err = security_file_fcntl(fd_file(f), cmd, arg); 611 if (err) 612 goto out1; 613 614 switch (cmd) { 615 case F_GETLK64: 616 case F_OFD_GETLK: 617 err = -EFAULT; 618 if (copy_from_user(&flock, argp, sizeof(flock))) 619 break; 620 err = fcntl_getlk64(fd_file(f), cmd, &flock); 621 if (!err && copy_to_user(argp, &flock, sizeof(flock))) 622 err = -EFAULT; 623 break; 624 case F_SETLK64: 625 case F_SETLKW64: 626 case F_OFD_SETLK: 627 case F_OFD_SETLKW: 628 err = -EFAULT; 629 if (copy_from_user(&flock, argp, sizeof(flock))) 630 break; 631 err = fcntl_setlk64(fd, fd_file(f), cmd, &flock); 632 break; 633 default: 634 err = do_fcntl(fd, cmd, arg, fd_file(f)); 635 break; 636 } 637 out1: 638 fdput(f); 639 out: 640 return err; 641 } 642 #endif 643 644 #ifdef CONFIG_COMPAT 645 /* careful - don't use anywhere else */ 646 #define copy_flock_fields(dst, src) \ 647 (dst)->l_type = (src)->l_type; \ 648 (dst)->l_whence = (src)->l_whence; \ 649 (dst)->l_start = (src)->l_start; \ 650 (dst)->l_len = (src)->l_len; \ 651 (dst)->l_pid = (src)->l_pid; 652 653 static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl) 654 { 655 struct compat_flock fl; 656 657 if (copy_from_user(&fl, ufl, sizeof(struct compat_flock))) 658 return -EFAULT; 659 copy_flock_fields(kfl, &fl); 660 return 0; 661 } 662 663 static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl) 664 { 665 struct compat_flock64 fl; 666 667 if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64))) 668 return -EFAULT; 669 copy_flock_fields(kfl, &fl); 670 return 0; 671 } 672 673 static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl) 674 { 675 struct compat_flock fl; 676 677 memset(&fl, 0, 
sizeof(struct compat_flock)); 678 copy_flock_fields(&fl, kfl); 679 if (copy_to_user(ufl, &fl, sizeof(struct compat_flock))) 680 return -EFAULT; 681 return 0; 682 } 683 684 static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl) 685 { 686 struct compat_flock64 fl; 687 688 BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start)); 689 BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len)); 690 691 memset(&fl, 0, sizeof(struct compat_flock64)); 692 copy_flock_fields(&fl, kfl); 693 if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) 694 return -EFAULT; 695 return 0; 696 } 697 #undef copy_flock_fields 698 699 static unsigned int 700 convert_fcntl_cmd(unsigned int cmd) 701 { 702 switch (cmd) { 703 case F_GETLK64: 704 return F_GETLK; 705 case F_SETLK64: 706 return F_SETLK; 707 case F_SETLKW64: 708 return F_SETLKW; 709 } 710 711 return cmd; 712 } 713 714 /* 715 * GETLK was successful and we need to return the data, but it needs to fit in 716 * the compat structure. 717 * l_start shouldn't be too big, unless the original start + end is greater than 718 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return 719 * -EOVERFLOW in that case. 
l_len could be too big, in which case we just 720 * truncate it, and only allow the app to see that part of the conflicting lock 721 * that might make sense to it anyway 722 */ 723 static int fixup_compat_flock(struct flock *flock) 724 { 725 if (flock->l_start > COMPAT_OFF_T_MAX) 726 return -EOVERFLOW; 727 if (flock->l_len > COMPAT_OFF_T_MAX) 728 flock->l_len = COMPAT_OFF_T_MAX; 729 return 0; 730 } 731 732 static long do_compat_fcntl64(unsigned int fd, unsigned int cmd, 733 compat_ulong_t arg) 734 { 735 struct fd f = fdget_raw(fd); 736 struct flock flock; 737 long err = -EBADF; 738 739 if (!fd_file(f)) 740 return err; 741 742 if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { 743 if (!check_fcntl_cmd(cmd)) 744 goto out_put; 745 } 746 747 err = security_file_fcntl(fd_file(f), cmd, arg); 748 if (err) 749 goto out_put; 750 751 switch (cmd) { 752 case F_GETLK: 753 err = get_compat_flock(&flock, compat_ptr(arg)); 754 if (err) 755 break; 756 err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock); 757 if (err) 758 break; 759 err = fixup_compat_flock(&flock); 760 if (!err) 761 err = put_compat_flock(&flock, compat_ptr(arg)); 762 break; 763 case F_GETLK64: 764 case F_OFD_GETLK: 765 err = get_compat_flock64(&flock, compat_ptr(arg)); 766 if (err) 767 break; 768 err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock); 769 if (!err) 770 err = put_compat_flock64(&flock, compat_ptr(arg)); 771 break; 772 case F_SETLK: 773 case F_SETLKW: 774 err = get_compat_flock(&flock, compat_ptr(arg)); 775 if (err) 776 break; 777 err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock); 778 break; 779 case F_SETLK64: 780 case F_SETLKW64: 781 case F_OFD_SETLK: 782 case F_OFD_SETLKW: 783 err = get_compat_flock64(&flock, compat_ptr(arg)); 784 if (err) 785 break; 786 err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock); 787 break; 788 default: 789 err = do_fcntl(fd, cmd, arg, fd_file(f)); 790 break; 791 } 792 out_put: 793 fdput(f); 794 return err; 795 } 796 797 
/* Compat fcntl64(): every command goes through do_compat_fcntl64(). */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}

/*
 * Compat fcntl(): the *64 and OFD lock commands are rejected with -EINVAL
 * here; everything else goes through do_compat_fcntl64().
 */
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,	/* POLL_MSG */
	EPOLLERR,				/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
	EPOLLHUP | EPOLLERR			/* POLL_HUP */
};

/*
 * Decide whether the owner recorded in @fown may send @sig to task @p.
 *
 * Returns non-zero when the recorded euid is the global root uid, or the
 * recorded euid or uid equals the target's saved or real uid, and in
 * addition security_file_send_sigiotask() returns 0.  Returns 0
 * otherwise.
 */
static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	/* The target's credentials are read under RCU. */
	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}

/*
 * Deliver the I/O notification for @fd to a single task @p.
 *
 * If the owner configured a signal number, a siginfo carrying @reason,
 * the poll band and @fd is sent with that signal; if that send fails, or
 * no signal number is configured (0), plain SIGIO is sent instead.
 * Nothing is sent when sigio_perm() denies the signal.
 */
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		default: {
			kernel_siginfo_t si;

			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			clear_siginfo(&si);
			si.si_signo = signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/*
			 * POSIX defines POLL_IN and friends to be signal
			 * specific si_codes for SIG_POLL.  Linux extended
			 * these si_codes to other signals in a way that is
			 * ambiguous if other signals also have signal
			 * specific si_codes.  In that case use SI_SIGIO instead
			 * to remove the ambiguity.
			 */
			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
				si.si_code = SI_SIGIO;

			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
			/*
			 * NOTE(review): the BUG_ON() above already tests this
			 * condition, so the first branch looks unreachable -
			 * confirm before relying on or removing it.
			 */
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
			si.si_fd    = fd;
			/* A zero return means the signal went out; we are done. */
			if (!do_send_sig_info(signum, &si, p, type))
				break;
		}
			fallthrough;	/* fall back on the old plain SIGIO signal */
		case 0:
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}

/*
 * Send the I/O notification for @fd / @band to the owner recorded in
 * @fown.  Does nothing when no owner pid is recorded.
 *
 * fown->lock is held for reading across the delivery.  For pid types up
 * to and including PIDTYPE_TGID a single task is looked up under RCU;
 * for the remaining types every task attached to the pid is signalled
 * under tasklist_lock.
 */
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	unsigned long flags;
	struct pid *pid;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
}

/* Send SIGURG to task @p if sigio_perm() allows it. */
static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, enum pid_type type)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

/*
 * Send SIGURG to the owner of @file, using the same task selection as
 * send_sigio().
 *
 * Returns 1 if an owner pid is recorded (whether or not any task ended
 * up being signalled), 0 if the file has no owner struct or no owner pid.
 */
int send_sigurg(struct file *file)
{
	struct fown_struct *fown;
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	unsigned long flags;
	int ret = 0;

	fown = file_f_owner(file);
	if (!fown)
		return 0;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
	return ret;
}

/* Taken, inside filp->f_lock, whenever a fasync list is modified. */
static DEFINE_SPINLOCK(fasync_lock);
/* Slab cache for struct fasync_struct; created in fcntl_init(). */
static struct kmem_cache *fasync_cache __ro_after_init;

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
990 * 991 */ 992 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 993 { 994 struct fasync_struct *fa, **fp; 995 int result = 0; 996 997 spin_lock(&filp->f_lock); 998 spin_lock(&fasync_lock); 999 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 1000 if (fa->fa_file != filp) 1001 continue; 1002 1003 write_lock_irq(&fa->fa_lock); 1004 fa->fa_file = NULL; 1005 write_unlock_irq(&fa->fa_lock); 1006 1007 *fp = fa->fa_next; 1008 kfree_rcu(fa, fa_rcu); 1009 filp->f_flags &= ~FASYNC; 1010 result = 1; 1011 break; 1012 } 1013 spin_unlock(&fasync_lock); 1014 spin_unlock(&filp->f_lock); 1015 return result; 1016 } 1017 1018 struct fasync_struct *fasync_alloc(void) 1019 { 1020 return kmem_cache_alloc(fasync_cache, GFP_KERNEL); 1021 } 1022 1023 /* 1024 * NOTE! This can be used only for unused fasync entries: 1025 * entries that actually got inserted on the fasync list 1026 * need to be released by rcu - see fasync_remove_entry. 1027 */ 1028 void fasync_free(struct fasync_struct *new) 1029 { 1030 kmem_cache_free(fasync_cache, new); 1031 } 1032 1033 /* 1034 * Insert a new entry into the fasync list. Return the pointer to the 1035 * old one if we didn't use the new one. 1036 * 1037 * NOTE! It is very important that the FASYNC flag always 1038 * match the state "is the filp on a fasync list". 
1039 */ 1040 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) 1041 { 1042 struct fasync_struct *fa, **fp; 1043 1044 spin_lock(&filp->f_lock); 1045 spin_lock(&fasync_lock); 1046 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 1047 if (fa->fa_file != filp) 1048 continue; 1049 1050 write_lock_irq(&fa->fa_lock); 1051 fa->fa_fd = fd; 1052 write_unlock_irq(&fa->fa_lock); 1053 goto out; 1054 } 1055 1056 rwlock_init(&new->fa_lock); 1057 new->magic = FASYNC_MAGIC; 1058 new->fa_file = filp; 1059 new->fa_fd = fd; 1060 new->fa_next = *fapp; 1061 rcu_assign_pointer(*fapp, new); 1062 filp->f_flags |= FASYNC; 1063 1064 out: 1065 spin_unlock(&fasync_lock); 1066 spin_unlock(&filp->f_lock); 1067 return fa; 1068 } 1069 1070 /* 1071 * Add a fasync entry. Return negative on error, positive if 1072 * added, and zero if did nothing but change an existing one. 1073 */ 1074 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) 1075 { 1076 struct fasync_struct *new; 1077 1078 new = fasync_alloc(); 1079 if (!new) 1080 return -ENOMEM; 1081 1082 /* 1083 * fasync_insert_entry() returns the old (update) entry if 1084 * it existed. 1085 * 1086 * So free the (unused) new entry and return 0 to let the 1087 * caller know that we didn't add any new fasync entries. 1088 */ 1089 if (fasync_insert_entry(fd, filp, fapp, new)) { 1090 fasync_free(new); 1091 return 0; 1092 } 1093 1094 return 1; 1095 } 1096 1097 /* 1098 * fasync_helper() is used by almost all character device drivers 1099 * to set up the fasync queue, and for regular files by the file 1100 * lease code. It returns negative on error, 0 if it did no changes 1101 * and positive if it added/deleted the entry. 
1102 */ 1103 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 1104 { 1105 if (!on) 1106 return fasync_remove_entry(filp, fapp); 1107 return fasync_add_entry(fd, filp, fapp); 1108 } 1109 1110 EXPORT_SYMBOL(fasync_helper); 1111 1112 /* 1113 * rcu_read_lock() is held 1114 */ 1115 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) 1116 { 1117 while (fa) { 1118 struct fown_struct *fown; 1119 unsigned long flags; 1120 1121 if (fa->magic != FASYNC_MAGIC) { 1122 printk(KERN_ERR "kill_fasync: bad magic number in " 1123 "fasync_struct!\n"); 1124 return; 1125 } 1126 read_lock_irqsave(&fa->fa_lock, flags); 1127 if (fa->fa_file) { 1128 fown = file_f_owner(fa->fa_file); 1129 if (!fown) 1130 goto next; 1131 /* Don't send SIGURG to processes which have not set a 1132 queued signum: SIGURG has its own default signalling 1133 mechanism. */ 1134 if (!(sig == SIGURG && fown->signum == 0)) 1135 send_sigio(fown, fa->fa_fd, band); 1136 } 1137 next: 1138 read_unlock_irqrestore(&fa->fa_lock, flags); 1139 fa = rcu_dereference(fa->fa_next); 1140 } 1141 } 1142 1143 void kill_fasync(struct fasync_struct **fp, int sig, int band) 1144 { 1145 /* First a quick test without locking: usually 1146 * the list is empty. 1147 */ 1148 if (*fp) { 1149 rcu_read_lock(); 1150 kill_fasync_rcu(rcu_dereference(*fp), sig, band); 1151 rcu_read_unlock(); 1152 } 1153 } 1154 EXPORT_SYMBOL(kill_fasync); 1155 1156 static int __init fcntl_init(void) 1157 { 1158 /* 1159 * Please add new bits here to ensure allocation uniqueness. 1160 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 1161 * is defined as O_NONBLOCK on some platforms and not on others. 
1162 */ 1163 BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != 1164 HWEIGHT32( 1165 (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | 1166 __FMODE_EXEC | __FMODE_NONOTIFY)); 1167 1168 fasync_cache = kmem_cache_create("fasync_cache", 1169 sizeof(struct fasync_struct), 0, 1170 SLAB_PANIC | SLAB_ACCOUNT, NULL); 1171 return 0; 1172 } 1173 1174 module_init(fcntl_init) 1175