// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/rw_hint.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#include "internal.h"

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned int arg)
{
	struct inode * inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/* Pipe packetized mode is controlled by O_DIRECT flag */
	if (!S_ISFIFO(inode->i_mode) &&
	    (arg & O_DIRECT) &&
	    !(filp->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	filp->f_iocb_flags = iocb_flags(filp);
	spin_unlock(&filp->f_lock);

 out:
	return error;
}

/*
 * Allocate a file->f_owner struct if it doesn't exist, handling racing
 * allocations correctly.
 */
int file_f_owner_allocate(struct file *file)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(file);
	if (f_owner)
		return 0;

	f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL);
	if (!f_owner)
		return -ENOMEM;

	rwlock_init(&f_owner->lock);
	f_owner->file = file;
	/* If someone else raced us, drop our allocation. */
	if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner)))
		kfree(f_owner);
	return 0;
}
EXPORT_SYMBOL(file_f_owner_allocate);
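/*
 * Example (userspace sketch, not part of this file): setfl() above is
 * where fcntl(F_SETFL) ends up. A minimal, hedged illustration of
 * toggling O_NONBLOCK on an already-open descriptor; set_nonblock() is
 * a hypothetical helper and error handling is abbreviated:
 *
 *	#include <fcntl.h>
 *
 *	static int set_nonblock(int fd, int on)
 *	{
 *		int flags = fcntl(fd, F_GETFL);
 *
 *		if (flags < 0)
 *			return -1;
 *		flags = on ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
 *		// Bits outside SETFL_MASK are kept from the file's
 *		// current f_flags, not taken from the caller (see
 *		// setfl() above), so passing them back is harmless.
 *		return fcntl(fd, F_SETFL, flags);
 *	}
 */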
void file_f_owner_release(struct file *file)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(file);
	if (f_owner) {
		put_pid(f_owner->pid);
		kfree(f_owner);
	}
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	struct fown_struct *f_owner;

	f_owner = file_f_owner(filp);
	if (WARN_ON_ONCE(!f_owner))
		return;

	write_lock_irq(&f_owner->lock);
	if (force || !f_owner->pid) {
		put_pid(f_owner->pid);
		f_owner->pid = get_pid(pid);
		f_owner->pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			security_file_set_fowner(filp);
			f_owner->uid = cred->uid;
			f_owner->euid = cred->euid;
		}
	}
	write_unlock_irq(&f_owner->lock);
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, int who, int force)
{
	enum pid_type type;
	struct pid *pid = NULL;
	int ret = 0;

	might_sleep();

	type = PIDTYPE_TGID;
	if (who < 0) {
		/* avoid overflow below */
		if (who == INT_MIN)
			return -EINVAL;

		type = PIDTYPE_PGID;
		who = -who;
	}

	ret = file_f_owner_allocate(filp);
	if (ret)
		return ret;

	rcu_read_lock();
	if (who) {
		pid = find_vpid(who);
		if (!pid)
			ret = -ESRCH;
	}

	if (!ret)
		__f_setown(filp, pid, type, force);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	__f_setown(filp, NULL, PIDTYPE_TGID, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid = 0;
	struct fown_struct *f_owner;

	f_owner = file_f_owner(filp);
	if (!f_owner)
		return pid;

	read_lock_irq(&f_owner->lock);
	rcu_read_lock();
	if (pid_task(f_owner->pid, f_owner->pid_type)) {
		pid = pid_vnr(f_owner->pid);
		if (f_owner->pid_type == PIDTYPE_PGID)
			pid = -pid;
	}
	rcu_read_unlock();
	read_unlock_irq(&f_owner->lock);
	return pid;
}

static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_TGID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	ret = file_f_owner_allocate(filp);
	if (ret)
		return ret;

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}
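/*
 * Example (userspace sketch): f_setown_ex() above implements
 * F_SETOWN_EX, which, unlike plain F_SETOWN, can direct SIGIO/SIGURG
 * at a single thread (F_OWNER_TID) instead of the whole thread group.
 * A hedged sketch; own_fd_in_this_thread() is hypothetical and
 * gettid() needs glibc >= 2.30 (or a syscall(SYS_gettid) fallback):
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int own_fd_in_this_thread(int fd)
 *	{
 *		struct f_owner_ex owner = {
 *			.type = F_OWNER_TID,
 *			.pid  = gettid(),
 *		};
 *
 *		return fcntl(fd, F_SETOWN_EX, &owner);
 *	}
 */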
static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner = {};
	int ret = 0;
	struct fown_struct *f_owner;
	enum pid_type pid_type = PIDTYPE_PID;

	f_owner = file_f_owner(filp);
	if (f_owner) {
		read_lock_irq(&f_owner->lock);
		rcu_read_lock();
		if (pid_task(f_owner->pid, f_owner->pid_type))
			owner.pid = pid_vnr(f_owner->pid);
		rcu_read_unlock();
		pid_type = f_owner->pid_type;
	}

	switch (pid_type) {
	case PIDTYPE_PID:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_TGID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	if (f_owner)
		read_unlock_irq(&f_owner->lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	struct fown_struct *f_owner;
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2] = {0, 0};
	int err;

	f_owner = file_f_owner(filp);
	if (f_owner) {
		read_lock_irq(&f_owner->lock);
		src[0] = from_kuid(user_ns, f_owner->uid);
		src[1] = from_kuid(user_ns, f_owner->euid);
		read_unlock_irq(&f_owner->lock);
	}

	err = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

static bool rw_hint_valid(u64 hint)
{
	BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
	BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
	BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
	BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
	BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
	BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);

	switch (hint) {
	case RWH_WRITE_LIFE_NOT_SET:
	case RWH_WRITE_LIFE_NONE:
	case RWH_WRITE_LIFE_SHORT:
	case RWH_WRITE_LIFE_MEDIUM:
	case RWH_WRITE_LIFE_LONG:
	case RWH_WRITE_LIFE_EXTREME:
		return true;
	default:
		return false;
	}
}

static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	u64 hint = READ_ONCE(inode->i_write_hint);

	if (copy_to_user(argp, &hint, sizeof(*argp)))
		return -EFAULT;
	return 0;
}

static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	u64 hint;

	if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
		return -EPERM;

	if (copy_from_user(&hint, argp, sizeof(hint)))
		return -EFAULT;
	if (!rw_hint_valid(hint))
		return -EINVAL;

	WRITE_ONCE(inode->i_write_hint, hint);

	/*
	 * file->f_mapping->host may differ from inode. As an example,
	 * blkdev_open() modifies file->f_mapping.
	 */
	if (file->f_mapping->host != inode)
		WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);

	return 0;
}
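/*
 * Example (userspace sketch): as fcntl_set_rw_hint() above shows, the
 * F_SET_RW_HINT argument is a pointer to a 64-bit hint, not the value
 * itself. A hedged sketch of tagging a file's data as short-lived;
 * mark_short_lived() is hypothetical, and depending on the libc the
 * F_SET_RW_HINT/RWH_* constants may have to come from <linux/fcntl.h>:
 *
 *	#include <stdint.h>
 *	#include <fcntl.h>
 *
 *	static int mark_short_lived(int fd)
 *	{
 *		uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *
 *		return fcntl(fd, F_SET_RW_HINT, &hint);
 *	}
 */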
/* Is the file descriptor a dup of the file? */
static long f_dupfd_query(int fd, struct file *filp)
{
	CLASS(fd_raw, f)(fd);

	if (fd_empty(f))
		return -EBADF;

	/*
	 * We can do the 'fdput()' immediately, as the only thing that
	 * matters is the pointer value which isn't changed by the fdput.
	 *
	 * Technically we didn't need a ref at all, and 'fdget()' was
	 * overkill, but given our lockless file pointer lookup, the
	 * alternatives are complicated.
	 */
	return fd_file(f) == filp;
}

/* Let the caller figure out whether a given file was just created. */
static long f_created_query(const struct file *filp)
{
	return !!(filp->f_mode & FMODE_CREATED);
}

static int f_owner_sig(struct file *filp, int signum, bool setsig)
{
	int ret = 0;
	struct fown_struct *f_owner;

	might_sleep();

	if (setsig) {
		if (!valid_signal(signum))
			return -EINVAL;

		ret = file_f_owner_allocate(filp);
		if (ret)
			return ret;
	}

	f_owner = file_f_owner(filp);
	if (setsig)
		f_owner->signum = signum;
	else if (f_owner)
		ret = f_owner->signum;
	return ret;
}
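/*
 * Example (userspace sketch): F_DUPFD_QUERY, handled by f_dupfd_query()
 * above, answers "do these two descriptors name the same open file?"
 * by comparing struct file pointers, with no path or inode games. It is
 * a relatively recent command, so on older kernels treat EINVAL as
 * "unsupported"; same_open_file() is hypothetical:
 *
 *	#include <fcntl.h>
 *
 *	// Returns 1 if fd2 refers to the same open file as fd1,
 *	// 0 if not, -1 (with errno set) on error.
 *	static int same_open_file(int fd1, int fd2)
 *	{
 *		return fcntl(fd1, F_DUPFD_QUERY, fd2);
 *	}
 */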
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	void __user *argp = (void __user *)arg;
	int argi = (int)arg;
	struct flock flock;
	long err = -EINVAL;

	switch (cmd) {
	case F_CREATED_QUERY:
		err = f_created_query(filp);
		break;
	case F_DUPFD:
		err = f_dupfd(argi, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(argi, filp, O_CLOEXEC);
		break;
	case F_DUPFD_QUERY:
		err = f_dupfd_query(argi, filp);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, argi & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, argi);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		fallthrough;
#endif
	case F_SETLK:
	case F_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, argi, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = f_owner_sig(filp, 0, false);
		break;
	case F_SETSIG:
		err = f_owner_sig(filp, argi, true);
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, argi);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, argi);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, argi);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = memfd_fcntl(filp, cmd, argi);
		break;
	case F_GET_RW_HINT:
		err = fcntl_get_rw_hint(filp, cmd, arg);
		break;
	case F_SET_RW_HINT:
		err = fcntl_set_rw_hint(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}

static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_CREATED_QUERY:
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_DUPFD_QUERY:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	CLASS(fd_raw, f)(fd);
	long err;

	if (fd_empty(f))
		return -EBADF;

	if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			return -EBADF;
	}

	err = security_file_fcntl(fd_file(f), cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, fd_file(f));

	return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	CLASS(fd_raw, f)(fd);
	struct flock64 flock;
	long err;

	if (fd_empty(f))
		return -EBADF;

	if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			return -EBADF;
	}

	err = security_file_fcntl(fd_file(f), cmd, arg);
	if (err)
		return err;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(fd_file(f), cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, fd_file(f), cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, fd_file(f));
		break;
	}
	return err;
}
#endif

#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
#define copy_flock_fields(dst, src)		\
	(dst)->l_type = (src)->l_type;		\
	(dst)->l_whence = (src)->l_whence;	\
	(dst)->l_start = (src)->l_start;	\
	(dst)->l_len = (src)->l_len;		\
	(dst)->l_pid = (src)->l_pid;

static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}
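/*
 * Example (userspace sketch, placed here among the flock helpers): the
 * F_OFD_SETLK/F_OFD_SETLKW cases routed through fcntl_setlk() above
 * take open-file-description locks, owned by the struct file rather
 * than the process. A hedged sketch of a whole-file write lock;
 * lock_whole_file() is hypothetical, glibc wants _GNU_SOURCE for the
 * F_OFD_* constants, and l_pid must be zero for OFD commands:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	static int lock_whole_file(int fd)
 *	{
 *		struct flock fl;
 *
 *		memset(&fl, 0, sizeof(fl));	// l_pid must stay 0
 *		fl.l_type = F_WRLCK;
 *		fl.l_whence = SEEK_SET;
 *		fl.l_start = 0;
 *		fl.l_len = 0;			// 0 means "to EOF"
 *		return fcntl(fd, F_OFD_SETLK, &fl);
 *	}
 */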
static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	memset(&fl, 0, sizeof(struct compat_flock));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
		return -EFAULT;
	return 0;
}

static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

	memset(&fl, 0, sizeof(struct compat_flock64));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
		return -EFAULT;
	return 0;
}
#undef copy_flock_fields

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	}

	return cmd;
}

/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater
 * than COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we
 * return -EOVERFLOW in that case.  l_len could be too big, in which case we
 * just truncate it, and only let the app see the part of the conflicting
 * lock that might make sense to it anyway.
 */
static int fixup_compat_flock(struct flock *flock)
{
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}

static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
			      compat_ulong_t arg)
{
	CLASS(fd_raw, f)(fd);
	struct flock flock;
	long err;

	if (fd_empty(f))
		return -EBADF;

	if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			return -EBADF;
	}

	err = security_file_fcntl(fd_file(f), cmd, arg);
	if (err)
		return err;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (!err)
			err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
		if (!err)
			err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, fd_file(f));
		break;
	}
	return err;
}
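/*
 * Example (userspace perspective, hedged): fixup_compat_flock() above is
 * why a 32-bit process using the legacy 32-bit struct flock can see
 * F_GETLK fail with EOVERFLOW when the conflicting lock starts beyond
 * COMPAT_OFF_T_MAX (2^31 - 1). Building that program with
 *
 *	#define _FILE_OFFSET_BITS 64
 *
 * makes the libc issue the 64-bit lock commands instead, so the
 * F_GETLK64/F_OFD_GETLK branch above is taken and nothing needs to be
 * truncated.
 */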
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,				/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,		/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,		/* POLL_MSG */
	EPOLLERR,					/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,				/* POLL_PRI */
	EPOLLHUP | EPOLLERR				/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}
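/*
 * Example (userspace sketch): send_sigio_to_task() below queues the
 * signal chosen with F_SETSIG and fills in si_fd/si_band, which a
 * handler installed with SA_SIGINFO can read. A hedged sketch on a
 * descriptor that supports O_ASYNC (socket, tty, pipe); on_io() and
 * arm_async() are hypothetical and error checks are elided:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void on_io(int sig, siginfo_t *si, void *uctx)
 *	{
 *		// si->si_fd says which descriptor, si->si_band why
 *		// (POLLIN, POLLOUT, ... as mapped by band_table above).
 *	}
 *
 *	static void arm_async(int fd)
 *	{
 *		struct sigaction sa = { 0 };
 *
 *		sa.sa_sigaction = on_io;
 *		sa.sa_flags = SA_SIGINFO;
 *		sigemptyset(&sa.sa_mask);
 *		sigaction(SIGRTMIN, &sa, NULL);
 *		fcntl(fd, F_SETOWN, getpid());
 *		fcntl(fd, F_SETSIG, SIGRTMIN);
 *		fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 *	}
 */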
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
	default: {
		kernel_siginfo_t si;

		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		clear_siginfo(&si);
		si.si_signo = signum;
		si.si_errno = 0;
		si.si_code = reason;
		/*
		 * POSIX defines POLL_IN and friends to be signal
		 * specific si_codes for SIGPOLL.  Linux extended
		 * these si_codes to other signals in a way that is
		 * ambiguous if other signals also have signal
		 * specific si_codes.  In that case use SI_SIGIO instead
		 * to remove the ambiguity.
		 */
		if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
			si.si_code = SI_SIGIO;

		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band = ~0L;
		else
			si.si_band = mangle_poll(band_table[reason - POLL_IN]);
		si.si_fd = fd;
		if (!do_send_sig_info(signum, &si, p, type))
			break;
	}
		fallthrough;	/* fall back on the old plain SIGIO signal */
	case 0:
		do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	unsigned long flags;
	struct pid *pid;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, enum pid_type type)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

int send_sigurg(struct file *file)
{
	struct fown_struct *fown;
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	unsigned long flags;
	int ret = 0;

	fown = file_f_owner(file);
	if (!fown)
		return 0;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
	return ret;
}

static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __ro_after_init;

/*
 * Remove a fasync entry.  If successfully removed, return
 * positive and clear the FASYNC flag.  If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		write_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		kfree_rcu(fa, fa_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}
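/*
 * Example (userspace sketch): send_sigurg() above is what delivers
 * SIGURG when TCP out-of-band data arrives, and it only fires once the
 * socket has an owner. A hedged sketch; on_urg() and claim_urgent()
 * are hypothetical:
 *
 *	#include <fcntl.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void on_urg(int sig)
 *	{
 *		// fetch the urgent byte with recv(sock, &c, 1, MSG_OOB)
 *	}
 *
 *	static void claim_urgent(int sock)
 *	{
 *		signal(SIGURG, on_urg);
 *		fcntl(sock, F_SETOWN, getpid());
 *	}
 */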
/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		write_unlock_irq(&fa->fa_lock);
		goto out;
	}

	rwlock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

 out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry.  Return negative on error, positive if
 * added, and zero if it did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (updated) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code.  It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);
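/*
 * Example (driver-side sketch): the canonical pairing of fasync_helper()
 * and kill_fasync() in a character driver. The "my_*" names are
 * hypothetical; only the two calls themselves are the real API:
 *
 *	static struct fasync_struct *my_async_queue;
 *
 *	// Wired up as .fasync in the driver's file_operations; called
 *	// when userspace toggles FASYNC via fcntl(F_SETFL).
 *	static int my_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &my_async_queue);
 *	}
 *
 *	// Wherever new data becomes readable:
 *	//	kill_fasync(&my_async_queue, SIGIO, POLL_IN);
 */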
/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		unsigned long flags;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		read_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = file_f_owner(fa->fa_file);
			if (!fown)
				goto next;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
next:
		read_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	fasync_cache = kmem_cache_create("fasync_cache",
					 sizeof(struct fasync_struct), 0,
					 SLAB_PANIC | SLAB_ACCOUNT, NULL);
	return 0;
}

module_init(fcntl_init)