// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/rw_hint.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#include "internal.h"

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned int arg)
{
        struct inode *inode = file_inode(filp);
        int error = 0;

        /*
         * O_APPEND cannot be cleared if the file is marked as append-only
         * and the file is open for write.
         */
        if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
                return -EPERM;

        /* O_NOATIME can only be set by the owner or superuser */
        if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
                if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
                        return -EPERM;

        /* required for strict SunOS emulation */
        if (O_NONBLOCK != O_NDELAY)
                if (arg & O_NDELAY)
                        arg |= O_NONBLOCK;

        /* Pipe packetized mode is controlled by O_DIRECT flag */
        if (!S_ISFIFO(inode->i_mode) &&
            (arg & O_DIRECT) &&
            !(filp->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

        if (filp->f_op->check_flags)
                error = filp->f_op->check_flags(arg);
        if (error)
                return error;

        /*
         * ->fasync() is responsible for setting the FASYNC bit.
         */
        if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
                error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
                if (error < 0)
                        goto out;
                if (error > 0)
                        error = 0;
        }
        spin_lock(&filp->f_lock);
        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
        filp->f_iocb_flags = iocb_flags(filp);
        spin_unlock(&filp->f_lock);

out:
        return error;
}
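/*
 * Example (userspace sketch, not part of the kernel build): the usual
 * read-modify-write pattern for F_SETFL that setfl() above services.
 * Only the SETFL_MASK bits are honored; all other flags are preserved.
 *
 *      #include <fcntl.h>
 *
 *      int set_nonblock(int fd)
 *      {
 *              int flags = fcntl(fd, F_GETFL);
 *
 *              if (flags < 0)
 *                      return -1;
 *              return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 *      }
 */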
/*
 * Allocate a file->f_owner struct if it doesn't exist, handling racing
 * allocations correctly.
 */
int file_f_owner_allocate(struct file *file)
{
        struct fown_struct *f_owner;

        f_owner = file_f_owner(file);
        if (f_owner)
                return 0;

        f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL);
        if (!f_owner)
                return -ENOMEM;

        rwlock_init(&f_owner->lock);
        f_owner->file = file;
        /* If someone else raced us, drop our allocation. */
        if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner)))
                kfree(f_owner);
        return 0;
}
EXPORT_SYMBOL(file_f_owner_allocate);

void file_f_owner_release(struct file *file)
{
        struct fown_struct *f_owner;

        f_owner = file_f_owner(file);
        if (f_owner) {
                put_pid(f_owner->pid);
                kfree(f_owner);
        }
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
                int force)
{
        struct fown_struct *f_owner;

        f_owner = file_f_owner(filp);
        if (WARN_ON_ONCE(!f_owner))
                return;

        write_lock_irq(&f_owner->lock);
        if (force || !f_owner->pid) {
                put_pid(f_owner->pid);
                f_owner->pid = get_pid(pid);
                f_owner->pid_type = type;

                if (pid) {
                        const struct cred *cred = current_cred();
                        security_file_set_fowner(filp);
                        f_owner->uid = cred->uid;
                        f_owner->euid = cred->euid;
                }
        }
        write_unlock_irq(&f_owner->lock);
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, int who, int force)
{
        enum pid_type type;
        struct pid *pid = NULL;
        int ret = 0;

        might_sleep();

        type = PIDTYPE_TGID;
        if (who < 0) {
                /* avoid overflow below */
                if (who == INT_MIN)
                        return -EINVAL;

                type = PIDTYPE_PGID;
                who = -who;
        }

        ret = file_f_owner_allocate(filp);
        if (ret)
                return ret;

        rcu_read_lock();
        if (who) {
                pid = find_vpid(who);
                if (!pid)
                        ret = -ESRCH;
        }

        if (!ret)
                __f_setown(filp, pid, type, force);
        rcu_read_unlock();

        return ret;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
        __f_setown(filp, NULL, PIDTYPE_TGID, 1);
}

pid_t f_getown(struct file *filp)
{
        pid_t pid = 0;
        struct fown_struct *f_owner;

        f_owner = file_f_owner(filp);
        if (!f_owner)
                return pid;

        read_lock_irq(&f_owner->lock);
        rcu_read_lock();
        if (pid_task(f_owner->pid, f_owner->pid_type)) {
                pid = pid_vnr(f_owner->pid);
                if (f_owner->pid_type == PIDTYPE_PGID)
                        pid = -pid;
        }
        rcu_read_unlock();
        read_unlock_irq(&f_owner->lock);
        return pid;
}

static int f_setown_ex(struct file *filp, unsigned long arg)
{
        struct f_owner_ex __user *owner_p = (void __user *)arg;
        struct f_owner_ex owner;
        struct pid *pid;
        int type;
        int ret;

        ret = copy_from_user(&owner, owner_p, sizeof(owner));
        if (ret)
                return -EFAULT;

        switch (owner.type) {
        case F_OWNER_TID:
                type = PIDTYPE_PID;
                break;

        case F_OWNER_PID:
                type = PIDTYPE_TGID;
                break;

        case F_OWNER_PGRP:
                type = PIDTYPE_PGID;
                break;

        default:
                return -EINVAL;
        }

        ret = file_f_owner_allocate(filp);
        if (ret)
                return ret;

        rcu_read_lock();
        pid = find_vpid(owner.pid);
        if (owner.pid && !pid)
                ret = -ESRCH;
        else
                __f_setown(filp, pid, type, 1);
        rcu_read_unlock();

        return ret;
}
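/*
 * Example (userspace sketch, not part of the kernel build): directing
 * SIGIO/SIGURG either at the calling process or at a whole process
 * group by negating the id, matching the 'who < 0' case in f_setown().
 *
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *
 *      int own_fd(int fd, int use_pgrp)
 *      {
 *              if (use_pgrp)
 *                      return fcntl(fd, F_SETOWN, -getpgrp());
 *              return fcntl(fd, F_SETOWN, getpid());
 *      }
 */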
static int f_getown_ex(struct file *filp, unsigned long arg)
{
        struct f_owner_ex __user *owner_p = (void __user *)arg;
        struct f_owner_ex owner = {};
        int ret = 0;
        struct fown_struct *f_owner;
        enum pid_type pid_type = PIDTYPE_PID;

        f_owner = file_f_owner(filp);
        if (f_owner) {
                read_lock_irq(&f_owner->lock);
                rcu_read_lock();
                if (pid_task(f_owner->pid, f_owner->pid_type))
                        owner.pid = pid_vnr(f_owner->pid);
                rcu_read_unlock();
                pid_type = f_owner->pid_type;
        }

        switch (pid_type) {
        case PIDTYPE_PID:
                owner.type = F_OWNER_TID;
                break;

        case PIDTYPE_TGID:
                owner.type = F_OWNER_PID;
                break;

        case PIDTYPE_PGID:
                owner.type = F_OWNER_PGRP;
                break;

        default:
                WARN_ON(1);
                ret = -EINVAL;
                break;
        }
        if (f_owner)
                read_unlock_irq(&f_owner->lock);

        if (!ret) {
                ret = copy_to_user(owner_p, &owner, sizeof(owner));
                if (ret)
                        ret = -EFAULT;
        }
        return ret;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
        struct user_namespace *user_ns = current_user_ns();
        struct fown_struct *f_owner;
        uid_t __user *dst = (void __user *)arg;
        uid_t src[2] = {0, 0};
        int err;

        f_owner = file_f_owner(filp);
        if (f_owner) {
                read_lock_irq(&f_owner->lock);
                src[0] = from_kuid(user_ns, f_owner->uid);
                src[1] = from_kuid(user_ns, f_owner->euid);
                read_unlock_irq(&f_owner->lock);
        }

        err = put_user(src[0], &dst[0]);
        err |= put_user(src[1], &dst[1]);

        return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
        return -EINVAL;
}
#endif

static bool rw_hint_valid(u64 hint)
{
        BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
        BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
        BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
        BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
        BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
        BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);

        switch (hint) {
        case RWH_WRITE_LIFE_NOT_SET:
        case RWH_WRITE_LIFE_NONE:
        case RWH_WRITE_LIFE_SHORT:
        case RWH_WRITE_LIFE_MEDIUM:
        case RWH_WRITE_LIFE_LONG:
        case RWH_WRITE_LIFE_EXTREME:
                return true;
        default:
                return false;
        }
}

static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
                              unsigned long arg)
{
        struct inode *inode = file_inode(file);
        u64 __user *argp = (u64 __user *)arg;
        u64 hint = READ_ONCE(inode->i_write_hint);

        if (copy_to_user(argp, &hint, sizeof(*argp)))
                return -EFAULT;
        return 0;
}

static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
                              unsigned long arg)
{
        struct inode *inode = file_inode(file);
        u64 __user *argp = (u64 __user *)arg;
        u64 hint;

        if (copy_from_user(&hint, argp, sizeof(hint)))
                return -EFAULT;
        if (!rw_hint_valid(hint))
                return -EINVAL;

        WRITE_ONCE(inode->i_write_hint, hint);

        /*
         * file->f_mapping->host may differ from inode. As an example,
         * blkdev_open() modifies file->f_mapping.
         */
        if (file->f_mapping->host != inode)
                WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);

        return 0;
}
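/*
 * Example (userspace sketch, not part of the kernel build): tagging a
 * file with an expected write lifetime. The hint is a u64, so a pointer
 * is passed rather than a plain int argument.
 *
 *      #include <fcntl.h>
 *      #include <stdint.h>
 *
 *      int hint_short_lived(int fd)
 *      {
 *              uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *
 *              return fcntl(fd, F_SET_RW_HINT, &hint);
 *      }
 */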
/* Is the file descriptor a dup of the file? */
static long f_dupfd_query(int fd, struct file *filp)
{
        CLASS(fd_raw, f)(fd);

        if (fd_empty(f))
                return -EBADF;

        /*
         * We can do the 'fdput()' immediately, as the only thing that
         * matters is the pointer value which isn't changed by the fdput.
         *
         * Technically we didn't need a ref at all, and 'fdget()' was
         * overkill, but given our lockless file pointer lookup, the
         * alternatives are complicated.
         */
        return fd_file(f) == filp;
}

/* Let the caller figure out whether a given file was just created. */
static long f_created_query(const struct file *filp)
{
        return !!(filp->f_mode & FMODE_CREATED);
}

static int f_owner_sig(struct file *filp, int signum, bool setsig)
{
        int ret = 0;
        struct fown_struct *f_owner;

        might_sleep();

        if (setsig) {
                if (!valid_signal(signum))
                        return -EINVAL;

                ret = file_f_owner_allocate(filp);
                if (ret)
                        return ret;
        }

        f_owner = file_f_owner(filp);
        if (setsig)
                f_owner->signum = signum;
        else if (f_owner)
                ret = f_owner->signum;
        return ret;
}
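/*
 * Example (userspace sketch, not part of the kernel build): switching
 * from plain SIGIO to a queued realtime signal so the handler learns
 * which fd fired via si_fd, as filled in by send_sigio_to_task() below.
 *
 *      #include <fcntl.h>
 *      #include <signal.h>
 *      #include <unistd.h>
 *
 *      void use_rt_sigio(int fd)
 *      {
 *              fcntl(fd, F_SETOWN, getpid());
 *              fcntl(fd, F_SETSIG, SIGRTMIN + 1);  // 0 means plain SIGIO
 *      }
 */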
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
{
        void __user *argp = (void __user *)arg;
        int argi = (int)arg;
        struct flock flock;
        long err = -EINVAL;

        switch (cmd) {
        case F_CREATED_QUERY:
                err = f_created_query(filp);
                break;
        case F_DUPFD:
                err = f_dupfd(argi, filp, 0);
                break;
        case F_DUPFD_CLOEXEC:
                err = f_dupfd(argi, filp, O_CLOEXEC);
                break;
        case F_DUPFD_QUERY:
                err = f_dupfd_query(argi, filp);
                break;
        case F_GETFD:
                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
                break;
        case F_SETFD:
                err = 0;
                set_close_on_exec(fd, argi & FD_CLOEXEC);
                break;
        case F_GETFL:
                err = filp->f_flags;
                break;
        case F_SETFL:
                err = setfl(fd, filp, argi);
                break;
#if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
        case F_OFD_GETLK:
#endif
        case F_GETLK:
                if (copy_from_user(&flock, argp, sizeof(flock)))
                        return -EFAULT;
                err = fcntl_getlk(filp, cmd, &flock);
                if (!err && copy_to_user(argp, &flock, sizeof(flock)))
                        return -EFAULT;
                break;
#if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
        case F_OFD_SETLK:
        case F_OFD_SETLKW:
                fallthrough;
#endif
        case F_SETLK:
        case F_SETLKW:
                if (copy_from_user(&flock, argp, sizeof(flock)))
                        return -EFAULT;
                err = fcntl_setlk(fd, filp, cmd, &flock);
                break;
        case F_GETOWN:
                /*
                 * XXX If f_owner is a process group, the
                 * negative return value will get converted
                 * into an error.  Oops.  If we keep the
                 * current syscall conventions, the only way
                 * to fix this will be in libc.
                 */
                err = f_getown(filp);
                force_successful_syscall_return();
                break;
        case F_SETOWN:
                err = f_setown(filp, argi, 1);
                break;
        case F_GETOWN_EX:
                err = f_getown_ex(filp, arg);
                break;
        case F_SETOWN_EX:
                err = f_setown_ex(filp, arg);
                break;
        case F_GETOWNER_UIDS:
                err = f_getowner_uids(filp, arg);
                break;
        case F_GETSIG:
                err = f_owner_sig(filp, 0, false);
                break;
        case F_SETSIG:
                err = f_owner_sig(filp, argi, true);
                break;
        case F_GETLEASE:
                err = fcntl_getlease(filp);
                break;
        case F_SETLEASE:
                err = fcntl_setlease(fd, filp, argi);
                break;
        case F_NOTIFY:
                err = fcntl_dirnotify(fd, filp, argi);
                break;
        case F_SETPIPE_SZ:
        case F_GETPIPE_SZ:
                err = pipe_fcntl(filp, cmd, argi);
                break;
        case F_ADD_SEALS:
        case F_GET_SEALS:
                err = memfd_fcntl(filp, cmd, argi);
                break;
        case F_GET_RW_HINT:
                err = fcntl_get_rw_hint(filp, cmd, arg);
                break;
        case F_SET_RW_HINT:
                err = fcntl_set_rw_hint(filp, cmd, arg);
                break;
        default:
                break;
        }
        return err;
}

static int check_fcntl_cmd(unsigned cmd)
{
        switch (cmd) {
        case F_CREATED_QUERY:
        case F_DUPFD:
        case F_DUPFD_CLOEXEC:
        case F_DUPFD_QUERY:
        case F_GETFD:
        case F_SETFD:
        case F_GETFL:
                return 1;
        }
        return 0;
}

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
        CLASS(fd_raw, f)(fd);
        long err;

        if (fd_empty(f))
                return -EBADF;

        if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
                if (!check_fcntl_cmd(cmd))
                        return -EBADF;
        }

        err = security_file_fcntl(fd_file(f), cmd, arg);
        if (!err)
                err = do_fcntl(fd, cmd, arg, fd_file(f));

        return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                unsigned long, arg)
{
        void __user *argp = (void __user *)arg;
        CLASS(fd_raw, f)(fd);
        struct flock64 flock;
        long err;

        if (fd_empty(f))
                return -EBADF;

        if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
                if (!check_fcntl_cmd(cmd))
                        return -EBADF;
        }

        err = security_file_fcntl(fd_file(f), cmd, arg);
        if (err)
                return err;

        switch (cmd) {
        case F_GETLK64:
        case F_OFD_GETLK:
                err = -EFAULT;
                if (copy_from_user(&flock, argp, sizeof(flock)))
                        break;
                err = fcntl_getlk64(fd_file(f), cmd, &flock);
                if (!err && copy_to_user(argp, &flock, sizeof(flock)))
                        err = -EFAULT;
                break;
        case F_SETLK64:
        case F_SETLKW64:
        case F_OFD_SETLK:
        case F_OFD_SETLKW:
                err = -EFAULT;
                if (copy_from_user(&flock, argp, sizeof(flock)))
                        break;
                err = fcntl_setlk64(fd, fd_file(f), cmd, &flock);
                break;
        default:
                err = do_fcntl(fd, cmd, arg, fd_file(f));
                break;
        }
        return err;
}
#endif
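/*
 * Example (userspace sketch, not part of the kernel build): taking an
 * open-file-description (OFD) write lock. Unlike F_SETLK, the lock is
 * owned by the open file description, and l_pid must be zero on input.
 *
 *      #include <fcntl.h>
 *      #include <string.h>
 *
 *      int lock_whole_file(int fd)
 *      {
 *              struct flock fl;
 *
 *              memset(&fl, 0, sizeof(fl));
 *              fl.l_type = F_WRLCK;
 *              fl.l_whence = SEEK_SET;  // l_start = l_len = 0: whole file
 *              return fcntl(fd, F_OFD_SETLK, &fl);
 *      }
 */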
#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
#define copy_flock_fields(dst, src)             \
        (dst)->l_type = (src)->l_type;          \
        (dst)->l_whence = (src)->l_whence;      \
        (dst)->l_start = (src)->l_start;        \
        (dst)->l_len = (src)->l_len;            \
        (dst)->l_pid = (src)->l_pid;

static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
        struct compat_flock fl;

        if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
                return -EFAULT;
        copy_flock_fields(kfl, &fl);
        return 0;
}

static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
        struct compat_flock64 fl;

        if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
                return -EFAULT;
        copy_flock_fields(kfl, &fl);
        return 0;
}

static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
        struct compat_flock fl;

        memset(&fl, 0, sizeof(struct compat_flock));
        copy_flock_fields(&fl, kfl);
        if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
                return -EFAULT;
        return 0;
}

static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
        struct compat_flock64 fl;

        BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
        BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

        memset(&fl, 0, sizeof(struct compat_flock64));
        copy_flock_fields(&fl, kfl);
        if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
                return -EFAULT;
        return 0;
}
#undef copy_flock_fields

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
        switch (cmd) {
        case F_GETLK64:
                return F_GETLK;
        case F_SETLK64:
                return F_SETLK;
        case F_SETLKW64:
                return F_SETLKW;
        }

        return cmd;
}

/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see that part of the conflicting lock
 * that might make sense to it anyway.
 */
static int fixup_compat_flock(struct flock *flock)
{
        if (flock->l_start > COMPAT_OFF_T_MAX)
                return -EOVERFLOW;
        if (flock->l_len > COMPAT_OFF_T_MAX)
                flock->l_len = COMPAT_OFF_T_MAX;
        return 0;
}
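/*
 * Worked example of the rule above, assuming COMPAT_OFF_T_MAX is
 * 2^31 - 1 (a 32-bit off_t): a conflicting lock reported at
 * l_start = 3 GiB cannot be represented, so compat F_GETLK fails with
 * -EOVERFLOW; one at l_start = 1 GiB with l_len = 4 GiB is reported
 * with l_len clamped to COMPAT_OFF_T_MAX instead.
 */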
static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
                              compat_ulong_t arg)
{
        CLASS(fd_raw, f)(fd);
        struct flock flock;
        long err;

        if (fd_empty(f))
                return -EBADF;

        if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
                if (!check_fcntl_cmd(cmd))
                        return -EBADF;
        }

        err = security_file_fcntl(fd_file(f), cmd, arg);
        if (err)
                return err;

        switch (cmd) {
        case F_GETLK:
                err = get_compat_flock(&flock, compat_ptr(arg));
                if (err)
                        break;
                err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
                if (err)
                        break;
                err = fixup_compat_flock(&flock);
                if (!err)
                        err = put_compat_flock(&flock, compat_ptr(arg));
                break;
        case F_GETLK64:
        case F_OFD_GETLK:
                err = get_compat_flock64(&flock, compat_ptr(arg));
                if (err)
                        break;
                err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
                if (!err)
                        err = put_compat_flock64(&flock, compat_ptr(arg));
                break;
        case F_SETLK:
        case F_SETLKW:
                err = get_compat_flock(&flock, compat_ptr(arg));
                if (err)
                        break;
                err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
                break;
        case F_SETLK64:
        case F_SETLKW64:
        case F_OFD_SETLK:
        case F_OFD_SETLKW:
                err = get_compat_flock64(&flock, compat_ptr(arg));
                if (err)
                        break;
                err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
                break;
        default:
                err = do_fcntl(fd, cmd, arg, fd_file(f));
                break;
        }
        return err;
}

COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                       compat_ulong_t, arg)
{
        return do_compat_fcntl64(fd, cmd, arg);
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
                       compat_ulong_t, arg)
{
        switch (cmd) {
        case F_GETLK64:
        case F_SETLK64:
        case F_SETLKW64:
        case F_OFD_GETLK:
        case F_OFD_SETLK:
        case F_OFD_SETLKW:
                return -EINVAL;
        }
        return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
        EPOLLIN | EPOLLRDNORM,                  /* POLL_IN */
        EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,   /* POLL_OUT */
        EPOLLIN | EPOLLRDNORM | EPOLLMSG,       /* POLL_MSG */
        EPOLLERR,                               /* POLL_ERR */
        EPOLLPRI | EPOLLRDBAND,                 /* POLL_PRI */
        EPOLLHUP | EPOLLERR                     /* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
        const struct cred *cred;
        int ret;

        rcu_read_lock();
        cred = __task_cred(p);
        ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
                uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
                uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
               !security_file_send_sigiotask(p, fown, sig));
        rcu_read_unlock();
        return ret;
}

static void send_sigio_to_task(struct task_struct *p,
                               struct fown_struct *fown,
                               int fd, int reason, enum pid_type type)
{
        /*
         * F_SETSIG can change ->signum lockless in parallel, make
         * sure we read it once and use the same value throughout.
         */
        int signum = READ_ONCE(fown->signum);

        if (!sigio_perm(p, fown, signum))
                return;

        switch (signum) {
        default: {
                kernel_siginfo_t si;

                /*
                 * Queue a rt signal with the appropriate fd as its
                 * value.  We use SI_SIGIO as the source, not
                 * SI_KERNEL, since kernel signals always get
                 * delivered even if we can't queue.  Failure to
                 * queue in this case _should_ be reported; we fall
                 * back to SIGIO in that case. --sct
                 */
                clear_siginfo(&si);
                si.si_signo = signum;
                si.si_errno = 0;
                si.si_code  = reason;
                /*
                 * POSIX defines POLL_IN and friends to be signal
                 * specific si_codes for SIGPOLL.  Linux extended
                 * these si_codes to other signals in a way that is
                 * ambiguous if other signals also have signal
                 * specific si_codes.  In that case use SI_SIGIO instead
                 * to remove the ambiguity.
                 */
                if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
                        si.si_code = SI_SIGIO;

                /*
                 * Make sure we are called with one of the POLL_*
                 * reasons, otherwise we could leak kernel stack into
                 * userspace.
                 */
                BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
                if (reason - POLL_IN >= NSIGPOLL)
                        si.si_band = ~0L;
                else
                        si.si_band = mangle_poll(band_table[reason - POLL_IN]);
                si.si_fd = fd;
                if (!do_send_sig_info(signum, &si, p, type))
                        break;
        }
                fallthrough;    /* fall back on the old plain SIGIO signal */
        case 0:
                do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
        }
}
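/*
 * Example (userspace sketch, not part of the kernel build): a handler
 * for the realtime signal chosen with F_SETSIG, consuming the si_fd and
 * si_band values filled in by send_sigio_to_task() above. si_band holds
 * userspace poll bits (see mangle_poll()), so POLLIN is the right test.
 *
 *      #include <poll.h>
 *      #include <signal.h>
 *
 *      static void on_io(int sig, siginfo_t *si, void *ctx)
 *      {
 *              if (si->si_band & POLLIN)
 *                      handle_readable(si->si_fd);  // hypothetical helper
 *      }
 *
 *      // installed with sigaction() using SA_SIGINFO
 */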
void send_sigio(struct fown_struct *fown, int fd, int band)
{
        struct task_struct *p;
        enum pid_type type;
        unsigned long flags;
        struct pid *pid;

        read_lock_irqsave(&fown->lock, flags);

        type = fown->pid_type;
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;

        if (type <= PIDTYPE_TGID) {
                rcu_read_lock();
                p = pid_task(pid, PIDTYPE_PID);
                if (p)
                        send_sigio_to_task(p, fown, fd, band, type);
                rcu_read_unlock();
        } else {
                read_lock(&tasklist_lock);
                do_each_pid_task(pid, type, p) {
                        send_sigio_to_task(p, fown, fd, band, type);
                } while_each_pid_task(pid, type, p);
                read_unlock(&tasklist_lock);
        }
out_unlock_fown:
        read_unlock_irqrestore(&fown->lock, flags);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown, enum pid_type type)
{
        if (sigio_perm(p, fown, SIGURG))
                do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

int send_sigurg(struct file *file)
{
        struct fown_struct *fown;
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
        unsigned long flags;
        int ret = 0;

        fown = file_f_owner(file);
        if (!fown)
                return 0;

        read_lock_irqsave(&fown->lock, flags);

        type = fown->pid_type;
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;

        ret = 1;

        if (type <= PIDTYPE_TGID) {
                rcu_read_lock();
                p = pid_task(pid, PIDTYPE_PID);
                if (p)
                        send_sigurg_to_task(p, fown, type);
                rcu_read_unlock();
        } else {
                read_lock(&tasklist_lock);
                do_each_pid_task(pid, type, p) {
                        send_sigurg_to_task(p, fown, type);
                } while_each_pid_task(pid, type, p);
                read_unlock(&tasklist_lock);
        }
out_unlock_fown:
        read_unlock_irqrestore(&fown->lock, flags);
        return ret;
}
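/*
 * Example (userspace sketch, not part of the kernel build): the full
 * userspace recipe for receiving the SIGIO sent above - claim ownership
 * of the fd, then enable O_ASYNC so the file's fasync hook is armed.
 *
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *
 *      int enable_sigio(int fd)
 *      {
 *              int flags;
 *
 *              if (fcntl(fd, F_SETOWN, getpid()) < 0)
 *                      return -1;
 *              flags = fcntl(fd, F_GETFL);
 *              if (flags < 0)
 *                      return -1;
 *              return fcntl(fd, F_SETFL, flags | O_ASYNC);
 *      }
 */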
static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __ro_after_init;

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
        struct fasync_struct *fa, **fp;
        int result = 0;

        spin_lock(&filp->f_lock);
        spin_lock(&fasync_lock);
        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
                if (fa->fa_file != filp)
                        continue;

                write_lock_irq(&fa->fa_lock);
                fa->fa_file = NULL;
                write_unlock_irq(&fa->fa_lock);

                *fp = fa->fa_next;
                kfree_rcu(fa, fa_rcu);
                filp->f_flags &= ~FASYNC;
                result = 1;
                break;
        }
        spin_unlock(&fasync_lock);
        spin_unlock(&filp->f_lock);
        return result;
}

struct fasync_struct *fasync_alloc(void)
{
        return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
        kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp,
                                          struct fasync_struct **fapp,
                                          struct fasync_struct *new)
{
        struct fasync_struct *fa, **fp;

        spin_lock(&filp->f_lock);
        spin_lock(&fasync_lock);
        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
                if (fa->fa_file != filp)
                        continue;

                write_lock_irq(&fa->fa_lock);
                fa->fa_fd = fd;
                write_unlock_irq(&fa->fa_lock);
                goto out;
        }

        rwlock_init(&new->fa_lock);
        new->magic = FASYNC_MAGIC;
        new->fa_file = filp;
        new->fa_fd = fd;
        new->fa_next = *fapp;
        rcu_assign_pointer(*fapp, new);
        filp->f_flags |= FASYNC;

out:
        spin_unlock(&fasync_lock);
        spin_unlock(&filp->f_lock);
        return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if it did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
        struct fasync_struct *new;

        new = fasync_alloc();
        if (!new)
                return -ENOMEM;

        /*
         * fasync_insert_entry() returns the old (update) entry if
         * it existed.
         *
         * So free the (unused) new entry and return 0 to let the
         * caller know that we didn't add any new fasync entries.
         */
        if (fasync_insert_entry(fd, filp, fapp, new)) {
                fasync_free(new);
                return 0;
        }

        return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
        if (!on)
                return fasync_remove_entry(filp, fapp);
        return fasync_add_entry(fd, filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);
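/*
 * Example (hypothetical driver sketch, not part of this file): the
 * canonical pairing of fasync_helper() and kill_fasync() in a character
 * driver. 'mydev_fasync_queue' and 'mydev_fasync' are illustrative names.
 *
 *      static struct fasync_struct *mydev_fasync_queue;
 *
 *      static int mydev_fasync(int fd, struct file *filp, int on)
 *      {
 *              return fasync_helper(fd, filp, on, &mydev_fasync_queue);
 *      }
 *
 *      // ...wired up as the .fasync file operation; then, when new
 *      // data arrives:
 *      //      kill_fasync(&mydev_fasync_queue, SIGIO, POLL_IN);
 */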
/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
        while (fa) {
                struct fown_struct *fown;
                unsigned long flags;

                if (fa->magic != FASYNC_MAGIC) {
                        printk(KERN_ERR "kill_fasync: bad magic number in fasync_struct!\n");
                        return;
                }
                read_lock_irqsave(&fa->fa_lock, flags);
                if (fa->fa_file) {
                        fown = file_f_owner(fa->fa_file);
                        if (!fown)
                                goto next;
                        /*
                         * Don't send SIGURG to processes which have not set a
                         * queued signum: SIGURG has its own default signalling
                         * mechanism.
                         */
                        if (!(sig == SIGURG && fown->signum == 0))
                                send_sigio(fown, fa->fa_fd, band);
                }
next:
                read_unlock_irqrestore(&fa->fa_lock, flags);
                fa = rcu_dereference(fa->fa_next);
        }
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
        /*
         * First a quick test without locking: usually
         * the list is empty.
         */
        if (*fp) {
                rcu_read_lock();
                kill_fasync_rcu(rcu_dereference(*fp), sig, band);
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(kill_fasync);

static int __init fcntl_init(void)
{
        /*
         * Please add new bits here to ensure allocation uniqueness.
         * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
         * is defined as O_NONBLOCK on some platforms and not on others.
         */
        BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
                HWEIGHT32(
                        (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
                        __FMODE_EXEC | __FMODE_NONOTIFY));

        fasync_cache = kmem_cache_create("fasync_cache",
                                         sizeof(struct fasync_struct), 0,
                                         SLAB_PANIC | SLAB_ACCOUNT, NULL);
        return 0;
}

module_init(fcntl_init)