1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/fanotify.h> 3 #include <linux/fcntl.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/anon_inodes.h> 7 #include <linux/fsnotify_backend.h> 8 #include <linux/init.h> 9 #include <linux/mount.h> 10 #include <linux/namei.h> 11 #include <linux/poll.h> 12 #include <linux/security.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/types.h> 16 #include <linux/uaccess.h> 17 #include <linux/compat.h> 18 #include <linux/sched/signal.h> 19 #include <linux/memcontrol.h> 20 #include <linux/statfs.h> 21 #include <linux/exportfs.h> 22 23 #include <asm/ioctls.h> 24 25 #include "../../mount.h" 26 #include "../fdinfo.h" 27 #include "fanotify.h" 28 29 #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 30 #define FANOTIFY_DEFAULT_MAX_MARKS 8192 31 #define FANOTIFY_DEFAULT_MAX_LISTENERS 128 32 33 /* 34 * All flags that may be specified in parameter event_f_flags of fanotify_init. 35 * 36 * Internal and external open flags are stored together in field f_flags of 37 * struct file. Only external open flags shall be allowed in event_f_flags. 38 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be 39 * excluded. 40 */ 41 #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ 42 O_ACCMODE | O_APPEND | O_NONBLOCK | \ 43 __O_SYNC | O_DSYNC | O_CLOEXEC | \ 44 O_LARGEFILE | O_NOATIME ) 45 46 extern const struct fsnotify_ops fanotify_fsnotify_ops; 47 48 struct kmem_cache *fanotify_mark_cache __read_mostly; 49 struct kmem_cache *fanotify_event_cachep __read_mostly; 50 struct kmem_cache *fanotify_perm_event_cachep __read_mostly; 51 52 #define FANOTIFY_EVENT_ALIGN 4 53 54 static int fanotify_event_info_len(struct fanotify_event *event) 55 { 56 if (!fanotify_event_has_fid(event)) 57 return 0; 58 59 return roundup(sizeof(struct fanotify_event_info_fid) + 60 sizeof(struct file_handle) + event->fh_len, 61 FANOTIFY_EVENT_ALIGN); 62 } 63 64 /* 65 * Get an fsnotify notification event if one exists and is small 66 * enough to fit in "count". Return an error pointer if the count 67 * is not large enough. When permission event is dequeued, its state is 68 * updated accordingly. 69 */ 70 static struct fsnotify_event *get_one_event(struct fsnotify_group *group, 71 size_t count) 72 { 73 size_t event_size = FAN_EVENT_METADATA_LEN; 74 struct fsnotify_event *fsn_event = NULL; 75 76 pr_debug("%s: group=%p count=%zd\n", __func__, group, count); 77 78 spin_lock(&group->notification_lock); 79 if (fsnotify_notify_queue_is_empty(group)) 80 goto out; 81 82 if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { 83 event_size += fanotify_event_info_len( 84 FANOTIFY_E(fsnotify_peek_first_event(group))); 85 } 86 87 if (event_size > count) { 88 fsn_event = ERR_PTR(-EINVAL); 89 goto out; 90 } 91 fsn_event = fsnotify_remove_first_event(group); 92 if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask)) 93 FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED; 94 out: 95 spin_unlock(&group->notification_lock); 96 return fsn_event; 97 } 98 99 static int create_fd(struct fsnotify_group *group, 100 struct fanotify_event *event, 101 struct file **file) 102 { 103 int client_fd; 104 struct file *new_file; 105 106 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 107 108 client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); 109 if (client_fd < 0) 110 return client_fd; 111 112 /* 113 * we need a new file handle for the userspace program so it can read even if it was 114 * originally opened O_WRONLY. 115 */ 116 /* it's possible this event was an overflow event. in that case dentry and mnt 117 * are NULL; That's fine, just don't call dentry open */ 118 if (event->path.dentry && event->path.mnt) 119 new_file = dentry_open(&event->path, 120 group->fanotify_data.f_flags | FMODE_NONOTIFY, 121 current_cred()); 122 else 123 new_file = ERR_PTR(-EOVERFLOW); 124 if (IS_ERR(new_file)) { 125 /* 126 * we still send an event even if we can't open the file. this 127 * can happen when say tasks are gone and we try to open their 128 * /proc files or we try to open a WRONLY file like in sysfs 129 * we just send the errno to userspace since there isn't much 130 * else we can do. 131 */ 132 put_unused_fd(client_fd); 133 client_fd = PTR_ERR(new_file); 134 } else { 135 *file = new_file; 136 } 137 138 return client_fd; 139 } 140 141 /* 142 * Finish processing of permission event by setting it to ANSWERED state and 143 * drop group->notification_lock. 144 */ 145 static void finish_permission_event(struct fsnotify_group *group, 146 struct fanotify_perm_event *event, 147 unsigned int response) 148 __releases(&group->notification_lock) 149 { 150 bool destroy = false; 151 152 assert_spin_locked(&group->notification_lock); 153 event->response = response; 154 if (event->state == FAN_EVENT_CANCELED) 155 destroy = true; 156 else 157 event->state = FAN_EVENT_ANSWERED; 158 spin_unlock(&group->notification_lock); 159 if (destroy) 160 fsnotify_destroy_event(group, &event->fae.fse); 161 } 162 163 static int process_access_response(struct fsnotify_group *group, 164 struct fanotify_response *response_struct) 165 { 166 struct fanotify_perm_event *event; 167 int fd = response_struct->fd; 168 int response = response_struct->response; 169 170 pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, 171 fd, response); 172 /* 173 * make sure the response is valid, if invalid we do nothing and either 174 * userspace can send a valid response or we will clean it up after the 175 * timeout 176 */ 177 switch (response & ~FAN_AUDIT) { 178 case FAN_ALLOW: 179 case FAN_DENY: 180 break; 181 default: 182 return -EINVAL; 183 } 184 185 if (fd < 0) 186 return -EINVAL; 187 188 if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) 189 return -EINVAL; 190 191 spin_lock(&group->notification_lock); 192 list_for_each_entry(event, &group->fanotify_data.access_list, 193 fae.fse.list) { 194 if (event->fd != fd) 195 continue; 196 197 list_del_init(&event->fae.fse.list); 198 finish_permission_event(group, event, response); 199 wake_up(&group->fanotify_data.access_waitq); 200 return 0; 201 } 202 spin_unlock(&group->notification_lock); 203 204 return -ENOENT; 205 } 206 207 static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) 208 { 209 struct fanotify_event_info_fid info = { }; 210 struct file_handle handle = { }; 211 unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh; 212 size_t fh_len = event->fh_len; 213 size_t len = fanotify_event_info_len(event); 214 215 if (!len) 216 return 0; 217 218 if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) 219 return -EFAULT; 220 221 /* Copy event info fid header followed by vaiable sized file handle */ 222 info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; 223 info.hdr.len = len; 224 info.fsid = event->fid.fsid; 225 if (copy_to_user(buf, &info, sizeof(info))) 226 return -EFAULT; 227 228 buf += sizeof(info); 229 len -= sizeof(info); 230 handle.handle_type = event->fh_type; 231 handle.handle_bytes = fh_len; 232 if (copy_to_user(buf, &handle, sizeof(handle))) 233 return -EFAULT; 234 235 buf += sizeof(handle); 236 len -= sizeof(handle); 237 /* 238 * For an inline fh, copy through stack to exclude the copy from 239 * usercopy hardening protections. 240 */ 241 fh = fanotify_event_fh(event); 242 if (fh_len <= FANOTIFY_INLINE_FH_LEN) { 243 memcpy(bounce, fh, fh_len); 244 fh = bounce; 245 } 246 if (copy_to_user(buf, fh, fh_len)) 247 return -EFAULT; 248 249 /* Pad with 0's */ 250 buf += fh_len; 251 len -= fh_len; 252 WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); 253 if (len > 0 && clear_user(buf, len)) 254 return -EFAULT; 255 256 return 0; 257 } 258 259 static ssize_t copy_event_to_user(struct fsnotify_group *group, 260 struct fsnotify_event *fsn_event, 261 char __user *buf, size_t count) 262 { 263 struct fanotify_event_metadata metadata; 264 struct fanotify_event *event; 265 struct file *f = NULL; 266 int ret, fd = FAN_NOFD; 267 268 pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); 269 270 event = container_of(fsn_event, struct fanotify_event, fse); 271 metadata.event_len = FAN_EVENT_METADATA_LEN; 272 metadata.metadata_len = FAN_EVENT_METADATA_LEN; 273 metadata.vers = FANOTIFY_METADATA_VERSION; 274 metadata.reserved = 0; 275 metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; 276 metadata.pid = pid_vnr(event->pid); 277 278 if (fanotify_event_has_path(event)) { 279 fd = create_fd(group, event, &f); 280 if (fd < 0) 281 return fd; 282 } else if (fanotify_event_has_fid(event)) { 283 metadata.event_len += fanotify_event_info_len(event); 284 } 285 metadata.fd = fd; 286 287 ret = -EFAULT; 288 /* 289 * Sanity check copy size in case get_one_event() and 290 * fill_event_metadata() event_len sizes ever get out of sync. 291 */ 292 if (WARN_ON_ONCE(metadata.event_len > count)) 293 goto out_close_fd; 294 295 if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) 296 goto out_close_fd; 297 298 if (fanotify_is_perm_event(event->mask)) 299 FANOTIFY_PE(fsn_event)->fd = fd; 300 301 if (fanotify_event_has_path(event)) { 302 fd_install(fd, f); 303 } else if (fanotify_event_has_fid(event)) { 304 ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); 305 if (ret < 0) 306 return ret; 307 } 308 309 return metadata.event_len; 310 311 out_close_fd: 312 if (fd != FAN_NOFD) { 313 put_unused_fd(fd); 314 fput(f); 315 } 316 return ret; 317 } 318 319 /* intofiy userspace file descriptor functions */ 320 static __poll_t fanotify_poll(struct file *file, poll_table *wait) 321 { 322 struct fsnotify_group *group = file->private_data; 323 __poll_t ret = 0; 324 325 poll_wait(file, &group->notification_waitq, wait); 326 spin_lock(&group->notification_lock); 327 if (!fsnotify_notify_queue_is_empty(group)) 328 ret = EPOLLIN | EPOLLRDNORM; 329 spin_unlock(&group->notification_lock); 330 331 return ret; 332 } 333 334 static ssize_t fanotify_read(struct file *file, char __user *buf, 335 size_t count, loff_t *pos) 336 { 337 struct fsnotify_group *group; 338 struct fsnotify_event *kevent; 339 char __user *start; 340 int ret; 341 DEFINE_WAIT_FUNC(wait, woken_wake_function); 342 343 start = buf; 344 group = file->private_data; 345 346 pr_debug("%s: group=%p\n", __func__, group); 347 348 add_wait_queue(&group->notification_waitq, &wait); 349 while (1) { 350 kevent = get_one_event(group, count); 351 if (IS_ERR(kevent)) { 352 ret = PTR_ERR(kevent); 353 break; 354 } 355 356 if (!kevent) { 357 ret = -EAGAIN; 358 if (file->f_flags & O_NONBLOCK) 359 break; 360 361 ret = -ERESTARTSYS; 362 if (signal_pending(current)) 363 break; 364 365 if (start != buf) 366 break; 367 368 wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); 369 continue; 370 } 371 372 ret = copy_event_to_user(group, kevent, buf, count); 373 if (unlikely(ret == -EOPENSTALE)) { 374 /* 375 * We cannot report events with stale fd so drop it. 376 * Setting ret to 0 will continue the event loop and 377 * do the right thing if there are no more events to 378 * read (i.e. return bytes read, -EAGAIN or wait). 379 */ 380 ret = 0; 381 } 382 383 /* 384 * Permission events get queued to wait for response. Other 385 * events can be destroyed now. 386 */ 387 if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { 388 fsnotify_destroy_event(group, kevent); 389 } else { 390 if (ret <= 0) { 391 spin_lock(&group->notification_lock); 392 finish_permission_event(group, 393 FANOTIFY_PE(kevent), FAN_DENY); 394 wake_up(&group->fanotify_data.access_waitq); 395 } else { 396 spin_lock(&group->notification_lock); 397 list_add_tail(&kevent->list, 398 &group->fanotify_data.access_list); 399 spin_unlock(&group->notification_lock); 400 } 401 } 402 if (ret < 0) 403 break; 404 buf += ret; 405 count -= ret; 406 } 407 remove_wait_queue(&group->notification_waitq, &wait); 408 409 if (start != buf && ret != -EFAULT) 410 ret = buf - start; 411 return ret; 412 } 413 414 static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 415 { 416 struct fanotify_response response = { .fd = -1, .response = -1 }; 417 struct fsnotify_group *group; 418 int ret; 419 420 if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) 421 return -EINVAL; 422 423 group = file->private_data; 424 425 if (count > sizeof(response)) 426 count = sizeof(response); 427 428 pr_debug("%s: group=%p count=%zu\n", __func__, group, count); 429 430 if (copy_from_user(&response, buf, count)) 431 return -EFAULT; 432 433 ret = process_access_response(group, &response); 434 if (ret < 0) 435 count = ret; 436 437 return count; 438 } 439 440 static int fanotify_release(struct inode *ignored, struct file *file) 441 { 442 struct fsnotify_group *group = file->private_data; 443 struct fanotify_perm_event *event; 444 struct fsnotify_event *fsn_event; 445 446 /* 447 * Stop new events from arriving in the notification queue. since 448 * userspace cannot use fanotify fd anymore, no event can enter or 449 * leave access_list by now either. 450 */ 451 fsnotify_group_stop_queueing(group); 452 453 /* 454 * Process all permission events on access_list and notification queue 455 * and simulate reply from userspace. 456 */ 457 spin_lock(&group->notification_lock); 458 while (!list_empty(&group->fanotify_data.access_list)) { 459 event = list_first_entry(&group->fanotify_data.access_list, 460 struct fanotify_perm_event, fae.fse.list); 461 list_del_init(&event->fae.fse.list); 462 finish_permission_event(group, event, FAN_ALLOW); 463 spin_lock(&group->notification_lock); 464 } 465 466 /* 467 * Destroy all non-permission events. For permission events just 468 * dequeue them and set the response. They will be freed once the 469 * response is consumed and fanotify_get_response() returns. 470 */ 471 while (!fsnotify_notify_queue_is_empty(group)) { 472 fsn_event = fsnotify_remove_first_event(group); 473 if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { 474 spin_unlock(&group->notification_lock); 475 fsnotify_destroy_event(group, fsn_event); 476 } else { 477 finish_permission_event(group, FANOTIFY_PE(fsn_event), 478 FAN_ALLOW); 479 } 480 spin_lock(&group->notification_lock); 481 } 482 spin_unlock(&group->notification_lock); 483 484 /* Response for all permission events it set, wakeup waiters */ 485 wake_up(&group->fanotify_data.access_waitq); 486 487 /* matches the fanotify_init->fsnotify_alloc_group */ 488 fsnotify_destroy_group(group); 489 490 return 0; 491 } 492 493 static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 494 { 495 struct fsnotify_group *group; 496 struct fsnotify_event *fsn_event; 497 void __user *p; 498 int ret = -ENOTTY; 499 size_t send_len = 0; 500 501 group = file->private_data; 502 503 p = (void __user *) arg; 504 505 switch (cmd) { 506 case FIONREAD: 507 spin_lock(&group->notification_lock); 508 list_for_each_entry(fsn_event, &group->notification_list, list) 509 send_len += FAN_EVENT_METADATA_LEN; 510 spin_unlock(&group->notification_lock); 511 ret = put_user(send_len, (int __user *) p); 512 break; 513 } 514 515 return ret; 516 } 517 518 static const struct file_operations fanotify_fops = { 519 .show_fdinfo = fanotify_show_fdinfo, 520 .poll = fanotify_poll, 521 .read = fanotify_read, 522 .write = fanotify_write, 523 .fasync = NULL, 524 .release = fanotify_release, 525 .unlocked_ioctl = fanotify_ioctl, 526 .compat_ioctl = fanotify_ioctl, 527 .llseek = noop_llseek, 528 }; 529 530 static int fanotify_find_path(int dfd, const char __user *filename, 531 struct path *path, unsigned int flags) 532 { 533 int ret; 534 535 pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__, 536 dfd, filename, flags); 537 538 if (filename == NULL) { 539 struct fd f = fdget(dfd); 540 541 ret = -EBADF; 542 if (!f.file) 543 goto out; 544 545 ret = -ENOTDIR; 546 if ((flags & FAN_MARK_ONLYDIR) && 547 !(S_ISDIR(file_inode(f.file)->i_mode))) { 548 fdput(f); 549 goto out; 550 } 551 552 *path = f.file->f_path; 553 path_get(path); 554 fdput(f); 555 } else { 556 unsigned int lookup_flags = 0; 557 558 if (!(flags & FAN_MARK_DONT_FOLLOW)) 559 lookup_flags |= LOOKUP_FOLLOW; 560 if (flags & FAN_MARK_ONLYDIR) 561 lookup_flags |= LOOKUP_DIRECTORY; 562 563 ret = user_path_at(dfd, filename, lookup_flags, path); 564 if (ret) 565 goto out; 566 } 567 568 /* you can only watch an inode if you have read permissions on it */ 569 ret = inode_permission(path->dentry->d_inode, MAY_READ); 570 if (ret) 571 path_put(path); 572 out: 573 return ret; 574 } 575 576 static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, 577 __u32 mask, 578 unsigned int flags, 579 int *destroy) 580 { 581 __u32 oldmask = 0; 582 583 spin_lock(&fsn_mark->lock); 584 if (!(flags & FAN_MARK_IGNORED_MASK)) { 585 oldmask = fsn_mark->mask; 586 fsn_mark->mask &= ~mask; 587 } else { 588 fsn_mark->ignored_mask &= ~mask; 589 } 590 *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); 591 spin_unlock(&fsn_mark->lock); 592 593 return mask & oldmask; 594 } 595 596 static int fanotify_remove_mark(struct fsnotify_group *group, 597 fsnotify_connp_t *connp, __u32 mask, 598 unsigned int flags) 599 { 600 struct fsnotify_mark *fsn_mark = NULL; 601 __u32 removed; 602 int destroy_mark; 603 604 mutex_lock(&group->mark_mutex); 605 fsn_mark = fsnotify_find_mark(connp, group); 606 if (!fsn_mark) { 607 mutex_unlock(&group->mark_mutex); 608 return -ENOENT; 609 } 610 611 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, 612 &destroy_mark); 613 if (removed & fsnotify_conn_mask(fsn_mark->connector)) 614 fsnotify_recalc_mask(fsn_mark->connector); 615 if (destroy_mark) 616 fsnotify_detach_mark(fsn_mark); 617 mutex_unlock(&group->mark_mutex); 618 if (destroy_mark) 619 fsnotify_free_mark(fsn_mark); 620 621 /* matches the fsnotify_find_mark() */ 622 fsnotify_put_mark(fsn_mark); 623 return 0; 624 } 625 626 static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, 627 struct vfsmount *mnt, __u32 mask, 628 unsigned int flags) 629 { 630 return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, 631 mask, flags); 632 } 633 634 static int fanotify_remove_sb_mark(struct fsnotify_group *group, 635 struct super_block *sb, __u32 mask, 636 unsigned int flags) 637 { 638 return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags); 639 } 640 641 static int fanotify_remove_inode_mark(struct fsnotify_group *group, 642 struct inode *inode, __u32 mask, 643 unsigned int flags) 644 { 645 return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, 646 flags); 647 } 648 649 static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, 650 __u32 mask, 651 unsigned int flags) 652 { 653 __u32 oldmask = -1; 654 655 spin_lock(&fsn_mark->lock); 656 if (!(flags & FAN_MARK_IGNORED_MASK)) { 657 oldmask = fsn_mark->mask; 658 fsn_mark->mask |= mask; 659 } else { 660 fsn_mark->ignored_mask |= mask; 661 if (flags & FAN_MARK_IGNORED_SURV_MODIFY) 662 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; 663 } 664 spin_unlock(&fsn_mark->lock); 665 666 return mask & ~oldmask; 667 } 668 669 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, 670 fsnotify_connp_t *connp, 671 unsigned int type, 672 __kernel_fsid_t *fsid) 673 { 674 struct fsnotify_mark *mark; 675 int ret; 676 677 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 678 return ERR_PTR(-ENOSPC); 679 680 mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); 681 if (!mark) 682 return ERR_PTR(-ENOMEM); 683 684 fsnotify_init_mark(mark, group); 685 ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); 686 if (ret) { 687 fsnotify_put_mark(mark); 688 return ERR_PTR(ret); 689 } 690 691 return mark; 692 } 693 694 695 static int fanotify_add_mark(struct fsnotify_group *group, 696 fsnotify_connp_t *connp, unsigned int type, 697 __u32 mask, unsigned int flags, 698 __kernel_fsid_t *fsid) 699 { 700 struct fsnotify_mark *fsn_mark; 701 __u32 added; 702 703 mutex_lock(&group->mark_mutex); 704 fsn_mark = fsnotify_find_mark(connp, group); 705 if (!fsn_mark) { 706 fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); 707 if (IS_ERR(fsn_mark)) { 708 mutex_unlock(&group->mark_mutex); 709 return PTR_ERR(fsn_mark); 710 } 711 } 712 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 713 if (added & ~fsnotify_conn_mask(fsn_mark->connector)) 714 fsnotify_recalc_mask(fsn_mark->connector); 715 mutex_unlock(&group->mark_mutex); 716 717 fsnotify_put_mark(fsn_mark); 718 return 0; 719 } 720 721 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, 722 struct vfsmount *mnt, __u32 mask, 723 unsigned int flags, __kernel_fsid_t *fsid) 724 { 725 return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, 726 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); 727 } 728 729 static int fanotify_add_sb_mark(struct fsnotify_group *group, 730 struct super_block *sb, __u32 mask, 731 unsigned int flags, __kernel_fsid_t *fsid) 732 { 733 return fanotify_add_mark(group, &sb->s_fsnotify_marks, 734 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); 735 } 736 737 static int fanotify_add_inode_mark(struct fsnotify_group *group, 738 struct inode *inode, __u32 mask, 739 unsigned int flags, __kernel_fsid_t *fsid) 740 { 741 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 742 743 /* 744 * If some other task has this inode open for write we should not add 745 * an ignored mark, unless that ignored mark is supposed to survive 746 * modification changes anyway. 747 */ 748 if ((flags & FAN_MARK_IGNORED_MASK) && 749 !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && 750 inode_is_open_for_write(inode)) 751 return 0; 752 753 return fanotify_add_mark(group, &inode->i_fsnotify_marks, 754 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); 755 } 756 757 /* fanotify syscalls */ 758 SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) 759 { 760 struct fsnotify_group *group; 761 int f_flags, fd; 762 struct user_struct *user; 763 struct fanotify_event *oevent; 764 765 pr_debug("%s: flags=%x event_f_flags=%x\n", 766 __func__, flags, event_f_flags); 767 768 if (!capable(CAP_SYS_ADMIN)) 769 return -EPERM; 770 771 #ifdef CONFIG_AUDITSYSCALL 772 if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) 773 #else 774 if (flags & ~FANOTIFY_INIT_FLAGS) 775 #endif 776 return -EINVAL; 777 778 if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) 779 return -EINVAL; 780 781 switch (event_f_flags & O_ACCMODE) { 782 case O_RDONLY: 783 case O_RDWR: 784 case O_WRONLY: 785 break; 786 default: 787 return -EINVAL; 788 } 789 790 if ((flags & FAN_REPORT_FID) && 791 (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF) 792 return -EINVAL; 793 794 user = get_current_user(); 795 if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { 796 free_uid(user); 797 return -EMFILE; 798 } 799 800 f_flags = O_RDWR | FMODE_NONOTIFY; 801 if (flags & FAN_CLOEXEC) 802 f_flags |= O_CLOEXEC; 803 if (flags & FAN_NONBLOCK) 804 f_flags |= O_NONBLOCK; 805 806 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ 807 group = fsnotify_alloc_group(&fanotify_fsnotify_ops); 808 if (IS_ERR(group)) { 809 free_uid(user); 810 return PTR_ERR(group); 811 } 812 813 group->fanotify_data.user = user; 814 group->fanotify_data.flags = flags; 815 atomic_inc(&user->fanotify_listeners); 816 group->memcg = get_mem_cgroup_from_mm(current->mm); 817 818 oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, 819 FSNOTIFY_EVENT_NONE, NULL); 820 if (unlikely(!oevent)) { 821 fd = -ENOMEM; 822 goto out_destroy_group; 823 } 824 group->overflow_event = &oevent->fse; 825 826 if (force_o_largefile()) 827 event_f_flags |= O_LARGEFILE; 828 group->fanotify_data.f_flags = event_f_flags; 829 init_waitqueue_head(&group->fanotify_data.access_waitq); 830 INIT_LIST_HEAD(&group->fanotify_data.access_list); 831 switch (flags & FANOTIFY_CLASS_BITS) { 832 case FAN_CLASS_NOTIF: 833 group->priority = FS_PRIO_0; 834 break; 835 case FAN_CLASS_CONTENT: 836 group->priority = FS_PRIO_1; 837 break; 838 case FAN_CLASS_PRE_CONTENT: 839 group->priority = FS_PRIO_2; 840 break; 841 default: 842 fd = -EINVAL; 843 goto out_destroy_group; 844 } 845 846 if (flags & FAN_UNLIMITED_QUEUE) { 847 fd = -EPERM; 848 if (!capable(CAP_SYS_ADMIN)) 849 goto out_destroy_group; 850 group->max_events = UINT_MAX; 851 } else { 852 group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; 853 } 854 855 if (flags & FAN_UNLIMITED_MARKS) { 856 fd = -EPERM; 857 if (!capable(CAP_SYS_ADMIN)) 858 goto out_destroy_group; 859 group->fanotify_data.max_marks = UINT_MAX; 860 } else { 861 group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; 862 } 863 864 if (flags & FAN_ENABLE_AUDIT) { 865 fd = -EPERM; 866 if (!capable(CAP_AUDIT_WRITE)) 867 goto out_destroy_group; 868 } 869 870 fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); 871 if (fd < 0) 872 goto out_destroy_group; 873 874 return fd; 875 876 out_destroy_group: 877 fsnotify_destroy_group(group); 878 return fd; 879 } 880 881 /* Check if filesystem can encode a unique fid */ 882 static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) 883 { 884 __kernel_fsid_t root_fsid; 885 int err; 886 887 /* 888 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). 889 */ 890 err = vfs_get_fsid(path->dentry, fsid); 891 if (err) 892 return err; 893 894 if (!fsid->val[0] && !fsid->val[1]) 895 return -ENODEV; 896 897 /* 898 * Make sure path is not inside a filesystem subvolume (e.g. btrfs) 899 * which uses a different fsid than sb root. 900 */ 901 err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); 902 if (err) 903 return err; 904 905 if (root_fsid.val[0] != fsid->val[0] || 906 root_fsid.val[1] != fsid->val[1]) 907 return -EXDEV; 908 909 /* 910 * We need to make sure that the file system supports at least 911 * encoding a file handle so user can use name_to_handle_at() to 912 * compare fid returned with event to the file handle of watched 913 * objects. However, name_to_handle_at() requires that the 914 * filesystem also supports decoding file handles. 915 */ 916 if (!path->dentry->d_sb->s_export_op || 917 !path->dentry->d_sb->s_export_op->fh_to_dentry) 918 return -EOPNOTSUPP; 919 920 return 0; 921 } 922 923 static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, 924 int dfd, const char __user *pathname) 925 { 926 struct inode *inode = NULL; 927 struct vfsmount *mnt = NULL; 928 struct fsnotify_group *group; 929 struct fd f; 930 struct path path; 931 __kernel_fsid_t __fsid, *fsid = NULL; 932 u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; 933 unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; 934 int ret; 935 936 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", 937 __func__, fanotify_fd, flags, dfd, pathname, mask); 938 939 /* we only use the lower 32 bits as of right now. */ 940 if (mask & ((__u64)0xffffffff << 32)) 941 return -EINVAL; 942 943 if (flags & ~FANOTIFY_MARK_FLAGS) 944 return -EINVAL; 945 946 switch (mark_type) { 947 case FAN_MARK_INODE: 948 case FAN_MARK_MOUNT: 949 case FAN_MARK_FILESYSTEM: 950 break; 951 default: 952 return -EINVAL; 953 } 954 955 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { 956 case FAN_MARK_ADD: /* fallthrough */ 957 case FAN_MARK_REMOVE: 958 if (!mask) 959 return -EINVAL; 960 break; 961 case FAN_MARK_FLUSH: 962 if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) 963 return -EINVAL; 964 break; 965 default: 966 return -EINVAL; 967 } 968 969 if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) 970 valid_mask |= FANOTIFY_PERM_EVENTS; 971 972 if (mask & ~valid_mask) 973 return -EINVAL; 974 975 f = fdget(fanotify_fd); 976 if (unlikely(!f.file)) 977 return -EBADF; 978 979 /* verify that this is indeed an fanotify instance */ 980 ret = -EINVAL; 981 if (unlikely(f.file->f_op != &fanotify_fops)) 982 goto fput_and_out; 983 group = f.file->private_data; 984 985 /* 986 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not 987 * allowed to set permissions events. 988 */ 989 ret = -EINVAL; 990 if (mask & FANOTIFY_PERM_EVENTS && 991 group->priority == FS_PRIO_0) 992 goto fput_and_out; 993 994 /* 995 * Events with data type inode do not carry enough information to report 996 * event->fd, so we do not allow setting a mask for inode events unless 997 * group supports reporting fid. 998 * inode events are not supported on a mount mark, because they do not 999 * carry enough information (i.e. path) to be filtered by mount point. 1000 */ 1001 if (mask & FANOTIFY_INODE_EVENTS && 1002 (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) || 1003 mark_type == FAN_MARK_MOUNT)) 1004 goto fput_and_out; 1005 1006 if (flags & FAN_MARK_FLUSH) { 1007 ret = 0; 1008 if (mark_type == FAN_MARK_MOUNT) 1009 fsnotify_clear_vfsmount_marks_by_group(group); 1010 else if (mark_type == FAN_MARK_FILESYSTEM) 1011 fsnotify_clear_sb_marks_by_group(group); 1012 else 1013 fsnotify_clear_inode_marks_by_group(group); 1014 goto fput_and_out; 1015 } 1016 1017 ret = fanotify_find_path(dfd, pathname, &path, flags); 1018 if (ret) 1019 goto fput_and_out; 1020 1021 if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { 1022 ret = fanotify_test_fid(&path, &__fsid); 1023 if (ret) 1024 goto path_put_and_out; 1025 1026 fsid = &__fsid; 1027 } 1028 1029 /* inode held in place by reference to path; group by fget on fd */ 1030 if (mark_type == FAN_MARK_INODE) 1031 inode = path.dentry->d_inode; 1032 else 1033 mnt = path.mnt; 1034 1035 /* create/update an inode mark */ 1036 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { 1037 case FAN_MARK_ADD: 1038 if (mark_type == FAN_MARK_MOUNT) 1039 ret = fanotify_add_vfsmount_mark(group, mnt, mask, 1040 flags, fsid); 1041 else if (mark_type == FAN_MARK_FILESYSTEM) 1042 ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, 1043 flags, fsid); 1044 else 1045 ret = fanotify_add_inode_mark(group, inode, mask, 1046 flags, fsid); 1047 break; 1048 case FAN_MARK_REMOVE: 1049 if (mark_type == FAN_MARK_MOUNT) 1050 ret = fanotify_remove_vfsmount_mark(group, mnt, mask, 1051 flags); 1052 else if (mark_type == FAN_MARK_FILESYSTEM) 1053 ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, 1054 flags); 1055 else 1056 ret = fanotify_remove_inode_mark(group, inode, mask, 1057 flags); 1058 break; 1059 default: 1060 ret = -EINVAL; 1061 } 1062 1063 path_put_and_out: 1064 path_put(&path); 1065 fput_and_out: 1066 fdput(f); 1067 return ret; 1068 } 1069 1070 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, 1071 __u64, mask, int, dfd, 1072 const char __user *, pathname) 1073 { 1074 return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); 1075 } 1076 1077 #ifdef CONFIG_COMPAT 1078 COMPAT_SYSCALL_DEFINE6(fanotify_mark, 1079 int, fanotify_fd, unsigned int, flags, 1080 __u32, mask0, __u32, mask1, int, dfd, 1081 const char __user *, pathname) 1082 { 1083 return do_fanotify_mark(fanotify_fd, flags, 1084 #ifdef __BIG_ENDIAN 1085 ((__u64)mask0 << 32) | mask1, 1086 #else 1087 ((__u64)mask1 << 32) | mask0, 1088 #endif 1089 dfd, pathname); 1090 } 1091 #endif 1092 1093 /* 1094 * fanotify_user_setup - Our initialization function. Note that we cannot return 1095 * error because we have compiled-in VFS hooks. So an (unlikely) failure here 1096 * must result in panic(). 1097 */ 1098 static int __init fanotify_user_setup(void) 1099 { 1100 BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8); 1101 BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); 1102 1103 fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, 1104 SLAB_PANIC|SLAB_ACCOUNT); 1105 fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); 1106 if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { 1107 fanotify_perm_event_cachep = 1108 KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); 1109 } 1110 1111 return 0; 1112 } 1113 device_initcall(fanotify_user_setup); 1114