// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
#include <linux/statfs.h>
#include <linux/exportfs.h>

#include <asm/ioctls.h>

#include "../fsnotify.h"
#include "../fdinfo.h"
#include "fanotify.h"

/* Default limit of queued events per group; tunable via sysctl below */
#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
/* Legacy per-group marks limit; feeds into the per-user default below */
#define FANOTIFY_OLD_DEFAULT_MAX_MARKS	8192
#define FANOTIFY_DEFAULT_MAX_GROUPS	128
#define FANOTIFY_DEFAULT_FEE_POOL_SIZE	32

/*
 * Legacy fanotify marks limits (8192) is per group and we introduced a tunable
 * limit of marks per user, similar to inotify. Effectively, the legacy limit
 * of fanotify marks per user is <max marks per group> * <max groups per user>.
 * This default limit (1M) also happens to match the increased limit of inotify
 * max_user_watches since v5.10.
 */
#define FANOTIFY_DEFAULT_MAX_USER_MARKS	\
	(FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS)

/*
 * Most of the memory cost of adding an inode mark is pinning the marked inode.
 * The size of the filesystem inode struct is not uniform across filesystems,
 * so double the size of a VFS inode is used as a conservative approximation.
 */
#define INODE_MARK_COST	(2 * sizeof(struct inode))

/* configurable via /proc/sys/fs/fanotify/ */
static int fanotify_max_queued_events __read_mostly;

#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* Bounds for the long-valued per-user sysctls below */
static long ft_zero = 0;
static long ft_int_max = INT_MAX;

/* /proc/sys/fs/fanotify/ knobs; limits live in init_user_ns ucounts */
static struct ctl_table fanotify_table[] = {
	{
		.procname	= "max_user_groups",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_user_marks",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_queued_events",
		.data		= &fanotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO
	},
};

/* Register the fs/fanotify sysctl directory at boot */
static void __init fanotify_sysctls_init(void)
{
	register_sysctl("fs/fanotify", fanotify_table);
}
#else
#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

/*
 * All flags that may be specified in parameter event_f_flags of fanotify_init.
 *
 * Internal and external open flags are stored together in field f_flags of
 * struct file. Only external open flags shall be allowed in event_f_flags.
 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
 * excluded.
 */
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
		O_ACCMODE | O_APPEND | O_NONBLOCK | \
		__O_SYNC | O_DSYNC | O_CLOEXEC | \
		O_LARGEFILE | O_NOATIME )

extern const struct fsnotify_ops fanotify_fsnotify_ops;

struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;

/* Info records in the read() stream are padded to this alignment */
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
	(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
#define FANOTIFY_PIDFD_INFO_HDR_LEN \
	sizeof(struct fanotify_event_info_pidfd)
#define FANOTIFY_ERROR_INFO_LEN \
	(sizeof(struct fanotify_event_info_error))

/*
 * Length of one fid info record: header + file handle of @fh_len bytes,
 * optionally followed by a null terminated name of @name_len bytes, rounded
 * up to event alignment.
 */
static int fanotify_fid_info_len(int fh_len, int name_len)
{
	int info_len = fh_len;

	if (name_len)
		info_len += name_len + 1;

	return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len,
		       FANOTIFY_EVENT_ALIGN);
}

/* FAN_RENAME may have one or two dir+name info records */
static int fanotify_dir_name_info_len(struct fanotify_event *event)
{
	struct fanotify_info *info = fanotify_event_info(event);
	int dir_fh_len = fanotify_event_dir_fh_len(event);
	int dir2_fh_len = fanotify_event_dir2_fh_len(event);
	int info_len = 0;

	if (dir_fh_len)
		info_len += fanotify_fid_info_len(dir_fh_len,
						  info->name_len);
	if (dir2_fh_len)
		info_len += fanotify_fid_info_len(dir2_fh_len,
						  info->name2_len);

	return info_len;
}

/*
 * Total number of bytes needed to report @event to userspace: the metadata
 * record plus all optional info records selected by the group's @info_mode.
 */
static size_t fanotify_event_len(unsigned int info_mode,
				 struct fanotify_event *event)
{
	size_t event_len = FAN_EVENT_METADATA_LEN;
	int fh_len;
	int dot_len = 0;

	if (!info_mode)
		return event_len;

	if (fanotify_is_error_event(event->mask))
		event_len += FANOTIFY_ERROR_INFO_LEN;

	if (fanotify_event_has_any_dir_fh(event)) {
		event_len += fanotify_dir_name_info_len(event);
	} else if ((info_mode & FAN_REPORT_NAME) &&
		   (event->mask & FAN_ONDIR)) {
		/*
		 * With group flag FAN_REPORT_NAME, if name was not recorded in
		 * event on a directory, we will report the name ".".
		 */
		dot_len = 1;
	}

	if (info_mode & FAN_REPORT_PIDFD)
		event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;

	if (fanotify_event_has_object_fh(event)) {
		fh_len = fanotify_event_object_fh_len(event);
		event_len += fanotify_fid_info_len(fh_len, dot_len);
	}

	return event_len;
}

/*
 * Remove an hashed event from merge hash table.
 */
static void fanotify_unhash_event(struct fsnotify_group *group,
				  struct fanotify_event *event)
{
	assert_spin_locked(&group->notification_lock);

	pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
		 group, event, fanotify_event_hash_bucket(group, event));

	if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list)))
		return;

	hlist_del_init(&event->merge_list);
}

/*
 * Get an fanotify notification event if one exists and is small
 * enough to fit in "count". Return an error pointer if the count
 * is not large enough. When permission event is dequeued, its state is
 * updated accordingly.
 */
static struct fanotify_event *get_one_event(struct fsnotify_group *group,
					    size_t count)
{
	size_t event_size;
	struct fanotify_event *event = NULL;
	struct fsnotify_event *fsn_event;
	unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);

	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);

	spin_lock(&group->notification_lock);
	fsn_event = fsnotify_peek_first_event(group);
	if (!fsn_event)
		goto out;

	event = FANOTIFY_E(fsn_event);
	event_size = fanotify_event_len(info_mode, event);

	if (event_size > count) {
		event = ERR_PTR(-EINVAL);
		goto out;
	}

	/*
	 * Held the notification_lock the whole time, so this is the
	 * same event we peeked above.
	 */
	fsnotify_remove_first_event(group);
	if (fanotify_is_perm_event(event->mask))
		FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
	if (fanotify_is_hashed_event(event->mask))
		fanotify_unhash_event(group, event);
out:
	spin_unlock(&group->notification_lock);
	return event;
}

/*
 * Reserve an fd in the listener's fd table and open a new struct file for
 * @path.  The file is returned via @file and installed later by the caller
 * (see fd_install() in copy_event_to_user()).  Returns the fd number or a
 * negative error.
 */
static int create_fd(struct fsnotify_group *group, const struct path *path,
		     struct file **file)
{
	int client_fd;
	struct file *new_file;

	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
	if (client_fd < 0)
		return client_fd;

	/*
	 * we need a new file handle for the userspace program so it can read even if it was
	 * originally opened O_WRONLY.
	 */
	new_file = dentry_open(path,
			       group->fanotify_data.f_flags | __FMODE_NONOTIFY,
			       current_cred());
	if (IS_ERR(new_file)) {
		/*
		 * we still send an event even if we can't open the file. this
		 * can happen when say tasks are gone and we try to open their
		 * /proc files or we try to open a WRONLY file like in sysfs
		 * we just send the errno to userspace since there isn't much
		 * else we can do.
		 */
		put_unused_fd(client_fd);
		client_fd = PTR_ERR(new_file);
	} else {
		*file = new_file;
	}

	return client_fd;
}

/*
 * Validate and copy the FAN_RESPONSE_INFO_AUDIT_RULE record that follows
 * struct fanotify_response in the write() buffer.  Returns the consumed
 * length (== sizeof(*friar)) or a negative error.
 */
static int process_access_response_info(const char __user *info,
					size_t info_len,
					struct fanotify_response_info_audit_rule *friar)
{
	if (info_len != sizeof(*friar))
		return -EINVAL;

	if (copy_from_user(friar, info, sizeof(*friar)))
		return -EFAULT;

	if (friar->hdr.type != FAN_RESPONSE_INFO_AUDIT_RULE)
		return -EINVAL;
	if (friar->hdr.pad != 0)
		return -EINVAL;
	if (friar->hdr.len != sizeof(*friar))
		return -EINVAL;

	return info_len;
}

/*
 * Finish processing of permission event by setting it to ANSWERED state and
 * drop group->notification_lock.
 */
static void finish_permission_event(struct fsnotify_group *group,
				    struct fanotify_perm_event *event, u32 response,
				    struct fanotify_response_info_audit_rule *friar)
	__releases(&group->notification_lock)
{
	bool destroy = false;

	assert_spin_locked(&group->notification_lock);
	event->response = response & ~FAN_INFO;
	if (response & FAN_INFO)
		memcpy(&event->audit_rule, friar, sizeof(*friar));

	if (event->state == FAN_EVENT_CANCELED)
		destroy = true;
	else
		event->state = FAN_EVENT_ANSWERED;
	spin_unlock(&group->notification_lock);
	if (destroy)
		fsnotify_destroy_event(group, &event->fae.fse);
}

/*
 * Handle a FAN_ALLOW/FAN_DENY reply written by userspace for the permission
 * event identified by @response_struct->fd.  Optional info (@info/@info_len)
 * may carry an audit rule record when FAN_INFO is set.
 */
static int process_access_response(struct fsnotify_group *group,
				   struct fanotify_response *response_struct,
				   const char __user *info,
				   size_t info_len)
{
	struct fanotify_perm_event *event;
	int fd = response_struct->fd;
	u32 response = response_struct->response;
	int ret = info_len;
	struct fanotify_response_info_audit_rule friar;

	pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
		 group, fd, response, info, info_len);
	/*
	 * make sure the response is valid, if
invalid we do nothing
	 * and either userspace can send a valid response or we will clean it
	 * up after the timeout
	 */
	if (response & ~FANOTIFY_RESPONSE_VALID_MASK)
		return -EINVAL;

	switch (response & FANOTIFY_RESPONSE_ACCESS) {
	case FAN_ALLOW:
	case FAN_DENY:
		break;
	default:
		return -EINVAL;
	}

	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
		return -EINVAL;

	if (response & FAN_INFO) {
		ret = process_access_response_info(info, info_len, &friar);
		if (ret < 0)
			return ret;
		if (fd == FAN_NOFD)
			return ret;
	} else {
		ret = 0;
	}

	if (fd < 0)
		return -EINVAL;

	spin_lock(&group->notification_lock);
	list_for_each_entry(event, &group->fanotify_data.access_list,
			    fae.fse.list) {
		if (event->fd != fd)
			continue;

		list_del_init(&event->fae.fse.list);
		/* finish_permission_event() drops notification_lock */
		finish_permission_event(group, event, response, &friar);
		wake_up(&group->fanotify_data.access_waitq);
		return ret;
	}
	spin_unlock(&group->notification_lock);

	return -ENOENT;
}

/*
 * Copy a FAN_EVENT_INFO_TYPE_ERROR info record for an error event to the
 * userspace buffer.  Returns the record length or -EFAULT.
 */
static size_t copy_error_info_to_user(struct fanotify_event *event,
				      char __user *buf, int count)
{
	struct fanotify_event_info_error info = { };
	struct fanotify_error_event *fee = FANOTIFY_EE(event);

	info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
	info.hdr.len = FANOTIFY_ERROR_INFO_LEN;

	if (WARN_ON(count < info.hdr.len))
		return -EFAULT;

	info.error = fee->error;
	info.error_count = fee->err_count;

	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	return info.hdr.len;
}

/*
 * Copy one fid info record (header + file_handle + optional null terminated
 * @name) of type @info_type to the userspace buffer.  Returns the padded
 * record length or -EFAULT.
 */
static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
				 int info_type, const char *name,
				 size_t name_len,
				 char __user *buf, size_t count)
{
	struct fanotify_event_info_fid info = { };
	struct file_handle handle = { };
	unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf;
	size_t fh_len = fh ? fh->len : 0;
	size_t info_len = fanotify_fid_info_len(fh_len, name_len);
	size_t len = info_len;

	pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n",
		 __func__, fh_len, name_len, info_len, count);

	if (WARN_ON_ONCE(len < sizeof(info) || len > count))
		return -EFAULT;

	/*
	 * Copy event info fid header followed by variable sized file handle
	 * and optionally followed by variable sized filename.
	 */
	switch (info_type) {
	case FAN_EVENT_INFO_TYPE_FID:
	case FAN_EVENT_INFO_TYPE_DFID:
		if (WARN_ON_ONCE(name_len))
			return -EFAULT;
		break;
	case FAN_EVENT_INFO_TYPE_DFID_NAME:
	case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME:
	case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME:
		if (WARN_ON_ONCE(!name || !name_len))
			return -EFAULT;
		break;
	default:
		return -EFAULT;
	}

	info.hdr.info_type = info_type;
	info.hdr.len = len;
	info.fsid = *fsid;
	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	buf += sizeof(info);
	len -= sizeof(info);
	if (WARN_ON_ONCE(len < sizeof(handle)))
		return -EFAULT;

	handle.handle_type = fh->type;
	handle.handle_bytes = fh_len;

	/* Mangle handle_type for bad file_handle */
	if (!fh_len)
		handle.handle_type = FILEID_INVALID;

	if (copy_to_user(buf, &handle, sizeof(handle)))
		return -EFAULT;

	buf += sizeof(handle);
	len -= sizeof(handle);
	if (WARN_ON_ONCE(len < fh_len))
		return -EFAULT;

	/*
	 * For an inline fh and inline file name, copy through stack to exclude
	 * the copy from usercopy hardening protections.
	 */
	fh_buf = fanotify_fh_buf(fh);
	if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
		memcpy(bounce, fh_buf, fh_len);
		fh_buf = bounce;
	}
	if (copy_to_user(buf, fh_buf, fh_len))
		return -EFAULT;

	buf += fh_len;
	len -= fh_len;

	if (name_len) {
		/* Copy the filename with terminating null */
		name_len++;
		if (WARN_ON_ONCE(len < name_len))
			return -EFAULT;

		if (copy_to_user(buf, name, name_len))
			return -EFAULT;

		buf += name_len;
		len -= name_len;
	}

	/* Pad with 0's */
	WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
	if (len > 0 && clear_user(buf, len))
		return -EFAULT;

	return info_len;
}

/*
 * Copy a FAN_EVENT_INFO_TYPE_PIDFD info record carrying @pidfd to the
 * userspace buffer.  Returns the record length or -EFAULT.
 */
static int copy_pidfd_info_to_user(int pidfd,
				   char __user *buf,
				   size_t count)
{
	struct fanotify_event_info_pidfd info = { };
	size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;

	if (WARN_ON_ONCE(info_len > count))
		return -EFAULT;

	info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD;
	info.hdr.len = info_len;
	info.pidfd = pidfd;

	if (copy_to_user(buf, &info, info_len))
		return -EFAULT;

	return info_len;
}

/*
 * Copy all info records that follow the event metadata to the userspace
 * buffer, in the order documented below.  Returns the total number of bytes
 * copied or a negative error.
 */
static int copy_info_records_to_user(struct fanotify_event *event,
				     struct fanotify_info *info,
				     unsigned int info_mode, int pidfd,
				     char __user *buf, size_t count)
{
	int ret, total_bytes = 0, info_type = 0;
	unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS;
	unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;

	/*
	 * Event info records order is as follows:
	 * 1. dir fid + name
	 * 2. (optional) new dir fid + new name
	 * 3. (optional) child fid
	 */
	if (fanotify_event_has_dir_fh(event)) {
		info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
					     FAN_EVENT_INFO_TYPE_DFID;

		/* FAN_RENAME uses special info types */
		if (event->mask & FAN_RENAME)
			info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME;

		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
					    fanotify_info_dir_fh(info),
					    info_type,
					    fanotify_info_name(info),
					    info->name_len, buf, count);
		if (ret < 0)
			return ret;

		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	/* New dir fid+name may be reported in addition to old dir fid+name */
	if (fanotify_event_has_dir2_fh(event)) {
		info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME;
		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
					    fanotify_info_dir2_fh(info),
					    info_type,
					    fanotify_info_name2(info),
					    info->name2_len, buf, count);
		if (ret < 0)
			return ret;

		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	if (fanotify_event_has_object_fh(event)) {
		const char *dot = NULL;
		int dot_len = 0;

		if (fid_mode == FAN_REPORT_FID || info_type) {
			/*
			 * With only group flag FAN_REPORT_FID only type FID is
			 * reported. Second info record type is always FID.
			 */
			info_type = FAN_EVENT_INFO_TYPE_FID;
		} else if ((fid_mode & FAN_REPORT_NAME) &&
			   (event->mask & FAN_ONDIR)) {
			/*
			 * With group flag FAN_REPORT_NAME, if name was not
			 * recorded in an event on a directory, report the name
			 * "." with info type DFID_NAME.
			 */
			info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
			dot = ".";
			dot_len = 1;
		} else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
			   (event->mask & FAN_ONDIR)) {
			/*
			 * With group flag FAN_REPORT_DIR_FID, a single info
			 * record has type DFID for directory entry modification
			 * event and for event on a directory.
			 */
			info_type = FAN_EVENT_INFO_TYPE_DFID;
		} else {
			/*
			 * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
			 * a single info record has type FID for event on a
			 * non-directory, when there is no directory to report.
			 * For example, on FAN_DELETE_SELF event.
			 */
			info_type = FAN_EVENT_INFO_TYPE_FID;
		}

		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
					    fanotify_event_object_fh(event),
					    info_type, dot, dot_len,
					    buf, count);
		if (ret < 0)
			return ret;

		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	if (pidfd_mode) {
		ret = copy_pidfd_info_to_user(pidfd, buf, count);
		if (ret < 0)
			return ret;

		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	if (fanotify_is_error_event(event->mask)) {
		ret = copy_error_info_to_user(event, buf, count);
		if (ret < 0)
			return ret;
		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	return total_bytes;
}

/*
 * Copy one full event (metadata + optional fd/pidfd/info records) to the
 * userspace buffer.  Reserved fds are only installed into the fd table once
 * all copies have succeeded.  Returns the event length or a negative error.
 */
static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fanotify_event *event,
				  char __user *buf, size_t count)
{
	struct fanotify_event_metadata metadata;
	const struct path *path = fanotify_event_path(event);
	struct fanotify_info *info = fanotify_event_info(event);
	unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
	unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
	struct file *f = NULL, *pidfd_file = NULL;
	int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	metadata.event_len = fanotify_event_len(info_mode, event);
	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
	metadata.vers = FANOTIFY_METADATA_VERSION;
	metadata.reserved = 0;
	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
	metadata.pid = pid_vnr(event->pid);
	/*
	 * For an unprivileged listener, event->pid can be used to identify the
	 * events generated by the listener process itself, without disclosing
	 * the pids of other processes.
	 */
	if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
	    task_tgid(current) != event->pid)
		metadata.pid = 0;

	/*
	 * For now, fid mode is required for an unprivileged listener and
	 * fid mode does not report fd in events. Keep this check anyway
	 * for safety in case fid mode requirement is relaxed in the future
	 * to allow unprivileged listener to get events with no fd and no fid.
	 */
	if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
	    path && path->mnt && path->dentry) {
		fd = create_fd(group, path, &f);
		if (fd < 0)
			return fd;
	}
	metadata.fd = fd;

	if (pidfd_mode) {
		/*
		 * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual
		 * exclusion is ever lifted. At the time of incorporating pidfd
		 * support within fanotify, the pidfd API only supported the
		 * creation of pidfds for thread-group leaders.
		 */
		WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID));

		/*
		 * The PIDTYPE_TGID check for an event->pid is performed
		 * preemptively in an attempt to catch out cases where the event
		 * listener reads events after the event generating process has
		 * already terminated. Report FAN_NOPIDFD to the event listener
		 * in those cases, with all other pidfd creation errors being
		 * reported as FAN_EPIDFD.
		 */
		if (metadata.pid == 0 ||
		    !pid_has_task(event->pid, PIDTYPE_TGID)) {
			pidfd = FAN_NOPIDFD;
		} else {
			pidfd = pidfd_prepare(event->pid, 0, &pidfd_file);
			if (pidfd < 0)
				pidfd = FAN_EPIDFD;
		}
	}

	ret = -EFAULT;
	/*
	 * Sanity check copy size in case get_one_event() and
	 * event_len sizes ever get out of sync.
	 */
	if (WARN_ON_ONCE(metadata.event_len > count))
		goto out_close_fd;

	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
		goto out_close_fd;

	buf += FAN_EVENT_METADATA_LEN;
	count -= FAN_EVENT_METADATA_LEN;

	if (fanotify_is_perm_event(event->mask))
		FANOTIFY_PERM(event)->fd = fd;

	if (info_mode) {
		ret = copy_info_records_to_user(event, info, info_mode, pidfd,
						buf, count);
		if (ret < 0)
			goto out_close_fd;
	}

	/* All copies succeeded - now make the reserved fds visible */
	if (f)
		fd_install(fd, f);

	if (pidfd_file)
		fd_install(pidfd, pidfd_file);

	return metadata.event_len;

out_close_fd:
	if (fd != FAN_NOFD) {
		put_unused_fd(fd);
		fput(f);
	}

	if (pidfd >= 0) {
		put_unused_fd(pidfd);
		fput(pidfd_file);
	}

	return ret;
}

/* fanotify userspace file descriptor functions */
static __poll_t fanotify_poll(struct file *file, poll_table *wait)
{
	struct fsnotify_group *group = file->private_data;
	__poll_t ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	spin_lock(&group->notification_lock);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&group->notification_lock);

	return ret;
}

/*
 * Copy as many queued events as fit in the userspace buffer; block (unless
 * O_NONBLOCK) while the queue is empty and nothing was copied yet.
 */
static ssize_t fanotify_read(struct file *file, char __user *buf,
			     size_t count, loff_t *pos)
{
	struct fsnotify_group *group;
	struct fanotify_event *event;
	char __user *start;
	int ret;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	start = buf;
	group = file->private_data;

	pr_debug("%s: group=%p\n", __func__, group);

	add_wait_queue(&group->notification_waitq, &wait);
	while (1) {
		/*
		 * User can supply arbitrarily large buffer. Avoid softlockups
		 * in case there are lots of available events.
		 */
		cond_resched();
		event = get_one_event(group, count);
		if (IS_ERR(event)) {
			ret = PTR_ERR(event);
			break;
		}

		if (!event) {
			ret = -EAGAIN;
			if (file->f_flags & O_NONBLOCK)
				break;

			ret = -ERESTARTSYS;
			if (signal_pending(current))
				break;

			/* Return partial read instead of blocking */
			if (start != buf)
				break;

			wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		ret = copy_event_to_user(group, event, buf, count);
		if (unlikely(ret == -EOPENSTALE)) {
			/*
			 * We cannot report events with stale fd so drop it.
			 * Setting ret to 0 will continue the event loop and
			 * do the right thing if there are no more events to
			 * read (i.e. return bytes read, -EAGAIN or wait).
			 */
			ret = 0;
		}

		/*
		 * Permission events get queued to wait for response. Other
		 * events can be destroyed now.
		 */
		if (!fanotify_is_perm_event(event->mask)) {
			fsnotify_destroy_event(group, &event->fse);
		} else {
			if (ret <= 0) {
				/* finish_permission_event() drops the lock */
				spin_lock(&group->notification_lock);
				finish_permission_event(group,
					FANOTIFY_PERM(event), FAN_DENY, NULL);
				wake_up(&group->fanotify_data.access_waitq);
			} else {
				spin_lock(&group->notification_lock);
				list_add_tail(&event->fse.list,
					&group->fanotify_data.access_list);
				spin_unlock(&group->notification_lock);
			}
		}
		if (ret < 0)
			break;
		buf += ret;
		count -= ret;
	}
	remove_wait_queue(&group->notification_waitq, &wait);

	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
}

/*
 * write() carries a struct fanotify_response (permission event reply),
 * optionally followed by an info record (e.g. audit rule).
 */
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	struct fanotify_response response;
	struct fsnotify_group *group;
	int ret;
	const char __user *info_buf = buf + sizeof(struct fanotify_response);
	size_t info_len;

	if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		return -EINVAL;

	group = file->private_data;

	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);

	if (count < sizeof(response))
		return -EINVAL;

	if (copy_from_user(&response, buf, sizeof(response)))
		return -EFAULT;

	info_len = count - sizeof(response);

	ret = process_access_response(group, &response, info_buf, info_len);
	if (ret < 0)
		count = ret;
	else
		count = sizeof(response) + ret;

	return count;
}

/* Tear down the group when the last reference to the fanotify fd is closed */
static int fanotify_release(struct inode *ignored, struct file *file)
{
	struct fsnotify_group *group = file->private_data;
	struct fsnotify_event *fsn_event;

	/*
	 * Stop new events from arriving in the notification queue. since
	 * userspace cannot use fanotify fd anymore, no event can enter or
	 * leave access_list by now either.
	 */
	fsnotify_group_stop_queueing(group);

	/*
	 * Process all permission events on access_list and notification queue
	 * and simulate reply from userspace.
	 */
	spin_lock(&group->notification_lock);
	while (!list_empty(&group->fanotify_data.access_list)) {
		struct fanotify_perm_event *event;

		event = list_first_entry(&group->fanotify_data.access_list,
				struct fanotify_perm_event, fae.fse.list);
		list_del_init(&event->fae.fse.list);
		/* finish_permission_event() drops the lock - retake it */
		finish_permission_event(group, event, FAN_ALLOW, NULL);
		spin_lock(&group->notification_lock);
	}

	/*
	 * Destroy all non-permission events. For permission events just
	 * dequeue them and set the response. They will be freed once the
	 * response is consumed and fanotify_get_response() returns.
	 */
	while ((fsn_event = fsnotify_remove_first_event(group))) {
		struct fanotify_event *event = FANOTIFY_E(fsn_event);

		if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
			spin_unlock(&group->notification_lock);
			fsnotify_destroy_event(group, fsn_event);
		} else {
			/* finish_permission_event() drops the lock */
			finish_permission_event(group, FANOTIFY_PERM(event),
						FAN_ALLOW, NULL);
		}
		spin_lock(&group->notification_lock);
	}
	spin_unlock(&group->notification_lock);

	/* Response for all permission events is set, wakeup waiters */
	wake_up(&group->fanotify_data.access_waitq);

	/* matches the fanotify_init->fsnotify_alloc_group */
	fsnotify_destroy_group(group);

	return 0;
}

/* Only FIONREAD is supported: report queued bytes (metadata only) */
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct fsnotify_group *group;
	struct fsnotify_event *fsn_event;
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;

	group = file->private_data;

	p = (void __user *) arg;

	switch (cmd) {
	case FIONREAD:
		spin_lock(&group->notification_lock);
		list_for_each_entry(fsn_event, &group->notification_list, list)
			send_len += FAN_EVENT_METADATA_LEN;
		spin_unlock(&group->notification_lock);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
}

static const struct file_operations fanotify_fops = {
	.show_fdinfo	= fanotify_show_fdinfo,
	.poll		= fanotify_poll,
	.read		= fanotify_read,
	.write		= fanotify_write,
	.fasync		= NULL,
	.release	= fanotify_release,
	.unlocked_ioctl	= fanotify_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

/*
 * Resolve the mark target to a struct path, either from @dfd (when
 * @filename is NULL) or by path lookup.  On success the caller owns a
 * path reference.
 */
static int fanotify_find_path(int dfd, const char __user *filename,
			      struct path *path, unsigned int flags, __u64 mask,
			      unsigned int obj_type)
{
	int ret;

	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
		 dfd, filename, flags);

	if (filename == NULL) {
		CLASS(fd, f)(dfd);

		if (fd_empty(f))
			return -EBADF;

		if ((flags & FAN_MARK_ONLYDIR) &&
		    !(S_ISDIR(file_inode(fd_file(f))->i_mode)))
			return -ENOTDIR;

		*path = fd_file(f)->f_path;
		path_get(path);
	} else {
		unsigned int lookup_flags = 0;

		if (!(flags & FAN_MARK_DONT_FOLLOW))
			lookup_flags |= LOOKUP_FOLLOW;
		if (flags & FAN_MARK_ONLYDIR)
			lookup_flags |= LOOKUP_DIRECTORY;

		ret = user_path_at(dfd, filename, lookup_flags, path);
		if (ret)
			goto out;
	}

	/* you can only watch an inode if you have read permissions on it */
	ret = path_permission(path, MAY_READ);
	if (ret) {
		path_put(path);
		goto out;
	}

	ret = security_path_notify(path, mask, obj_type);
	if (ret)
		path_put(path);

out:
	return ret;
}

/*
 * Clear @mask bits (minus @umask) from the mark's event or ignore mask and
 * report via @destroy whether the mark became empty.  Returns the event bits
 * that were removed from the calculated mask.
 */
static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
					    __u32 mask, unsigned int flags,
					    __u32 umask, int *destroy)
{
	__u32 oldmask, newmask;

	/* umask bits cannot be removed by user */
	mask &= ~umask;
	spin_lock(&fsn_mark->lock);
	oldmask = fsnotify_calc_mask(fsn_mark);
	if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
		fsn_mark->mask &= ~mask;
	} else {
		fsn_mark->ignore_mask &= ~mask;
	}
	newmask = fsnotify_calc_mask(fsn_mark);
	/*
	 * We need to keep the mark around even if remaining mask cannot
	 * result in any events (e.g. mask == FAN_ONDIR) to support incremental
	 * changes to the mask.
	 * Destroy mark when only umask bits remain.
	 */
	*destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
	spin_unlock(&fsn_mark->lock);

	return oldmask & ~newmask;
}

/*
 * Remove @mask bits from the mark on @obj; destroy the mark when nothing
 * but umask bits remain.  Returns 0 or -ENOENT if no mark was found.
 */
static int fanotify_remove_mark(struct fsnotify_group *group,
				void *obj, unsigned int obj_type, __u32 mask,
				unsigned int flags, __u32 umask)
{
	struct fsnotify_mark *fsn_mark = NULL;
	__u32 removed;
	int destroy_mark;

	fsnotify_group_lock(group);
	fsn_mark = fsnotify_find_mark(obj, obj_type, group);
	if (!fsn_mark) {
		fsnotify_group_unlock(group);
		return -ENOENT;
	}

	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 umask, &destroy_mark);
	if (removed & fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	if (destroy_mark)
		fsnotify_detach_mark(fsn_mark);
	fsnotify_group_unlock(group);
	if (destroy_mark)
		fsnotify_free_mark(fsn_mark);

	/* matches the fsnotify_find_mark() */
	fsnotify_put_mark(fsn_mark);
	return 0;
}

/*
 * Update mark flags from FAN_MARK_* request flags.  Returns true when the
 * connector mask needs to be recalculated as a result.
 */
static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
				       unsigned int fan_flags)
{
	bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
	unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
	bool recalc = false;

	/*
	 * When using FAN_MARK_IGNORE for the first time, mark starts using
	 * independent event flags in ignore mask. After that, trying to
	 * update the ignore mask with the old FAN_MARK_IGNORED_MASK API
	 * will result in EEXIST error.
	 */
	if (ignore == FAN_MARK_IGNORE)
		fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;

	/*
	 * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
	 * the removal of the FS_MODIFY bit in calculated mask if it was set
	 * because of an ignore mask that is now going to survive FS_MODIFY.
	 */
	if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
		fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
		if (!(fsn_mark->mask & FS_MODIFY))
			recalc = true;
	}

	if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE ||
	    want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
		return recalc;

	/*
	 * NO_IREF may be removed from a mark, but not added.
	 * When removed, fsnotify_recalc_mask() will take the inode ref.
	 */
	WARN_ON_ONCE(!want_iref);
	fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF;

	return true;
}

/*
 * Add @mask bits to the mark's event or ignore mask and update mark flags.
 * Returns true when the connector mask needs to be recalculated.
 */
static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
				      __u32 mask, unsigned int fan_flags)
{
	bool recalc;

	spin_lock(&fsn_mark->lock);
	if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
		fsn_mark->mask |= mask;
	else
		fsn_mark->ignore_mask |= mask;

	recalc = fsnotify_calc_mask(fsn_mark) &
		~fsnotify_conn_mask(fsn_mark->connector);

	recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags);
	spin_unlock(&fsn_mark->lock);

	return recalc;
}

/* Filesystem id of a mark target, with its sb and weak/strong quality */
struct fan_fsid {
	struct super_block *sb;
	__kernel_fsid_t id;
	bool weak;
};

/*
 * Record @fsid on a new mark and validate that it may be mixed with the
 * fsids of the group's existing marks.  Returns 0 or -EXDEV.
 */
static int fanotify_set_mark_fsid(struct fsnotify_group *group,
				  struct fsnotify_mark *mark,
				  struct fan_fsid *fsid)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *old;
	struct super_block *old_sb = NULL;

	FANOTIFY_MARK(mark)->fsid = fsid->id;
	mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
	if (fsid->weak)
		mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;

	/* First mark added will determine if group is single or multi fsid */
	if (list_empty(&group->marks_list))
		return 0;

	/* Find sb of an existing mark */
	list_for_each_entry(old, &group->marks_list,
g_list) { 1189 conn = READ_ONCE(old->connector); 1190 if (!conn) 1191 continue; 1192 old_sb = fsnotify_connector_sb(conn); 1193 if (old_sb) 1194 break; 1195 } 1196 1197 /* Only detached marks left? */ 1198 if (!old_sb) 1199 return 0; 1200 1201 /* Do not allow mixing of marks with weak and strong fsid */ 1202 if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) 1203 return -EXDEV; 1204 1205 /* Allow mixing of marks with strong fsid from different fs */ 1206 if (!fsid->weak) 1207 return 0; 1208 1209 /* Do not allow mixing marks with weak fsid from different fs */ 1210 if (old_sb != fsid->sb) 1211 return -EXDEV; 1212 1213 /* Do not allow mixing marks from different btrfs sub-volumes */ 1214 if (!fanotify_fsid_equal(&FANOTIFY_MARK(old)->fsid, 1215 &FANOTIFY_MARK(mark)->fsid)) 1216 return -EXDEV; 1217 1218 return 0; 1219 } 1220 1221 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, 1222 void *obj, 1223 unsigned int obj_type, 1224 unsigned int fan_flags, 1225 struct fan_fsid *fsid) 1226 { 1227 struct ucounts *ucounts = group->fanotify_data.ucounts; 1228 struct fanotify_mark *fan_mark; 1229 struct fsnotify_mark *mark; 1230 int ret; 1231 1232 /* 1233 * Enforce per user marks limits per user in all containing user ns. 1234 * A group with FAN_UNLIMITED_MARKS does not contribute to mark count 1235 * in the limited groups account. 
	 */
	if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
	    !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
		return ERR_PTR(-ENOSPC);

	fan_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
	if (!fan_mark) {
		ret = -ENOMEM;
		goto out_dec_ucounts;
	}

	mark = &fan_mark->fsn_mark;
	fsnotify_init_mark(mark, group);
	if (fan_flags & FAN_MARK_EVICTABLE)
		mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;

	/* Cache fsid of filesystem containing the marked object */
	if (fsid) {
		ret = fanotify_set_mark_fsid(group, mark, fsid);
		if (ret)
			goto out_put_mark;
	} else {
		fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
	}

	ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0);
	if (ret)
		goto out_put_mark;

	return mark;

out_put_mark:
	fsnotify_put_mark(mark);
out_dec_ucounts:
	if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
		dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS);
	return ERR_PTR(ret);
}

/*
 * Lazily initialize the pool used to allocate FAN_FS_ERROR events.
 * Idempotent: a no-op if the pool was already initialized.
 */
static int fanotify_group_init_error_pool(struct fsnotify_group *group)
{
	if (mempool_initialized(&group->fanotify_data.error_events_pool))
		return 0;

	return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool,
					 FANOTIFY_DEFAULT_FEE_POOL_SIZE,
					 sizeof(struct fanotify_error_event));
}

/*
 * Check if the flags of an update request conflict with the flags of an
 * existing mark.  Returns -EEXIST on a forbidden downgrade, 0 otherwise.
 */
static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
					     unsigned int fan_flags)
{
	/*
	 * Non evictable mark cannot be downgraded to evictable mark.
	 */
	if (fan_flags & FAN_MARK_EVICTABLE &&
	    !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
		return -EEXIST;

	/*
	 * New ignore mask semantics cannot be downgraded to old semantics.
	 */
	if (fan_flags & FAN_MARK_IGNORED_MASK &&
	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
		return -EEXIST;

	/*
	 * An ignore mask that survives modify could never be downgraded to not
	 * survive modify.  With new FAN_MARK_IGNORE semantics we make that rule
	 * explicit and return an error when trying to update the ignore mask
	 * without the original FAN_MARK_IGNORED_SURV_MODIFY value.
	 */
	if (fan_flags & FAN_MARK_IGNORE &&
	    !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
		return -EEXIST;

	return 0;
}

/*
 * Create a new mark on @obj or update an existing one: add @mask to its
 * event mask or ignore mask and recalculate the object's interest mask
 * if needed.
 */
static int fanotify_add_mark(struct fsnotify_group *group,
			     void *obj, unsigned int obj_type,
			     __u32 mask, unsigned int fan_flags,
			     struct fan_fsid *fsid)
{
	struct fsnotify_mark *fsn_mark;
	bool recalc;
	int ret = 0;

	fsnotify_group_lock(group);
	fsn_mark = fsnotify_find_mark(obj, obj_type, group);
	if (!fsn_mark) {
		fsn_mark = fanotify_add_new_mark(group, obj, obj_type,
						 fan_flags, fsid);
		if (IS_ERR(fsn_mark)) {
			fsnotify_group_unlock(group);
			return PTR_ERR(fsn_mark);
		}
	}

	/*
	 * Check if requested mark flags conflict with an existing mark flags.
	 */
	ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
	if (ret)
		goto out;

	/*
	 * Error events are pre-allocated per group, only if strictly
	 * needed (i.e. FAN_FS_ERROR was requested).
	 */
	if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
	    (mask & FAN_FS_ERROR)) {
		ret = fanotify_group_init_error_pool(group);
		if (ret)
			goto out;
	}

	recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags);
	if (recalc)
		fsnotify_recalc_mask(fsn_mark->connector);

out:
	fsnotify_group_unlock(group);

	fsnotify_put_mark(fsn_mark);
	return ret;
}

/* Allocate the single event queued when the notification queue overflows */
static struct fsnotify_event *fanotify_alloc_overflow_event(void)
{
	struct fanotify_event *oevent;

	oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT);
	if (!oevent)
		return NULL;

	fanotify_init_event(oevent, 0, FS_Q_OVERFLOW);
	oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW;

	return &oevent->fse;
}

/* Allocate the hash table used for merging duplicate events */
static struct hlist_head *fanotify_alloc_merge_hash(void)
{
	struct hlist_head *hash;

	hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS,
		       GFP_KERNEL_ACCOUNT);
	if (!hash)
		return NULL;

	__hash_init(hash, FANOTIFY_HTABLE_SIZE);

	return hash;
}

/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
	struct fsnotify_group *group;
	int f_flags, fd;
	unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
	unsigned int class = flags & FANOTIFY_CLASS_BITS;
	unsigned int internal_flags = 0;

	pr_debug("%s: flags=%x event_f_flags=%x\n",
		 __func__, flags, event_f_flags);

	if (!capable(CAP_SYS_ADMIN)) {
		/*
		 * An unprivileged user can setup an fanotify group with
		 * limited functionality - an unprivileged group is limited to
		 * notification events with file handles and it cannot use
		 * unlimited queue/marks.
		 */
		if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
			return -EPERM;

		/*
		 * Setting the internal flag FANOTIFY_UNPRIV on the group
		 * prevents setting mount/filesystem marks on this group and
		 * prevents reporting pid and open fd in events.
		 */
		internal_flags |= FANOTIFY_UNPRIV;
	}

#ifdef CONFIG_AUDITSYSCALL
	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
	if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
		return -EINVAL;

	/*
	 * A pidfd can only be returned for a thread-group leader; thus
	 * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually
	 * exclusive.
	 */
	if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
		return -EINVAL;

	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

	switch (event_f_flags & O_ACCMODE) {
	case O_RDONLY:
	case O_RDWR:
	case O_WRONLY:
		break;
	default:
		return -EINVAL;
	}

	/* fid reporting modes are only valid with the notification class */
	if (fid_mode && class != FAN_CLASS_NOTIF)
		return -EINVAL;

	/*
	 * Child name is reported with parent fid so requires dir fid.
	 * We can report both child fid and dir fid with or without name.
	 */
	if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
		return -EINVAL;

	/*
	 * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID
	 * and is used as an indication to report both dir and child fid on all
	 * dirent events.
	 */
	if ((fid_mode & FAN_REPORT_TARGET_FID) &&
	    (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
		return -EINVAL;

	f_flags = O_RDWR | __FMODE_NONOTIFY;
	if (flags & FAN_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (flags & FAN_NONBLOCK)
		f_flags |= O_NONBLOCK;

	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
	group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
				     FSNOTIFY_GROUP_USER);
	if (IS_ERR(group)) {
		return PTR_ERR(group);
	}

	/* Enforce groups limits per user in all containing user ns */
	group->fanotify_data.ucounts = inc_ucount(current_user_ns(),
						  current_euid(),
						  UCOUNT_FANOTIFY_GROUPS);
	if (!group->fanotify_data.ucounts) {
		fd = -EMFILE;
		goto out_destroy_group;
	}

	group->fanotify_data.flags = flags | internal_flags;
	group->memcg = get_mem_cgroup_from_mm(current->mm);

	group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
	if (!group->fanotify_data.merge_hash) {
		fd = -ENOMEM;
		goto out_destroy_group;
	}

	group->overflow_event = fanotify_alloc_overflow_event();
	if (unlikely(!group->overflow_event)) {
		fd = -ENOMEM;
		goto out_destroy_group;
	}

	if (force_o_largefile())
		event_f_flags |= O_LARGEFILE;
	group->fanotify_data.f_flags = event_f_flags;
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
	switch (class) {
	case FAN_CLASS_NOTIF:
		group->priority = FSNOTIFY_PRIO_NORMAL;
		break;
	case FAN_CLASS_CONTENT:
		group->priority = FSNOTIFY_PRIO_CONTENT;
		break;
	case FAN_CLASS_PRE_CONTENT:
		group->priority = FSNOTIFY_PRIO_PRE_CONTENT;
		break;
	default:
		fd = -EINVAL;
		goto out_destroy_group;
	}

	/* Unlimited queue/marks and audit enabling require extra privileges */
	if (flags & FAN_UNLIMITED_QUEUE) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->max_events = UINT_MAX;
	} else {
		group->max_events = fanotify_max_queued_events;
	}

	if (flags & FAN_UNLIMITED_MARKS) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
	}

	if (flags & FAN_ENABLE_AUDIT) {
		fd = -EPERM;
		if (!capable(CAP_AUDIT_WRITE))
			goto out_destroy_group;
	}

	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
	if (fd < 0)
		goto out_destroy_group;

	return fd;

out_destroy_group:
	fsnotify_destroy_group(group);
	return fd;
}

/*
 * Resolve the fsid of the filesystem containing @dentry into @fsid and
 * classify it as strong or weak.  A zero fsid or a subvolume fsid that
 * differs from the sb root is "weak" and only allowed for inode marks.
 */
static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
			      struct fan_fsid *fsid)
{
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	__kernel_fsid_t root_fsid;
	int err;

	/*
	 * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
	 */
	err = vfs_get_fsid(dentry, &fsid->id);
	if (err)
		return err;

	fsid->sb = dentry->d_sb;
	if (!fsid->id.val[0] && !fsid->id.val[1]) {
		err = -ENODEV;
		goto weak;
	}

	/*
	 * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
	 * which uses a different fsid than sb root.
	 */
	err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid);
	if (err)
		return err;

	if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) {
		err = -EXDEV;
		goto weak;
	}

	fsid->weak = false;
	return 0;

weak:
	/* Allow weak fsid when marking inodes */
	fsid->weak = true;
	return (mark_type == FAN_MARK_INODE) ? 0 : err;
}

/* Check if filesystem can encode a unique fid */
static int fanotify_test_fid(struct dentry *dentry, unsigned int flags)
{
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	const struct export_operations *nop = dentry->d_sb->s_export_op;

	/*
	 * We need to make sure that the filesystem supports encoding of
	 * file handles so user can use name_to_handle_at() to compare fids
	 * reported with events to the file handle of watched objects.
	 */
	if (!exportfs_can_encode_fid(nop))
		return -EOPNOTSUPP;

	/*
	 * For sb/mount mark, we also need to make sure that the filesystem
	 * supports decoding file handles, so user has a way to map back the
	 * reported fids to filesystem objects.
	 */
	if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop))
		return -EOPNOTSUPP;

	return 0;
}

/*
 * Validate that the requested @mask and @flags are supported for the
 * filesystem that @path resides on.  Returns 0 or a negative errno.
 */
static int fanotify_events_supported(struct fsnotify_group *group,
				     const struct path *path, __u64 mask,
				     unsigned int flags)
{
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
	bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
				 (mask & FAN_RENAME) ||
				 (flags & FAN_MARK_IGNORE);

	/*
	 * Some filesystems such as 'proc' acquire unusual locks when opening
	 * files.  For them fanotify permission events have high chances of
	 * deadlocking the system - open done when reporting fanotify event
	 * blocks on this "unusual" lock while another process holding the lock
	 * waits for fanotify permission event to be answered.  Just disallow
	 * permission events for such filesystems.
	 */
	if (mask & FANOTIFY_PERM_EVENTS &&
	    path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
		return -EINVAL;

	/*
	 * mount and sb marks are not allowed on kernel internal pseudo fs,
	 * like pipe_mnt, because that would subscribe to events on all the
	 * anonymous pipes in the system.
	 *
	 * SB_NOUSER covers all of the internal pseudo fs whose objects are not
	 * exposed to user's mount namespace, but there are other SB_KERNMOUNT
	 * fs, like nsfs, debugfs, for which the value of allowing sb and mount
	 * mark is questionable.  For now we leave them alone.
	 */
	if (mark_type != FAN_MARK_INODE &&
	    path->mnt->mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	/*
	 * We shouldn't have allowed setting dirent events and the directory
	 * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode,
	 * but because we always allowed it, error only when using new APIs.
	 */
	if (strict_dir_events && mark_type == FAN_MARK_INODE &&
	    !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
		return -ENOTDIR;

	return 0;
}

/*
 * Common implementation of the fanotify_mark(2) syscall: validate the
 * request, resolve the marked object and add/remove/flush marks on it.
 */
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			    int dfd, const char __user *pathname)
{
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
	struct fsnotify_group *group;
	struct path path;
	struct fan_fsid __fsid, *fsid = NULL;
	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
	unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
	unsigned int obj_type, fid_mode;
	void *obj;
	u32 umask = 0;
	int ret;

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
		 __func__, fanotify_fd, flags, dfd, pathname, mask);

	/* we only use the lower 32 bits as of right now. */
	if (upper_32_bits(mask))
		return -EINVAL;

	if (flags & ~FANOTIFY_MARK_FLAGS)
		return -EINVAL;

	switch (mark_type) {
	case FAN_MARK_INODE:
		obj_type = FSNOTIFY_OBJ_TYPE_INODE;
		break;
	case FAN_MARK_MOUNT:
		obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
		break;
	case FAN_MARK_FILESYSTEM:
		obj_type = FSNOTIFY_OBJ_TYPE_SB;
		break;
	default:
		return -EINVAL;
	}

	switch (mark_cmd) {
	case FAN_MARK_ADD:
	case FAN_MARK_REMOVE:
		if (!mask)
			return -EINVAL;
		break;
	case FAN_MARK_FLUSH:
		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		valid_mask |= FANOTIFY_PERM_EVENTS;

	if (mask & ~valid_mask)
		return -EINVAL;

	/* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
	if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
		return -EINVAL;

	/*
	 * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
	 * FAN_MARK_IGNORED_MASK.
	 */
	if (ignore == FAN_MARK_IGNORED_MASK) {
		mask &= ~FANOTIFY_EVENT_FLAGS;
		umask = FANOTIFY_EVENT_FLAGS;
	}

	CLASS(fd, f)(fanotify_fd);
	if (fd_empty(f))
		return -EBADF;

	/* verify that this is indeed an fanotify instance */
	if (unlikely(fd_file(f)->f_op != &fanotify_fops))
		return -EINVAL;
	group = fd_file(f)->private_data;

	/*
	 * An unprivileged user is not allowed to setup mount nor filesystem
	 * marks.  This also includes setting up such marks by a group that
	 * was initialized by an unprivileged user.
	 */
	if ((!capable(CAP_SYS_ADMIN) ||
	     FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
	    mark_type != FAN_MARK_INODE)
		return -EPERM;

	/*
	 * Permission events require minimum priority FAN_CLASS_CONTENT.
	 */
	if (mask & FANOTIFY_PERM_EVENTS &&
	    group->priority < FSNOTIFY_PRIO_CONTENT)
		return -EINVAL;

	/* FAN_FS_ERROR is only meaningful for whole-filesystem marks */
	if (mask & FAN_FS_ERROR &&
	    mark_type != FAN_MARK_FILESYSTEM)
		return -EINVAL;

	/*
	 * Evictable is only relevant for inode marks, because only inode object
	 * can be evicted on memory pressure.
	 */
	if (flags & FAN_MARK_EVICTABLE &&
	    mark_type != FAN_MARK_INODE)
		return -EINVAL;

	/*
	 * Events that do not carry enough information to report
	 * event->fd require a group that supports reporting fid.  Those
	 * events are not supported on a mount mark, because they do not
	 * carry enough information (i.e. path) to be filtered by mount
	 * point.
	 */
	fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
	    (!fid_mode || mark_type == FAN_MARK_MOUNT))
		return -EINVAL;

	/*
	 * FAN_RENAME uses special info type records to report the old and
	 * new parent+name.  Reporting only old and new parent id is less
	 * useful and was not implemented.
	 */
	if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
		return -EINVAL;

	/* FAN_MARK_FLUSH removes all of the group's marks of the given type */
	if (mark_cmd == FAN_MARK_FLUSH) {
		if (mark_type == FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			fsnotify_clear_sb_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		return 0;
	}

	ret = fanotify_find_path(dfd, pathname, &path, flags,
				 (mask & ALL_FSNOTIFY_EVENTS), obj_type);
	if (ret)
		return ret;

	if (mark_cmd == FAN_MARK_ADD) {
		ret = fanotify_events_supported(group, &path, mask, flags);
		if (ret)
			goto path_put_and_out;
	}

	if (fid_mode) {
		ret = fanotify_test_fsid(path.dentry, flags, &__fsid);
		if (ret)
			goto path_put_and_out;

		ret = fanotify_test_fid(path.dentry, flags);
		if (ret)
			goto path_put_and_out;

		fsid = &__fsid;
	}

	/* inode held in place by reference to path; group by fget on fd */
	if (mark_type == FAN_MARK_INODE) {
		inode = path.dentry->d_inode;
		obj = inode;
	} else {
		mnt = path.mnt;
		if (mark_type == FAN_MARK_MOUNT)
			obj = mnt;
		else
			obj = mnt->mnt_sb;
	}

	/*
	 * If some other task has this inode open for write we should not add
	 * an ignore mask, unless that ignore mask is supposed to survive
	 * modification changes anyway.
	 */
	if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
		/* Pre-seed errno used if the FAN_MARK_IGNORE check below fires */
		ret = mnt ? -EINVAL : -EISDIR;
		/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
		if (ignore == FAN_MARK_IGNORE &&
		    (mnt || S_ISDIR(inode->i_mode)))
			goto path_put_and_out;

		ret = 0;
		if (inode && inode_is_open_for_write(inode))
			goto path_put_and_out;
	}

	/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
	if (mnt || !S_ISDIR(inode->i_mode)) {
		mask &= ~FAN_EVENT_ON_CHILD;
		umask = FAN_EVENT_ON_CHILD;
		/*
		 * If group needs to report parent fid, register for getting
		 * events with parent/name info for non-directory.
		 */
		if ((fid_mode & FAN_REPORT_DIR_FID) &&
		    (flags & FAN_MARK_ADD) && !ignore)
			mask |= FAN_EVENT_ON_CHILD;
	}

	/* create/update an inode mark */
	switch (mark_cmd) {
	case FAN_MARK_ADD:
		ret = fanotify_add_mark(group, obj, obj_type, mask, flags,
					fsid);
		break;
	case FAN_MARK_REMOVE:
		ret = fanotify_remove_mark(group, obj, obj_type, mask, flags,
					   umask);
		break;
	default:
		ret = -EINVAL;
	}

path_put_and_out:
	path_put(&path);
	return ret;
}

#ifndef CONFIG_ARCH_SPLIT_ARG64
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
		__u64, mask, int, dfd,
		const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}
#endif

#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
/* 32-bit ABI splits the 64-bit mask argument into two registers */
SYSCALL32_DEFINE6(fanotify_mark,
		  int, fanotify_fd, unsigned int, flags,
		  SC_ARG64(mask), int, dfd,
		  const char  __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
				dfd, pathname);
}
#endif

/*
 * fanotify_user_setup - Our initialization function.  Note that we cannot return
 * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
 * must result in panic().
 */
static int __init fanotify_user_setup(void)
{
	struct sysinfo si;
	int max_marks;

	si_meminfo(&si);
	/*
	 * Allow up to 1% of addressable memory to be accounted for per user
	 * marks limited to the range [8192, 1048576].  mount and sb marks are
	 * a lot cheaper than inode marks, but there is no reason for a user
	 * to have many of those, so calculate by the cost of inode marks.
	 */
	max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
		    INODE_MARK_COST;
	max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS,
			  FANOTIFY_DEFAULT_MAX_USER_MARKS);

	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);

	fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
					 SLAB_PANIC|SLAB_ACCOUNT);
	fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event,
					       SLAB_PANIC);
	fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event,
						SLAB_PANIC);
	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
		fanotify_perm_event_cachep =
			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
	}

	fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
		FANOTIFY_DEFAULT_MAX_GROUPS;
	init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
	fanotify_sysctls_init();

	return 0;
}
device_initcall(fanotify_user_setup);