// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/freezer.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
#include <linux/sort.h>
#include <linux/sched/coredump.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
#include <linux/pidfs.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <net/af_unix.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>
#include <uapi/linux/un.h>
#include <uapi/linux/coredump.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
#include <asm/exec.h>

#include <trace/events/task.h>
#include "internal.h"

#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/coredump.h>

static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);

#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
/* Define a reasonable max cap */
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
/*
 * File descriptor number for the pidfd for the thread-group leader of
 * the coredumping task installed into the usermode helper's file
 * descriptor table.
 */
#define COREDUMP_PIDFD_NUMBER 3

static int core_uses_pid;
static unsigned int core_pipe_limit;
static unsigned int core_sort_vma;
/* Pattern parsed by coredump_parse(); a leading '|' or '@' selects pipe or socket mode. */
static char core_pattern[CORENAME_MAX_SIZE] = "core";
/* Initial allocation size for a corename buffer; grown by expand_corename(). */
static int core_name_size = CORENAME_MAX_SIZE;
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
/* Incremented in coredump_pipe(), decremented in coredump_cleanup(). */
static atomic_t core_pipe_count = ATOMIC_INIT(0);

/* Destination kind of a coredump, derived from the first bytes of core_pattern. */
enum coredump_type_t {
	COREDUMP_FILE		= 1,
	COREDUMP_PIPE		= 2,
	COREDUMP_SOCK		= 3,
	COREDUMP_SOCK_REQ	= 4,
};

/*
 * Growable output buffer plus per-dump state.
 *
 * @corename:        heap buffer holding the expanded name (krealloc()ed)
 * @used:            number of bytes of @corename already filled
 * @size:            current allocation size of @corename
 * @core_pipe_limit: value returned by atomic_inc_return(&core_pipe_count)
 *                   for pipe dumps, 0 otherwise
 * @core_dumped:     whether a core was produced (passed to coredump_finish())
 * @core_type:       one of enum coredump_type_t
 * @mask:            COREDUMP_* flags controlling how the dump proceeds
 */
struct core_name {
	char *corename __counted_by_ptr(size);
	int used, size;
	unsigned int core_pipe_limit;
	bool core_dumped;
	enum coredump_type_t core_type;
	u64 mask;
};

/*
 * Resize cn->corename to hold at least @size bytes (the request is
 * rounded up with kmalloc_size_roundup()). On success cn->size is
 * updated and core_name_size is raised if the new size is larger, so
 * later dumps start with a bigger buffer.
 *
 * Returns 0 on success, -ENOMEM if krealloc() fails (cn is left
 * untouched in that case).
 */
static int expand_corename(struct core_name *cn, int size)
{
	char *corename;

	size = kmalloc_size_roundup(size);
	corename = krealloc(cn->corename, size, GFP_KERNEL);
	if (!corename)
		return -ENOMEM;

	cn->corename = corename;
	cn->size = size;

	if (size > core_name_size) /* racy but harmless */
		core_name_size = size;

	return 0;
}

/*
 * Append formatted text at offset cn->used. If the output (including
 * the terminating NUL) does not fit, grow the buffer and format again.
 *
 * Returns 0 on success, -ENOMEM if the buffer could not be grown.
 */
static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
				     va_list arg)
{
	int free, need;
	va_list arg_copy;

again:
	free = cn->size - cn->used;

	/* @arg may be consumed more than once because of the retry. */
	va_copy(arg_copy, arg);
	need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
	va_end(arg_copy);

	if (need < free) {
		cn->used += need;
		return 0;
	}

	if (!expand_corename(cn, cn->size + need - free + 1))
		goto again;

	return -ENOMEM;
}

/* Variadic wrapper around cn_vprintf(); same return values. */
static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
{
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	return ret;
}

/*
 * Append one formatted name component and sanitize what was appended:
 * a component that is exactly "." or ".." gets its first character
 * replaced with '!', an empty component becomes "!", and every '/' in
 * the component is replaced with '!'.
 *
 * Returns 0 on success or the error from cn_vprintf()/cn_printf().
 */
static __printf(2, 3)
int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
{
	int cur = cn->used;
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	if (ret == 0) {
		/*
		 * Ensure that this coredump name component can't cause the
		 * resulting corefile path to consist of a ".." or ".".
		 */
		if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
				(cn->used - cur == 2 && cn->corename[cur] == '.'
				&& cn->corename[cur+1] == '.'))
			cn->corename[cur] = '!';

		/*
		 * Empty names are fishy and could be used to create a "//" in a
		 * corefile name, causing the coredump to happen one directory
		 * level too high. Enforce that all components of the core
		 * pattern are at least one character long.
		 */
		if (cn->used == cur)
			ret = cn_printf(cn, "!");
	}

	for (; cur < cn->used; ++cur) {
		if (cn->corename[cur] == '/')
			cn->corename[cur] = '!';
	}
	return ret;
}

/*
 * Append the path of current->mm's executable file as an escaped
 * component. With @name_only only the part after the last '/' is used.
 * If the mm has no exe file, "<comm> (path unknown)" is appended
 * instead.
 *
 * Returns 0 on success, -ENOMEM if the path buffer cannot be allocated,
 * or the error from file_path()/cn_esc_printf().
 */
static int cn_print_exe_file(struct core_name *cn, bool name_only)
{
	struct file *exe_file;
	char *pathbuf, *path, *ptr;
	int ret;

	exe_file = get_mm_exe_file(current->mm);
	if (!exe_file)
		return cn_esc_printf(cn, "%s (path unknown)", current->comm);

	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!pathbuf) {
		ret = -ENOMEM;
		goto put_exe_file;
	}

	path = file_path(exe_file, pathbuf, PATH_MAX);
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		goto free_buf;
	}

	if (name_only) {
		ptr = strrchr(path, '/');
		if (ptr)
			path = ptr + 1;
	}
	ret = cn_esc_printf(cn, "%s", path);

free_buf:
	kfree(pathbuf);
put_exe_file:
	fput(exe_file);
	return ret;
}

/*
 * coredump_parse will inspect the pattern parameter, and output a name
 * into corename, which must have space for at least CORENAME_MAX_SIZE
 * bytes plus one byte for the zero terminator.
238 */ 239 static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, 240 size_t **argv, int *argc) 241 { 242 const struct cred *cred = current_cred(); 243 const char *pat_ptr = core_pattern; 244 bool was_space = false; 245 int pid_in_pattern = 0; 246 int err = 0; 247 248 cn->mask = COREDUMP_KERNEL; 249 if (core_pipe_limit) 250 cn->mask |= COREDUMP_WAIT; 251 cn->used = 0; 252 cn->corename = NULL; 253 cn->core_pipe_limit = 0; 254 cn->core_dumped = false; 255 if (*pat_ptr == '|') 256 cn->core_type = COREDUMP_PIPE; 257 else if (*pat_ptr == '@') 258 cn->core_type = COREDUMP_SOCK; 259 else 260 cn->core_type = COREDUMP_FILE; 261 if (expand_corename(cn, core_name_size)) 262 return false; 263 cn->corename[0] = '\0'; 264 265 switch (cn->core_type) { 266 case COREDUMP_PIPE: { 267 int argvs = sizeof(core_pattern) / 2; 268 (*argv) = kmalloc_objs(**argv, argvs); 269 if (!(*argv)) 270 return false; 271 (*argv)[(*argc)++] = 0; 272 ++pat_ptr; 273 if (!(*pat_ptr)) 274 return false; 275 break; 276 } 277 case COREDUMP_SOCK: { 278 /* skip the @ */ 279 pat_ptr++; 280 if (!(*pat_ptr)) 281 return false; 282 if (*pat_ptr == '@') { 283 pat_ptr++; 284 if (!(*pat_ptr)) 285 return false; 286 287 cn->core_type = COREDUMP_SOCK_REQ; 288 } 289 290 err = cn_printf(cn, "%s", pat_ptr); 291 if (err) 292 return false; 293 294 /* Require absolute paths. */ 295 if (cn->corename[0] != '/') 296 return false; 297 298 /* 299 * Ensure we can uses spaces to indicate additional 300 * parameters in the future. 301 */ 302 if (strchr(cn->corename, ' ')) { 303 coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename); 304 return false; 305 } 306 307 /* Must not contain ".." in the path. */ 308 if (name_contains_dotdot(cn->corename)) { 309 coredump_report_failure("Coredump socket may not %s contain '..' 
spaces", cn->corename); 310 return false; 311 } 312 313 if (strlen(cn->corename) >= UNIX_PATH_MAX) { 314 coredump_report_failure("Coredump socket path %s too long", cn->corename); 315 return false; 316 } 317 318 /* 319 * Currently no need to parse any other options. 320 * Relevant information can be retrieved from the peer 321 * pidfd retrievable via SO_PEERPIDFD by the receiver or 322 * via /proc/<pid>, using the SO_PEERPIDFD to guard 323 * against pid recycling when opening /proc/<pid>. 324 */ 325 return true; 326 } 327 case COREDUMP_FILE: 328 break; 329 default: 330 WARN_ON_ONCE(true); 331 return false; 332 } 333 334 /* Repeat as long as we have more pattern to process and more output 335 space */ 336 while (*pat_ptr) { 337 /* 338 * Split on spaces before doing template expansion so that 339 * %e and %E don't get split if they have spaces in them 340 */ 341 if (cn->core_type == COREDUMP_PIPE) { 342 if (isspace(*pat_ptr)) { 343 if (cn->used != 0) 344 was_space = true; 345 pat_ptr++; 346 continue; 347 } else if (was_space) { 348 was_space = false; 349 err = cn_printf(cn, "%c", '\0'); 350 if (err) 351 return false; 352 (*argv)[(*argc)++] = cn->used; 353 } 354 } 355 if (*pat_ptr != '%') { 356 err = cn_printf(cn, "%c", *pat_ptr++); 357 } else { 358 switch (*++pat_ptr) { 359 /* single % at the end, drop that */ 360 case 0: 361 goto out; 362 /* Double percent, output one percent */ 363 case '%': 364 err = cn_printf(cn, "%c", '%'); 365 break; 366 /* pid */ 367 case 'p': 368 pid_in_pattern = 1; 369 err = cn_printf(cn, "%d", 370 task_tgid_vnr(current)); 371 break; 372 /* global pid */ 373 case 'P': 374 err = cn_printf(cn, "%d", 375 task_tgid_nr(current)); 376 break; 377 case 'i': 378 err = cn_printf(cn, "%d", 379 task_pid_vnr(current)); 380 break; 381 case 'I': 382 err = cn_printf(cn, "%d", 383 task_pid_nr(current)); 384 break; 385 /* uid */ 386 case 'u': 387 err = cn_printf(cn, "%u", 388 from_kuid(&init_user_ns, 389 cred->uid)); 390 break; 391 /* gid */ 392 case 'g': 393 
err = cn_printf(cn, "%u", 394 from_kgid(&init_user_ns, 395 cred->gid)); 396 break; 397 case 'd': 398 err = cn_printf(cn, "%d", cprm->dumpable); 399 break; 400 /* signal that caused the coredump */ 401 case 's': 402 err = cn_printf(cn, "%d", 403 cprm->siginfo->si_signo); 404 break; 405 /* UNIX time of coredump */ 406 case 't': { 407 time64_t time; 408 409 time = ktime_get_real_seconds(); 410 err = cn_printf(cn, "%lld", time); 411 break; 412 } 413 /* hostname */ 414 case 'h': 415 down_read(&uts_sem); 416 err = cn_esc_printf(cn, "%s", 417 utsname()->nodename); 418 up_read(&uts_sem); 419 break; 420 /* executable, could be changed by prctl PR_SET_NAME etc */ 421 case 'e': 422 err = cn_esc_printf(cn, "%s", current->comm); 423 break; 424 /* file name of executable */ 425 case 'f': 426 err = cn_print_exe_file(cn, true); 427 break; 428 case 'E': 429 err = cn_print_exe_file(cn, false); 430 break; 431 /* core limit size */ 432 case 'c': 433 err = cn_printf(cn, "%lu", 434 rlimit(RLIMIT_CORE)); 435 break; 436 /* CPU the task ran on */ 437 case 'C': 438 err = cn_printf(cn, "%d", cprm->cpu); 439 break; 440 /* pidfd number */ 441 case 'F': { 442 /* 443 * Installing a pidfd only makes sense if 444 * we actually spawn a usermode helper. 445 */ 446 if (cn->core_type != COREDUMP_PIPE) 447 break; 448 449 /* 450 * Note that we'll install a pidfd for the 451 * thread-group leader. We know that task 452 * linkage hasn't been removed yet and even if 453 * this @current isn't the actual thread-group 454 * leader we know that the thread-group leader 455 * cannot be reaped until @current has exited. 
456 */ 457 cprm->pid = task_tgid(current); 458 err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); 459 break; 460 } 461 default: 462 break; 463 } 464 ++pat_ptr; 465 } 466 467 if (err) 468 return false; 469 } 470 471 out: 472 /* Backward compatibility with core_uses_pid: 473 * 474 * If core_pattern does not include a %p (as is the default) 475 * and core_uses_pid is set, then .%pid will be appended to 476 * the filename. Do not do this for piped commands. */ 477 if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid) 478 return cn_printf(cn, ".%d", task_tgid_vnr(current)) == 0; 479 480 return true; 481 } 482 483 static int zap_process(struct signal_struct *signal, int exit_code) 484 { 485 struct task_struct *t; 486 int nr = 0; 487 488 signal->flags = SIGNAL_GROUP_EXIT; 489 signal->group_exit_code = exit_code; 490 signal->group_stop_count = 0; 491 492 __for_each_thread(signal, t) { 493 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 494 if (t != current && !(t->flags & PF_POSTCOREDUMP)) { 495 sigaddset(&t->pending.signal, SIGKILL); 496 signal_wake_up(t, 1); 497 nr++; 498 } 499 } 500 501 return nr; 502 } 503 504 static int zap_threads(struct task_struct *tsk, 505 struct core_state *core_state, int exit_code) 506 { 507 struct signal_struct *signal = tsk->signal; 508 int nr = -EAGAIN; 509 510 spin_lock_irq(&tsk->sighand->siglock); 511 if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { 512 /* Allow SIGKILL, see prepare_signal() */ 513 signal->core_state = core_state; 514 nr = zap_process(signal, exit_code); 515 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 516 tsk->flags |= PF_DUMPCORE; 517 atomic_set(&core_state->nr_threads, nr); 518 } 519 spin_unlock_irq(&tsk->sighand->siglock); 520 return nr; 521 } 522 523 static int coredump_wait(int exit_code, struct core_state *core_state) 524 { 525 struct task_struct *tsk = current; 526 int core_waiters = -EBUSY; 527 528 init_completion(&core_state->startup); 529 core_state->dumper.task = tsk; 
530 core_state->dumper.next = NULL; 531 532 core_waiters = zap_threads(tsk, core_state, exit_code); 533 if (core_waiters > 0) { 534 struct core_thread *ptr; 535 536 wait_for_completion_state(&core_state->startup, 537 TASK_UNINTERRUPTIBLE|TASK_FREEZABLE); 538 /* 539 * Wait for all the threads to become inactive, so that 540 * all the thread context (extended register state, like 541 * fpu etc) gets copied to the memory. 542 */ 543 ptr = core_state->dumper.next; 544 while (ptr != NULL) { 545 wait_task_inactive(ptr->task, TASK_ANY); 546 ptr = ptr->next; 547 } 548 } 549 550 return core_waiters; 551 } 552 553 static void coredump_finish(bool core_dumped) 554 { 555 struct core_thread *curr, *next; 556 struct task_struct *task; 557 558 spin_lock_irq(¤t->sighand->siglock); 559 if (core_dumped && !__fatal_signal_pending(current)) 560 current->signal->group_exit_code |= 0x80; 561 next = current->signal->core_state->dumper.next; 562 current->signal->core_state = NULL; 563 spin_unlock_irq(¤t->sighand->siglock); 564 565 while ((curr = next) != NULL) { 566 next = curr->next; 567 task = curr->task; 568 /* 569 * see coredump_task_exit(), curr->task must not see 570 * ->task == NULL before we read ->next. 571 */ 572 smp_mb(); 573 curr->task = NULL; 574 wake_up_process(task); 575 } 576 } 577 578 static bool dump_interrupted(void) 579 { 580 /* 581 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 582 * can do try_to_freeze() and check __fatal_signal_pending(), 583 * but then we need to teach dump_write() to restart and clear 584 * TIF_SIGPENDING. 
585 */ 586 return fatal_signal_pending(current) || freezing(current); 587 } 588 589 static void wait_for_dump_helpers(struct file *file) 590 { 591 struct pipe_inode_info *pipe = file->private_data; 592 593 pipe_lock(pipe); 594 pipe->readers++; 595 pipe->writers--; 596 wake_up_interruptible_sync(&pipe->rd_wait); 597 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 598 pipe_unlock(pipe); 599 600 /* 601 * We actually want wait_event_freezable() but then we need 602 * to clear TIF_SIGPENDING and improve dump_interrupted(). 603 */ 604 wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); 605 606 pipe_lock(pipe); 607 pipe->readers--; 608 pipe->writers++; 609 pipe_unlock(pipe); 610 } 611 612 /* 613 * umh_coredump_setup 614 * helper function to customize the process used 615 * to collect the core in userspace. Specifically 616 * it sets up a pipe and installs it as fd 0 (stdin) 617 * for the process. Returns 0 on success, or 618 * PTR_ERR on failure. 619 * Note that it also sets the core limit to 1. This 620 * is a special value that we use to trap recursive 621 * core dumps 622 */ 623 static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) 624 { 625 struct file *files[2]; 626 struct coredump_params *cp = (struct coredump_params *)info->data; 627 int err; 628 629 if (cp->pid) { 630 struct file *pidfs_file __free(fput) = NULL; 631 632 pidfs_file = pidfs_alloc_file(cp->pid, 0); 633 if (IS_ERR(pidfs_file)) 634 return PTR_ERR(pidfs_file); 635 636 pidfs_coredump(cp); 637 638 /* 639 * Usermode helpers are childen of either 640 * system_dfl_wq or of kthreadd. So we know that 641 * we're starting off with a clean file descriptor 642 * table. So we should always be able to use 643 * COREDUMP_PIDFD_NUMBER as our file descriptor value. 
644 */ 645 err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); 646 if (err < 0) 647 return err; 648 } 649 650 err = create_pipe_files(files, 0); 651 if (err) 652 return err; 653 654 cp->file = files[1]; 655 656 err = replace_fd(0, files[0], 0); 657 fput(files[0]); 658 if (err < 0) 659 return err; 660 661 /* and disallow core files too */ 662 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 663 664 return 0; 665 } 666 667 #ifdef CONFIG_UNIX 668 static bool coredump_sock_connect(struct core_name *cn, struct coredump_params *cprm) 669 { 670 struct file *file __free(fput) = NULL; 671 struct sockaddr_un addr = { 672 .sun_family = AF_UNIX, 673 }; 674 ssize_t addr_len; 675 int retval; 676 struct socket *socket; 677 678 addr_len = strscpy(addr.sun_path, cn->corename); 679 if (addr_len < 0) 680 return false; 681 addr_len += offsetof(struct sockaddr_un, sun_path) + 1; 682 683 /* 684 * It is possible that the userspace process which is supposed 685 * to handle the coredump and is listening on the AF_UNIX socket 686 * coredumps. Userspace should just mark itself non dumpable. 687 */ 688 689 retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket); 690 if (retval < 0) 691 return false; 692 693 file = sock_alloc_file(socket, 0, NULL); 694 if (IS_ERR(file)) 695 return false; 696 697 /* 698 * Set the thread-group leader pid which is used for the peer 699 * credentials during connect() below. Then immediately register 700 * it in pidfs... 701 */ 702 cprm->pid = task_tgid(current); 703 retval = pidfs_register_pid(cprm->pid); 704 if (retval) 705 return false; 706 707 /* 708 * ... and set the coredump information so userspace has it 709 * available after connect()... 
710 */ 711 pidfs_coredump(cprm); 712 713 retval = kernel_connect(socket, (struct sockaddr_unsized *)(&addr), addr_len, 714 O_NONBLOCK | SOCK_COREDUMP); 715 716 if (retval) { 717 if (retval == -EAGAIN) 718 coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path); 719 else 720 coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval); 721 return false; 722 } 723 724 /* ... and validate that @sk_peer_pid matches @cprm.pid. */ 725 if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm->pid)) 726 return false; 727 728 cprm->limit = RLIM_INFINITY; 729 cprm->file = no_free_ptr(file); 730 731 return true; 732 } 733 734 static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags) 735 { 736 struct msghdr msg = {}; 737 struct kvec iov = { .iov_base = ack, .iov_len = size }; 738 ssize_t ret; 739 740 memset(ack, 0, size); 741 ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags); 742 return ret == size; 743 } 744 745 static inline bool coredump_sock_send(struct file *file, struct coredump_req *req) 746 { 747 struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; 748 struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) }; 749 ssize_t ret; 750 751 ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req)); 752 return ret == sizeof(*req); 753 } 754 755 static_assert(sizeof(enum coredump_mark) == sizeof(__u32)); 756 757 static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark) 758 { 759 struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; 760 struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) }; 761 ssize_t ret; 762 763 ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark)); 764 return ret == sizeof(mark); 765 } 766 767 static inline void coredump_sock_wait(struct file *file) 768 { 769 ssize_t n; 770 771 /* 772 * We use a simple read to wait for the coredump processing to 773 * finish. 
Either the socket is closed or we get sent unexpected 774 * data. In both cases, we're done. 775 */ 776 n = __kernel_read(file, &(char){ 0 }, 1, NULL); 777 if (n > 0) 778 coredump_report_failure("Coredump socket had unexpected data"); 779 else if (n < 0) 780 coredump_report_failure("Coredump socket failed"); 781 } 782 783 static inline void coredump_sock_shutdown(struct file *file) 784 { 785 struct socket *socket; 786 787 socket = sock_from_file(file); 788 if (!socket) 789 return; 790 791 /* Let userspace know we're done processing the coredump. */ 792 kernel_sock_shutdown(socket, SHUT_WR); 793 } 794 795 static bool coredump_sock_request(struct core_name *cn, struct coredump_params *cprm) 796 { 797 struct coredump_req req = { 798 .size = sizeof(struct coredump_req), 799 .mask = COREDUMP_KERNEL | COREDUMP_USERSPACE | 800 COREDUMP_REJECT | COREDUMP_WAIT, 801 .size_ack = sizeof(struct coredump_ack), 802 }; 803 struct coredump_ack ack = {}; 804 ssize_t usize; 805 806 if (cn->core_type != COREDUMP_SOCK_REQ) 807 return true; 808 809 /* Let userspace know what we support. */ 810 if (!coredump_sock_send(cprm->file, &req)) 811 return false; 812 813 /* Peek the size of the coredump_ack. */ 814 if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size), 815 MSG_PEEK | MSG_WAITALL)) 816 return false; 817 818 /* Refuse unknown coredump_ack sizes. */ 819 usize = ack.size; 820 if (usize < COREDUMP_ACK_SIZE_VER0) { 821 coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE); 822 return false; 823 } 824 825 if (usize > sizeof(ack)) { 826 coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE); 827 return false; 828 } 829 830 /* Now retrieve the coredump_ack. */ 831 if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL)) 832 return false; 833 if (ack.size != usize) 834 return false; 835 836 /* Refuse unknown coredump_ack flags. 
*/ 837 if (ack.mask & ~req.mask) { 838 coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); 839 return false; 840 } 841 842 /* Refuse mutually exclusive options. */ 843 if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL | 844 COREDUMP_REJECT)) != 1) { 845 coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING); 846 return false; 847 } 848 849 if (ack.spare) { 850 coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); 851 return false; 852 } 853 854 cn->mask = ack.mask; 855 return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); 856 } 857 858 static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) 859 { 860 if (!coredump_sock_connect(cn, cprm)) 861 return false; 862 863 return coredump_sock_request(cn, cprm); 864 } 865 #else 866 static inline void coredump_sock_wait(struct file *file) { } 867 static inline void coredump_sock_shutdown(struct file *file) { } 868 static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; } 869 #endif 870 871 /* cprm->dumpable is the snapshot of task dumpability at dump start. */ 872 static inline bool coredump_force_suid_safe(const struct coredump_params *cprm) 873 { 874 /* Require nonrelative corefile path and be extra careful. 
*/ 875 return cprm->dumpable == TASK_DUMPABLE_ROOT; 876 } 877 878 static bool coredump_file(struct core_name *cn, struct coredump_params *cprm, 879 const struct linux_binfmt *binfmt) 880 { 881 struct mnt_idmap *idmap; 882 struct inode *inode; 883 struct file *file __free(fput) = NULL; 884 int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL; 885 886 if (cprm->limit < binfmt->min_coredump) 887 return false; 888 889 if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') { 890 coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump"); 891 return false; 892 } 893 894 /* 895 * Unlink the file if it exists unless this is a SUID 896 * binary - in that case, we're running around with root 897 * privs and don't want to unlink another user's coredump. 898 */ 899 if (!coredump_force_suid_safe(cprm)) { 900 CLASS(filename_kernel, name)(cn->corename); 901 /* 902 * If it doesn't exist, that's fine. If there's some 903 * other problem, we'll catch it at the filp_open(). 904 */ 905 filename_unlinkat(AT_FDCWD, name); 906 } 907 908 /* 909 * There is a race between unlinking and creating the 910 * file, but if that causes an EEXIST here, that's 911 * fine - another process raced with us while creating 912 * the corefile, and the other process won. To userspace, 913 * what matters is that at least one of the two processes 914 * writes its coredump successfully, not which one. 915 */ 916 if (coredump_force_suid_safe(cprm)) { 917 /* 918 * Using user namespaces, normal user tasks can change 919 * their current->fs->root to point to arbitrary 920 * directories. Since the intention of the "only dump 921 * with a fully qualified path" rule is to control where 922 * coredumps may be placed using root privileges, 923 * current->fs->root must not be used. Instead, use the 924 * root directory of init_task. 
925 */ 926 struct path root; 927 928 task_lock(&init_task); 929 get_fs_root(init_task.fs, &root); 930 task_unlock(&init_task); 931 file = file_open_root(&root, cn->corename, open_flags, 0600); 932 path_put(&root); 933 } else { 934 file = filp_open(cn->corename, open_flags, 0600); 935 } 936 if (IS_ERR(file)) 937 return false; 938 939 inode = file_inode(file); 940 if (inode->i_nlink > 1) 941 return false; 942 if (d_unhashed(file->f_path.dentry)) 943 return false; 944 /* 945 * AK: actually i see no reason to not allow this for named 946 * pipes etc, but keep the previous behaviour for now. 947 */ 948 if (!S_ISREG(inode->i_mode)) 949 return false; 950 /* 951 * Don't dump core if the filesystem changed owner or mode 952 * of the file during file creation. This is an issue when 953 * a process dumps core while its cwd is e.g. on a vfat 954 * filesystem. 955 */ 956 idmap = file_mnt_idmap(file); 957 if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { 958 coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename); 959 return false; 960 } 961 if ((inode->i_mode & 0677) != 0600) { 962 coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename); 963 return false; 964 } 965 if (!(file->f_mode & FMODE_CAN_WRITE)) 966 return false; 967 if (do_truncate(idmap, file->f_path.dentry, 0, 0, file)) 968 return false; 969 970 cprm->file = no_free_ptr(file); 971 return true; 972 } 973 974 static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, 975 size_t *argv, int argc) 976 { 977 int argi; 978 char **helper_argv __free(kfree) = NULL; 979 struct subprocess_info *sub_info; 980 981 if (cprm->limit == 1) { 982 /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. 
983 * 984 * Normally core limits are irrelevant to pipes, since 985 * we're not writing to the file system, but we use 986 * cprm.limit of 1 here as a special value, this is a 987 * consistent way to catch recursive crashes. 988 * We can still crash if the core_pattern binary sets 989 * RLIM_CORE = !1, but it runs as root, and can do 990 * lots of stupid things. 991 * 992 * Note that we use task_tgid_vnr here to grab the pid 993 * of the process group leader. That way we get the 994 * right pid if a thread in a multi-threaded 995 * core_pattern process dies. 996 */ 997 coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); 998 return false; 999 } 1000 cprm->limit = RLIM_INFINITY; 1001 1002 cn->core_pipe_limit = atomic_inc_return(&core_pipe_count); 1003 if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) { 1004 coredump_report_failure("over core_pipe_limit, skipping core dump"); 1005 return false; 1006 } 1007 1008 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); 1009 if (!helper_argv) { 1010 coredump_report_failure("%s failed to allocate memory", __func__); 1011 return false; 1012 } 1013 for (argi = 0; argi < argc; argi++) 1014 helper_argv[argi] = cn->corename + argv[argi]; 1015 helper_argv[argi] = NULL; 1016 1017 sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, 1018 GFP_KERNEL, umh_coredump_setup, 1019 NULL, cprm); 1020 if (!sub_info) 1021 return false; 1022 1023 if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) { 1024 coredump_report_failure("|%s pipe failed", cn->corename); 1025 return false; 1026 } 1027 1028 /* 1029 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would 1030 * have this set to NULL. 
1031 */ 1032 if (!cprm->file) { 1033 coredump_report_failure("Core dump to |%s disabled", cn->corename); 1034 return false; 1035 } 1036 1037 return true; 1038 } 1039 1040 static bool coredump_write(struct core_name *cn, 1041 struct coredump_params *cprm, 1042 const struct linux_binfmt *binfmt) 1043 { 1044 1045 if (dump_interrupted()) 1046 return true; 1047 1048 if (!dump_vma_snapshot(cprm)) 1049 return false; 1050 1051 file_start_write(cprm->file); 1052 cn->core_dumped = binfmt->core_dump(cprm); 1053 /* 1054 * Ensures that file size is big enough to contain the current 1055 * file postion. This prevents gdb from complaining about 1056 * a truncated file if the last "write" to the file was 1057 * dump_skip. 1058 */ 1059 if (cprm->to_skip) { 1060 cprm->to_skip--; 1061 dump_emit(cprm, "", 1); 1062 } 1063 file_end_write(cprm->file); 1064 free_vma_snapshot(cprm); 1065 return true; 1066 } 1067 1068 static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) 1069 { 1070 if (cprm->file) 1071 filp_close(cprm->file, NULL); 1072 if (cn->core_pipe_limit) { 1073 VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE); 1074 atomic_dec(&core_pipe_count); 1075 } 1076 kfree(cn->corename); 1077 coredump_finish(cn->core_dumped); 1078 } 1079 1080 static inline bool coredump_skip(const struct coredump_params *cprm, 1081 const struct linux_binfmt *binfmt) 1082 { 1083 if (!binfmt) 1084 return true; 1085 if (!binfmt->core_dump) 1086 return true; 1087 if (cprm->dumpable == TASK_DUMPABLE_OFF) 1088 return true; 1089 return false; 1090 } 1091 1092 static void do_coredump(struct core_name *cn, struct coredump_params *cprm, 1093 size_t **argv, int *argc, const struct linux_binfmt *binfmt) 1094 { 1095 trace_coredump(cprm->siginfo->si_signo); 1096 1097 if (!coredump_parse(cn, cprm, argv, argc)) { 1098 coredump_report_failure("format_corename failed, aborting core"); 1099 return; 1100 } 1101 1102 switch (cn->core_type) { 1103 case COREDUMP_FILE: 1104 if (!coredump_file(cn, cprm, 
binfmt)) 1105 return; 1106 break; 1107 case COREDUMP_PIPE: 1108 if (!coredump_pipe(cn, cprm, *argv, *argc)) 1109 return; 1110 break; 1111 case COREDUMP_SOCK_REQ: 1112 fallthrough; 1113 case COREDUMP_SOCK: 1114 if (!coredump_socket(cn, cprm)) 1115 return; 1116 break; 1117 default: 1118 WARN_ON_ONCE(true); 1119 return; 1120 } 1121 1122 /* Don't even generate the coredump. */ 1123 if (cn->mask & COREDUMP_REJECT) 1124 return; 1125 1126 /* get us an unshared descriptor table; almost always a no-op */ 1127 /* The cell spufs coredump code reads the file descriptor tables */ 1128 if (unshare_files()) 1129 return; 1130 1131 if ((cn->mask & COREDUMP_KERNEL) && !coredump_write(cn, cprm, binfmt)) 1132 return; 1133 1134 coredump_sock_shutdown(cprm->file); 1135 1136 /* Let the parent know that a coredump was generated. */ 1137 if (cn->mask & COREDUMP_USERSPACE) 1138 cn->core_dumped = true; 1139 1140 /* 1141 * When core_pipe_limit is set we wait for the coredump server 1142 * or usermodehelper to finish before exiting so it can e.g., 1143 * inspect /proc/<pid>. 1144 */ 1145 if (cn->mask & COREDUMP_WAIT) { 1146 switch (cn->core_type) { 1147 case COREDUMP_PIPE: 1148 wait_for_dump_helpers(cprm->file); 1149 break; 1150 case COREDUMP_SOCK_REQ: 1151 fallthrough; 1152 case COREDUMP_SOCK: 1153 coredump_sock_wait(cprm->file); 1154 break; 1155 default: 1156 break; 1157 } 1158 } 1159 } 1160 1161 void vfs_coredump(const kernel_siginfo_t *siginfo) 1162 { 1163 size_t *argv __free(kfree) = NULL; 1164 struct core_state core_state; 1165 struct core_name cn; 1166 const struct mm_struct *mm = current->mm; 1167 const struct linux_binfmt *binfmt = mm->binfmt; 1168 int argc = 0; 1169 struct coredump_params cprm = { 1170 .siginfo = siginfo, 1171 .limit = rlimit(RLIMIT_CORE), 1172 /* Snapshot MMF_DUMP_FILTER_* (unlocked) and dumpable for the dump. 
*/ 1173 .mm_flags = __mm_flags_get_word(mm), 1174 .dumpable = task_exec_state_get_dumpable(current), 1175 .vma_meta = NULL, 1176 .cpu = raw_smp_processor_id(), 1177 }; 1178 1179 audit_core_dumps(siginfo->si_signo); 1180 1181 if (coredump_skip(&cprm, binfmt)) 1182 return; 1183 1184 CLASS(prepare_creds, cred)(); 1185 if (!cred) 1186 return; 1187 /* 1188 * We cannot trust fsuid as being the "true" uid of the process 1189 * nor do we know its entire history. We only know it was tainted 1190 * so we dump it as root in mode 2, and only into a controlled 1191 * environment (pipe handler or fully qualified path). 1192 */ 1193 if (coredump_force_suid_safe(&cprm)) 1194 cred->fsuid = GLOBAL_ROOT_UID; 1195 1196 if (coredump_wait(siginfo->si_signo, &core_state) < 0) 1197 return; 1198 1199 scoped_with_creds(cred) 1200 do_coredump(&cn, &cprm, &argv, &argc, binfmt); 1201 coredump_cleanup(&cn, &cprm); 1202 return; 1203 } 1204 1205 /* 1206 * Core dumping helper functions. These are the only things you should 1207 * do on a core-file: use only these functions to write out all the 1208 * necessary info. 
1209 */ 1210 static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) 1211 { 1212 struct file *file = cprm->file; 1213 loff_t pos = file->f_pos; 1214 ssize_t n; 1215 1216 if (cprm->written + nr > cprm->limit) 1217 return 0; 1218 if (dump_interrupted()) 1219 return 0; 1220 n = __kernel_write(file, addr, nr, &pos); 1221 if (n != nr) 1222 return 0; 1223 file->f_pos = pos; 1224 cprm->written += n; 1225 cprm->pos += n; 1226 1227 return 1; 1228 } 1229 1230 static int __dump_skip(struct coredump_params *cprm, size_t nr) 1231 { 1232 static char zeroes[PAGE_SIZE]; 1233 struct file *file = cprm->file; 1234 1235 if (file->f_mode & FMODE_LSEEK) { 1236 if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0) 1237 return 0; 1238 cprm->pos += nr; 1239 return 1; 1240 } 1241 1242 while (nr > PAGE_SIZE) { 1243 if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) 1244 return 0; 1245 nr -= PAGE_SIZE; 1246 } 1247 1248 return __dump_emit(cprm, zeroes, nr); 1249 } 1250 1251 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 1252 { 1253 if (cprm->to_skip) { 1254 if (!__dump_skip(cprm, cprm->to_skip)) 1255 return 0; 1256 cprm->to_skip = 0; 1257 } 1258 return __dump_emit(cprm, addr, nr); 1259 } 1260 EXPORT_SYMBOL(dump_emit); 1261 1262 void dump_skip_to(struct coredump_params *cprm, unsigned long pos) 1263 { 1264 cprm->to_skip = pos - cprm->pos; 1265 } 1266 EXPORT_SYMBOL(dump_skip_to); 1267 1268 void dump_skip(struct coredump_params *cprm, size_t nr) 1269 { 1270 cprm->to_skip += nr; 1271 } 1272 EXPORT_SYMBOL(dump_skip); 1273 1274 #ifdef CONFIG_ELF_CORE 1275 static int dump_emit_page(struct coredump_params *cprm, struct page *page) 1276 { 1277 struct bio_vec bvec; 1278 struct iov_iter iter; 1279 struct file *file = cprm->file; 1280 loff_t pos; 1281 ssize_t n; 1282 1283 if (!page) 1284 return 0; 1285 1286 if (cprm->to_skip) { 1287 if (!__dump_skip(cprm, cprm->to_skip)) 1288 return 0; 1289 cprm->to_skip = 0; 1290 } 1291 if (cprm->written + PAGE_SIZE > 
cprm->limit) 1292 return 0; 1293 if (dump_interrupted()) 1294 return 0; 1295 pos = file->f_pos; 1296 bvec_set_page(&bvec, page, PAGE_SIZE, 0); 1297 iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); 1298 n = __kernel_write_iter(cprm->file, &iter, &pos); 1299 if (n != PAGE_SIZE) 1300 return 0; 1301 file->f_pos = pos; 1302 cprm->written += PAGE_SIZE; 1303 cprm->pos += PAGE_SIZE; 1304 1305 return 1; 1306 } 1307 1308 /* 1309 * If we might get machine checks from kernel accesses during the 1310 * core dump, let's get those errors early rather than during the 1311 * IO. This is not performance-critical enough to warrant having 1312 * all the machine check logic in the iovec paths. 1313 */ 1314 #ifdef copy_mc_to_kernel 1315 1316 #define dump_page_alloc() alloc_page(GFP_KERNEL) 1317 #define dump_page_free(x) __free_page(x) 1318 static struct page *dump_page_copy(struct page *src, struct page *dst) 1319 { 1320 void *buf = kmap_local_page(src); 1321 size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE); 1322 kunmap_local(buf); 1323 return left ? NULL : dst; 1324 } 1325 1326 #else 1327 1328 /* We just want to return non-NULL; it's never used. 
*/ 1329 #define dump_page_alloc() ERR_PTR(-EINVAL) 1330 #define dump_page_free(x) ((void)(x)) 1331 static inline struct page *dump_page_copy(struct page *src, struct page *dst) 1332 { 1333 return src; 1334 } 1335 #endif 1336 1337 int dump_user_range(struct coredump_params *cprm, unsigned long start, 1338 unsigned long len) 1339 { 1340 unsigned long addr; 1341 struct page *dump_page; 1342 int locked, ret; 1343 1344 dump_page = dump_page_alloc(); 1345 if (!dump_page) 1346 return 0; 1347 1348 ret = 0; 1349 locked = 0; 1350 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 1351 struct page *page; 1352 1353 if (!locked) { 1354 if (mmap_read_lock_killable(current->mm)) 1355 goto out; 1356 locked = 1; 1357 } 1358 1359 /* 1360 * To avoid having to allocate page tables for virtual address 1361 * ranges that have never been used yet, and also to make it 1362 * easy to generate sparse core files, use a helper that returns 1363 * NULL when encountering an empty page table entry that would 1364 * otherwise have been filled with the zero page. 
1365 */ 1366 page = get_dump_page(addr, &locked); 1367 if (page) { 1368 if (locked) { 1369 mmap_read_unlock(current->mm); 1370 locked = 0; 1371 } 1372 int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); 1373 put_page(page); 1374 if (stop) 1375 goto out; 1376 } else { 1377 dump_skip(cprm, PAGE_SIZE); 1378 } 1379 1380 if (dump_interrupted()) 1381 goto out; 1382 1383 if (!need_resched()) 1384 continue; 1385 if (locked) { 1386 mmap_read_unlock(current->mm); 1387 locked = 0; 1388 } 1389 cond_resched(); 1390 } 1391 ret = 1; 1392 out: 1393 if (locked) 1394 mmap_read_unlock(current->mm); 1395 1396 dump_page_free(dump_page); 1397 return ret; 1398 } 1399 #endif 1400 1401 int dump_align(struct coredump_params *cprm, int align) 1402 { 1403 unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1); 1404 if (align & (align - 1)) 1405 return 0; 1406 if (mod) 1407 cprm->to_skip += align - mod; 1408 return 1; 1409 } 1410 EXPORT_SYMBOL(dump_align); 1411 1412 #ifdef CONFIG_SYSCTL 1413 1414 void validate_coredump_safety(void) 1415 { 1416 if (suid_dumpable == TASK_DUMPABLE_ROOT && 1417 core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') { 1418 1419 coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: " 1420 "pipe handler or fully qualified core dump path required. " 1421 "Set kernel.core_pattern before fs.suid_dumpable."); 1422 } 1423 } 1424 1425 static inline bool check_coredump_socket(void) 1426 { 1427 const char *p; 1428 1429 if (core_pattern[0] != '@') 1430 return true; 1431 1432 /* 1433 * Coredump socket must be located in the initial mount 1434 * namespace. Don't give the impression that anything else is 1435 * supported right now. 1436 */ 1437 if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) 1438 return false; 1439 1440 /* Must be an absolute path... */ 1441 if (core_pattern[1] != '/') { 1442 /* ... or the socket request protocol... */ 1443 if (core_pattern[1] != '@') 1444 return false; 1445 /* ... 
and if so must be an absolute path. */ 1446 if (core_pattern[2] != '/') 1447 return false; 1448 p = &core_pattern[2]; 1449 } else { 1450 p = &core_pattern[1]; 1451 } 1452 1453 /* The path obviously cannot exceed UNIX_PATH_MAX. */ 1454 if (strlen(p) >= UNIX_PATH_MAX) 1455 return false; 1456 1457 /* Must not contain ".." in the path. */ 1458 if (name_contains_dotdot(core_pattern)) 1459 return false; 1460 1461 return true; 1462 } 1463 1464 static int proc_dostring_coredump(const struct ctl_table *table, int write, 1465 void *buffer, size_t *lenp, loff_t *ppos) 1466 { 1467 int error; 1468 ssize_t retval; 1469 char old_core_pattern[CORENAME_MAX_SIZE]; 1470 1471 if (!write) 1472 return proc_dostring(table, write, buffer, lenp, ppos); 1473 1474 retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); 1475 1476 error = proc_dostring(table, write, buffer, lenp, ppos); 1477 if (error) 1478 return error; 1479 1480 if (!check_coredump_socket()) { 1481 strscpy(core_pattern, old_core_pattern, retval + 1); 1482 return -EINVAL; 1483 } 1484 1485 if (strncmp(old_core_pattern, core_pattern, CORENAME_MAX_SIZE)) 1486 validate_coredump_safety(); 1487 return error; 1488 } 1489 1490 static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; 1491 static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; 1492 static char core_modes[] = { 1493 "file\npipe" 1494 #ifdef CONFIG_UNIX 1495 "\nsocket" 1496 #endif 1497 }; 1498 1499 static const struct ctl_table coredump_sysctls[] = { 1500 { 1501 .procname = "core_uses_pid", 1502 .data = &core_uses_pid, 1503 .maxlen = sizeof(int), 1504 .mode = 0644, 1505 .proc_handler = proc_dointvec, 1506 }, 1507 { 1508 .procname = "core_pattern", 1509 .data = core_pattern, 1510 .maxlen = CORENAME_MAX_SIZE, 1511 .mode = 0644, 1512 .proc_handler = proc_dostring_coredump, 1513 }, 1514 { 1515 .procname = "core_pipe_limit", 1516 .data = &core_pipe_limit, 1517 .maxlen = sizeof(unsigned int), 1518 .mode = 0644, 1519 
.proc_handler = proc_dointvec_minmax, 1520 .extra1 = SYSCTL_ZERO, 1521 .extra2 = SYSCTL_INT_MAX, 1522 }, 1523 { 1524 .procname = "core_file_note_size_limit", 1525 .data = &core_file_note_size_limit, 1526 .maxlen = sizeof(unsigned int), 1527 .mode = 0644, 1528 .proc_handler = proc_douintvec_minmax, 1529 .extra1 = (unsigned int *)&core_file_note_size_min, 1530 .extra2 = (unsigned int *)&core_file_note_size_max, 1531 }, 1532 { 1533 .procname = "core_sort_vma", 1534 .data = &core_sort_vma, 1535 .maxlen = sizeof(int), 1536 .mode = 0644, 1537 .proc_handler = proc_douintvec_minmax, 1538 .extra1 = SYSCTL_ZERO, 1539 .extra2 = SYSCTL_ONE, 1540 }, 1541 { 1542 .procname = "core_modes", 1543 .data = core_modes, 1544 .maxlen = sizeof(core_modes) - 1, 1545 .mode = 0444, 1546 .proc_handler = proc_dostring, 1547 }, 1548 }; 1549 1550 static int __init init_fs_coredump_sysctls(void) 1551 { 1552 register_sysctl_init("kernel", coredump_sysctls); 1553 return 0; 1554 } 1555 fs_initcall(init_fs_coredump_sysctls); 1556 #endif /* CONFIG_SYSCTL */ 1557 1558 /* 1559 * The purpose of always_dump_vma() is to make sure that special kernel mappings 1560 * that are useful for post-mortem analysis are included in every core dump. 1561 * In that way we ensure that the core dump is fully interpretable later 1562 * without matching up the same kernel and hardware config to see what PC values 1563 * meant. These special mappings include - vDSO, vsyscall, and other 1564 * architecture specific mappings 1565 */ 1566 static bool always_dump_vma(struct vm_area_struct *vma) 1567 { 1568 /* Any vsyscall mappings? */ 1569 if (vma == get_gate_vma(vma->vm_mm)) 1570 return true; 1571 1572 /* 1573 * Assume that all vmas with a .name op should always be dumped. 1574 * If this changes, a new vm_ops field can easily be added. 
1575 */ 1576 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 1577 return true; 1578 1579 /* 1580 * arch_vma_name() returns non-NULL for special architecture mappings, 1581 * such as vDSO sections. 1582 */ 1583 if (arch_vma_name(vma)) 1584 return true; 1585 1586 return false; 1587 } 1588 1589 #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 1590 1591 /* 1592 * Decide how much of @vma's contents should be included in a core dump. 1593 */ 1594 static unsigned long vma_dump_size(struct vm_area_struct *vma, 1595 unsigned long mm_flags) 1596 { 1597 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1598 1599 /* always dump the vdso and vsyscall sections */ 1600 if (always_dump_vma(vma)) 1601 goto whole; 1602 1603 if (vma->vm_flags & VM_DONTDUMP) 1604 return 0; 1605 1606 /* support for DAX */ 1607 if (vma_is_dax(vma)) { 1608 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) 1609 goto whole; 1610 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) 1611 goto whole; 1612 return 0; 1613 } 1614 1615 /* Hugetlb memory check */ 1616 if (is_vm_hugetlb_page(vma)) { 1617 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 1618 goto whole; 1619 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) 1620 goto whole; 1621 return 0; 1622 } 1623 1624 /* Do not dump I/O mapped devices or special mappings */ 1625 if (vma->vm_flags & VM_IO) 1626 return 0; 1627 1628 /* By default, dump shared memory if mapped from an anonymous file. */ 1629 if (vma->vm_flags & VM_SHARED) { 1630 if (file_inode(vma->vm_file)->i_nlink == 0 ? 1631 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1632 goto whole; 1633 return 0; 1634 } 1635 1636 /* Dump segments that have been written to. 
*/ 1637 if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE)) 1638 goto whole; 1639 if (vma->vm_file == NULL) 1640 return 0; 1641 1642 if (FILTER(MAPPED_PRIVATE)) 1643 goto whole; 1644 1645 /* 1646 * If this is the beginning of an executable file mapping, 1647 * dump the first page to aid in determining what was mapped here. 1648 */ 1649 if (FILTER(ELF_HEADERS) && 1650 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { 1651 if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) 1652 return PAGE_SIZE; 1653 1654 /* 1655 * ELF libraries aren't always executable. 1656 * We'll want to check whether the mapping starts with the ELF 1657 * magic, but not now - we're holding the mmap lock, 1658 * so copy_from_user() doesn't work here. 1659 * Use a placeholder instead, and fix it up later in 1660 * dump_vma_snapshot(). 1661 */ 1662 return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; 1663 } 1664 1665 #undef FILTER 1666 1667 return 0; 1668 1669 whole: 1670 return vma->vm_end - vma->vm_start; 1671 } 1672 1673 /* 1674 * Helper function for iterating across a vma list. It ensures that the caller 1675 * will visit `gate_vma' prior to terminating the search. 
1676 */ 1677 static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi, 1678 struct vm_area_struct *vma, 1679 struct vm_area_struct *gate_vma) 1680 { 1681 if (gate_vma && (vma == gate_vma)) 1682 return NULL; 1683 1684 vma = vma_next(vmi); 1685 if (vma) 1686 return vma; 1687 return gate_vma; 1688 } 1689 1690 static void free_vma_snapshot(struct coredump_params *cprm) 1691 { 1692 if (cprm->vma_meta) { 1693 int i; 1694 for (i = 0; i < cprm->vma_count; i++) { 1695 struct file *file = cprm->vma_meta[i].file; 1696 if (file) 1697 fput(file); 1698 } 1699 kvfree(cprm->vma_meta); 1700 cprm->vma_meta = NULL; 1701 } 1702 } 1703 1704 static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr) 1705 { 1706 const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr; 1707 const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr; 1708 1709 if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size) 1710 return -1; 1711 if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size) 1712 return 1; 1713 return 0; 1714 } 1715 1716 /* 1717 * Under the mmap_lock, take a snapshot of relevant information about the task's 1718 * VMAs. 1719 */ 1720 static bool dump_vma_snapshot(struct coredump_params *cprm) 1721 { 1722 struct vm_area_struct *gate_vma, *vma = NULL; 1723 struct mm_struct *mm = current->mm; 1724 VMA_ITERATOR(vmi, mm, 0); 1725 int i = 0; 1726 1727 /* 1728 * Once the stack expansion code is fixed to not change VMA bounds 1729 * under mmap_lock in read mode, this can be changed to take the 1730 * mmap_lock in read mode. 1731 */ 1732 if (mmap_write_lock_killable(mm)) 1733 return false; 1734 1735 cprm->vma_data_size = 0; 1736 gate_vma = get_gate_vma(mm); 1737 cprm->vma_count = mm->map_count + (gate_vma ? 
1 : 0); 1738 1739 cprm->vma_meta = kvmalloc_objs(*cprm->vma_meta, cprm->vma_count); 1740 if (!cprm->vma_meta) { 1741 mmap_write_unlock(mm); 1742 return false; 1743 } 1744 1745 while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) { 1746 struct core_vma_metadata *m = cprm->vma_meta + i; 1747 1748 m->start = vma->vm_start; 1749 m->end = vma->vm_end; 1750 m->flags = vma->vm_flags; 1751 m->dump_size = vma_dump_size(vma, cprm->mm_flags); 1752 m->pgoff = vma->vm_pgoff; 1753 m->file = vma->vm_file; 1754 if (m->file) 1755 get_file(m->file); 1756 i++; 1757 } 1758 1759 mmap_write_unlock(mm); 1760 1761 for (i = 0; i < cprm->vma_count; i++) { 1762 struct core_vma_metadata *m = cprm->vma_meta + i; 1763 1764 if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { 1765 char elfmag[SELFMAG]; 1766 1767 if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || 1768 memcmp(elfmag, ELFMAG, SELFMAG) != 0) { 1769 m->dump_size = 0; 1770 } else { 1771 m->dump_size = PAGE_SIZE; 1772 } 1773 } 1774 1775 cprm->vma_data_size += m->dump_size; 1776 } 1777 1778 if (core_sort_vma) 1779 sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), 1780 cmp_vma_size, NULL); 1781 1782 return true; 1783 } 1784