1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/slab.h> 3 #include <linux/file.h> 4 #include <linux/fdtable.h> 5 #include <linux/freezer.h> 6 #include <linux/mm.h> 7 #include <linux/stat.h> 8 #include <linux/fcntl.h> 9 #include <linux/swap.h> 10 #include <linux/ctype.h> 11 #include <linux/string.h> 12 #include <linux/init.h> 13 #include <linux/pagemap.h> 14 #include <linux/perf_event.h> 15 #include <linux/highmem.h> 16 #include <linux/spinlock.h> 17 #include <linux/key.h> 18 #include <linux/personality.h> 19 #include <linux/binfmts.h> 20 #include <linux/coredump.h> 21 #include <linux/sort.h> 22 #include <linux/sched/coredump.h> 23 #include <linux/sched/signal.h> 24 #include <linux/sched/task_stack.h> 25 #include <linux/utsname.h> 26 #include <linux/pid_namespace.h> 27 #include <linux/module.h> 28 #include <linux/namei.h> 29 #include <linux/mount.h> 30 #include <linux/security.h> 31 #include <linux/syscalls.h> 32 #include <linux/tsacct_kern.h> 33 #include <linux/cn_proc.h> 34 #include <linux/audit.h> 35 #include <linux/kmod.h> 36 #include <linux/fsnotify.h> 37 #include <linux/fs_struct.h> 38 #include <linux/pipe_fs_i.h> 39 #include <linux/oom.h> 40 #include <linux/compat.h> 41 #include <linux/fs.h> 42 #include <linux/path.h> 43 #include <linux/timekeeping.h> 44 #include <linux/sysctl.h> 45 #include <linux/elf.h> 46 #include <linux/pidfs.h> 47 #include <linux/net.h> 48 #include <linux/socket.h> 49 #include <net/af_unix.h> 50 #include <net/net_namespace.h> 51 #include <net/sock.h> 52 #include <uapi/linux/pidfd.h> 53 #include <uapi/linux/un.h> 54 55 #include <linux/uaccess.h> 56 #include <asm/mmu_context.h> 57 #include <asm/tlb.h> 58 #include <asm/exec.h> 59 60 #include <trace/events/task.h> 61 #include "internal.h" 62 63 #include <trace/events/sched.h> 64 65 static bool dump_vma_snapshot(struct coredump_params *cprm); 66 static void free_vma_snapshot(struct coredump_params *cprm); 67 68 #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) 69 /* Define a reasonable max cap */ 70 #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) 71 /* 72 * File descriptor number for the pidfd for the thread-group leader of 73 * the coredumping task installed into the usermode helper's file 74 * descriptor table. 75 */ 76 #define COREDUMP_PIDFD_NUMBER 3 77 78 static int core_uses_pid; 79 static unsigned int core_pipe_limit; 80 static unsigned int core_sort_vma; 81 static char core_pattern[CORENAME_MAX_SIZE] = "core"; 82 static int core_name_size = CORENAME_MAX_SIZE; 83 unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; 84 85 enum coredump_type_t { 86 COREDUMP_FILE = 1, 87 COREDUMP_PIPE = 2, 88 COREDUMP_SOCK = 3, 89 }; 90 91 struct core_name { 92 char *corename; 93 int used, size; 94 enum coredump_type_t core_type; 95 }; 96 97 static int expand_corename(struct core_name *cn, int size) 98 { 99 char *corename; 100 101 size = kmalloc_size_roundup(size); 102 corename = krealloc(cn->corename, size, GFP_KERNEL); 103 104 if (!corename) 105 return -ENOMEM; 106 107 if (size > core_name_size) /* racy but harmless */ 108 core_name_size = size; 109 110 cn->size = size; 111 cn->corename = corename; 112 return 0; 113 } 114 115 static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt, 116 va_list arg) 117 { 118 int free, need; 119 va_list arg_copy; 120 121 again: 122 free = cn->size - cn->used; 123 124 va_copy(arg_copy, arg); 125 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 126 va_end(arg_copy); 127 128 if (need < free) { 129 cn->used += need; 130 return 0; 131 } 132 133 if (!expand_corename(cn, cn->size + need - free + 1)) 134 goto again; 135 136 return -ENOMEM; 137 } 138 139 static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...) 140 { 141 va_list arg; 142 int ret; 143 144 va_start(arg, fmt); 145 ret = cn_vprintf(cn, fmt, arg); 146 va_end(arg); 147 148 return ret; 149 } 150 151 static __printf(2, 3) 152 int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 153 { 154 int cur = cn->used; 155 va_list arg; 156 int ret; 157 158 va_start(arg, fmt); 159 ret = cn_vprintf(cn, fmt, arg); 160 va_end(arg); 161 162 if (ret == 0) { 163 /* 164 * Ensure that this coredump name component can't cause the 165 * resulting corefile path to consist of a ".." or ".". 166 */ 167 if ((cn->used - cur == 1 && cn->corename[cur] == '.') || 168 (cn->used - cur == 2 && cn->corename[cur] == '.' 169 && cn->corename[cur+1] == '.')) 170 cn->corename[cur] = '!'; 171 172 /* 173 * Empty names are fishy and could be used to create a "//" in a 174 * corefile name, causing the coredump to happen one directory 175 * level too high. Enforce that all components of the core 176 * pattern are at least one character long. 177 */ 178 if (cn->used == cur) 179 ret = cn_printf(cn, "!"); 180 } 181 182 for (; cur < cn->used; ++cur) { 183 if (cn->corename[cur] == '/') 184 cn->corename[cur] = '!'; 185 } 186 return ret; 187 } 188 189 static int cn_print_exe_file(struct core_name *cn, bool name_only) 190 { 191 struct file *exe_file; 192 char *pathbuf, *path, *ptr; 193 int ret; 194 195 exe_file = get_mm_exe_file(current->mm); 196 if (!exe_file) 197 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 198 199 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); 200 if (!pathbuf) { 201 ret = -ENOMEM; 202 goto put_exe_file; 203 } 204 205 path = file_path(exe_file, pathbuf, PATH_MAX); 206 if (IS_ERR(path)) { 207 ret = PTR_ERR(path); 208 goto free_buf; 209 } 210 211 if (name_only) { 212 ptr = strrchr(path, '/'); 213 if (ptr) 214 path = ptr + 1; 215 } 216 ret = cn_esc_printf(cn, "%s", path); 217 218 free_buf: 219 kfree(pathbuf); 220 put_exe_file: 221 fput(exe_file); 222 return ret; 223 } 224 225 /* format_corename will inspect the pattern parameter, and output a 226 * name into corename, which must have space for at least 227 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 228 */ 229 static int format_corename(struct core_name *cn, struct coredump_params *cprm, 230 size_t **argv, int *argc) 231 { 232 const struct cred *cred = current_cred(); 233 const char *pat_ptr = core_pattern; 234 bool was_space = false; 235 int pid_in_pattern = 0; 236 int err = 0; 237 238 cn->used = 0; 239 cn->corename = NULL; 240 if (*pat_ptr == '|') 241 cn->core_type = COREDUMP_PIPE; 242 else if (*pat_ptr == '@') 243 cn->core_type = COREDUMP_SOCK; 244 else 245 cn->core_type = COREDUMP_FILE; 246 if (expand_corename(cn, core_name_size)) 247 return -ENOMEM; 248 cn->corename[0] = '\0'; 249 250 switch (cn->core_type) { 251 case COREDUMP_PIPE: { 252 int argvs = sizeof(core_pattern) / 2; 253 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); 254 if (!(*argv)) 255 return -ENOMEM; 256 (*argv)[(*argc)++] = 0; 257 ++pat_ptr; 258 if (!(*pat_ptr)) 259 return -ENOMEM; 260 break; 261 } 262 case COREDUMP_SOCK: { 263 /* skip the @ */ 264 pat_ptr++; 265 if (!(*pat_ptr)) 266 return -ENOMEM; 267 268 err = cn_printf(cn, "%s", pat_ptr); 269 if (err) 270 return err; 271 272 /* Require absolute paths. */ 273 if (cn->corename[0] != '/') 274 return -EINVAL; 275 276 /* 277 * Ensure we can uses spaces to indicate additional 278 * parameters in the future. 279 */ 280 if (strchr(cn->corename, ' ')) { 281 coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename); 282 return -EINVAL; 283 } 284 285 /* 286 * Currently no need to parse any other options. 287 * Relevant information can be retrieved from the peer 288 * pidfd retrievable via SO_PEERPIDFD by the receiver or 289 * via /proc/<pid>, using the SO_PEERPIDFD to guard 290 * against pid recycling when opening /proc/<pid>. 291 */ 292 return 0; 293 } 294 case COREDUMP_FILE: 295 break; 296 default: 297 WARN_ON_ONCE(true); 298 return -EINVAL; 299 } 300 301 /* Repeat as long as we have more pattern to process and more output 302 space */ 303 while (*pat_ptr) { 304 /* 305 * Split on spaces before doing template expansion so that 306 * %e and %E don't get split if they have spaces in them 307 */ 308 if (cn->core_type == COREDUMP_PIPE) { 309 if (isspace(*pat_ptr)) { 310 if (cn->used != 0) 311 was_space = true; 312 pat_ptr++; 313 continue; 314 } else if (was_space) { 315 was_space = false; 316 err = cn_printf(cn, "%c", '\0'); 317 if (err) 318 return err; 319 (*argv)[(*argc)++] = cn->used; 320 } 321 } 322 if (*pat_ptr != '%') { 323 err = cn_printf(cn, "%c", *pat_ptr++); 324 } else { 325 switch (*++pat_ptr) { 326 /* single % at the end, drop that */ 327 case 0: 328 goto out; 329 /* Double percent, output one percent */ 330 case '%': 331 err = cn_printf(cn, "%c", '%'); 332 break; 333 /* pid */ 334 case 'p': 335 pid_in_pattern = 1; 336 err = cn_printf(cn, "%d", 337 task_tgid_vnr(current)); 338 break; 339 /* global pid */ 340 case 'P': 341 err = cn_printf(cn, "%d", 342 task_tgid_nr(current)); 343 break; 344 case 'i': 345 err = cn_printf(cn, "%d", 346 task_pid_vnr(current)); 347 break; 348 case 'I': 349 err = cn_printf(cn, "%d", 350 task_pid_nr(current)); 351 break; 352 /* uid */ 353 case 'u': 354 err = cn_printf(cn, "%u", 355 from_kuid(&init_user_ns, 356 cred->uid)); 357 break; 358 /* gid */ 359 case 'g': 360 err = cn_printf(cn, "%u", 361 from_kgid(&init_user_ns, 362 cred->gid)); 363 break; 364 case 'd': 365 err = cn_printf(cn, "%d", 366 __get_dumpable(cprm->mm_flags)); 367 break; 368 /* signal that caused the coredump */ 369 case 's': 370 err = cn_printf(cn, "%d", 371 cprm->siginfo->si_signo); 372 break; 373 /* UNIX time of coredump */ 374 case 't': { 375 time64_t time; 376 377 time = ktime_get_real_seconds(); 378 err = cn_printf(cn, "%lld", time); 379 break; 380 } 381 /* hostname */ 382 case 'h': 383 down_read(&uts_sem); 384 err = cn_esc_printf(cn, "%s", 385 utsname()->nodename); 386 up_read(&uts_sem); 387 break; 388 /* executable, could be changed by prctl PR_SET_NAME etc */ 389 case 'e': 390 err = cn_esc_printf(cn, "%s", current->comm); 391 break; 392 /* file name of executable */ 393 case 'f': 394 err = cn_print_exe_file(cn, true); 395 break; 396 case 'E': 397 err = cn_print_exe_file(cn, false); 398 break; 399 /* core limit size */ 400 case 'c': 401 err = cn_printf(cn, "%lu", 402 rlimit(RLIMIT_CORE)); 403 break; 404 /* CPU the task ran on */ 405 case 'C': 406 err = cn_printf(cn, "%d", cprm->cpu); 407 break; 408 /* pidfd number */ 409 case 'F': { 410 /* 411 * Installing a pidfd only makes sense if 412 * we actually spawn a usermode helper. 413 */ 414 if (cn->core_type != COREDUMP_PIPE) 415 break; 416 417 /* 418 * Note that we'll install a pidfd for the 419 * thread-group leader. We know that task 420 * linkage hasn't been removed yet and even if 421 * this @current isn't the actual thread-group 422 * leader we know that the thread-group leader 423 * cannot be reaped until @current has exited. 424 */ 425 cprm->pid = task_tgid(current); 426 err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); 427 break; 428 } 429 default: 430 break; 431 } 432 ++pat_ptr; 433 } 434 435 if (err) 436 return err; 437 } 438 439 out: 440 /* Backward compatibility with core_uses_pid: 441 * 442 * If core_pattern does not include a %p (as is the default) 443 * and core_uses_pid is set, then .%pid will be appended to 444 * the filename. Do not do this for piped commands. */ 445 if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid) 446 return cn_printf(cn, ".%d", task_tgid_vnr(current)); 447 448 return 0; 449 } 450 451 static int zap_process(struct signal_struct *signal, int exit_code) 452 { 453 struct task_struct *t; 454 int nr = 0; 455 456 signal->flags = SIGNAL_GROUP_EXIT; 457 signal->group_exit_code = exit_code; 458 signal->group_stop_count = 0; 459 460 __for_each_thread(signal, t) { 461 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 462 if (t != current && !(t->flags & PF_POSTCOREDUMP)) { 463 sigaddset(&t->pending.signal, SIGKILL); 464 signal_wake_up(t, 1); 465 nr++; 466 } 467 } 468 469 return nr; 470 } 471 472 static int zap_threads(struct task_struct *tsk, 473 struct core_state *core_state, int exit_code) 474 { 475 struct signal_struct *signal = tsk->signal; 476 int nr = -EAGAIN; 477 478 spin_lock_irq(&tsk->sighand->siglock); 479 if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { 480 /* Allow SIGKILL, see prepare_signal() */ 481 signal->core_state = core_state; 482 nr = zap_process(signal, exit_code); 483 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 484 tsk->flags |= PF_DUMPCORE; 485 atomic_set(&core_state->nr_threads, nr); 486 } 487 spin_unlock_irq(&tsk->sighand->siglock); 488 return nr; 489 } 490 491 static int coredump_wait(int exit_code, struct core_state *core_state) 492 { 493 struct task_struct *tsk = current; 494 int core_waiters = -EBUSY; 495 496 init_completion(&core_state->startup); 497 core_state->dumper.task = tsk; 498 core_state->dumper.next = NULL; 499 500 core_waiters = zap_threads(tsk, core_state, exit_code); 501 if (core_waiters > 0) { 502 struct core_thread *ptr; 503 504 wait_for_completion_state(&core_state->startup, 505 TASK_UNINTERRUPTIBLE|TASK_FREEZABLE); 506 /* 507 * Wait for all the threads to become inactive, so that 508 * all the thread context (extended register state, like 509 * fpu etc) gets copied to the memory. 510 */ 511 ptr = core_state->dumper.next; 512 while (ptr != NULL) { 513 wait_task_inactive(ptr->task, TASK_ANY); 514 ptr = ptr->next; 515 } 516 } 517 518 return core_waiters; 519 } 520 521 static void coredump_finish(bool core_dumped) 522 { 523 struct core_thread *curr, *next; 524 struct task_struct *task; 525 526 spin_lock_irq(¤t->sighand->siglock); 527 if (core_dumped && !__fatal_signal_pending(current)) 528 current->signal->group_exit_code |= 0x80; 529 next = current->signal->core_state->dumper.next; 530 current->signal->core_state = NULL; 531 spin_unlock_irq(¤t->sighand->siglock); 532 533 while ((curr = next) != NULL) { 534 next = curr->next; 535 task = curr->task; 536 /* 537 * see coredump_task_exit(), curr->task must not see 538 * ->task == NULL before we read ->next. 539 */ 540 smp_mb(); 541 curr->task = NULL; 542 wake_up_process(task); 543 } 544 } 545 546 static bool dump_interrupted(void) 547 { 548 /* 549 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 550 * can do try_to_freeze() and check __fatal_signal_pending(), 551 * but then we need to teach dump_write() to restart and clear 552 * TIF_SIGPENDING. 553 */ 554 return fatal_signal_pending(current) || freezing(current); 555 } 556 557 static void wait_for_dump_helpers(struct file *file) 558 { 559 struct pipe_inode_info *pipe = file->private_data; 560 561 pipe_lock(pipe); 562 pipe->readers++; 563 pipe->writers--; 564 wake_up_interruptible_sync(&pipe->rd_wait); 565 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 566 pipe_unlock(pipe); 567 568 /* 569 * We actually want wait_event_freezable() but then we need 570 * to clear TIF_SIGPENDING and improve dump_interrupted(). 571 */ 572 wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); 573 574 pipe_lock(pipe); 575 pipe->readers--; 576 pipe->writers++; 577 pipe_unlock(pipe); 578 } 579 580 /* 581 * umh_coredump_setup 582 * helper function to customize the process used 583 * to collect the core in userspace. Specifically 584 * it sets up a pipe and installs it as fd 0 (stdin) 585 * for the process. Returns 0 on success, or 586 * PTR_ERR on failure. 587 * Note that it also sets the core limit to 1. This 588 * is a special value that we use to trap recursive 589 * core dumps 590 */ 591 static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) 592 { 593 struct file *files[2]; 594 struct coredump_params *cp = (struct coredump_params *)info->data; 595 int err; 596 597 if (cp->pid) { 598 struct file *pidfs_file __free(fput) = NULL; 599 600 pidfs_file = pidfs_alloc_file(cp->pid, 0); 601 if (IS_ERR(pidfs_file)) 602 return PTR_ERR(pidfs_file); 603 604 pidfs_coredump(cp); 605 606 /* 607 * Usermode helpers are childen of either 608 * system_unbound_wq or of kthreadd. So we know that 609 * we're starting off with a clean file descriptor 610 * table. So we should always be able to use 611 * COREDUMP_PIDFD_NUMBER as our file descriptor value. 612 */ 613 err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); 614 if (err < 0) 615 return err; 616 } 617 618 err = create_pipe_files(files, 0); 619 if (err) 620 return err; 621 622 cp->file = files[1]; 623 624 err = replace_fd(0, files[0], 0); 625 fput(files[0]); 626 if (err < 0) 627 return err; 628 629 /* and disallow core files too */ 630 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 631 632 return 0; 633 } 634 635 void do_coredump(const kernel_siginfo_t *siginfo) 636 { 637 struct core_state core_state; 638 struct core_name cn; 639 struct mm_struct *mm = current->mm; 640 struct linux_binfmt * binfmt; 641 const struct cred *old_cred; 642 struct cred *cred; 643 int retval = 0; 644 size_t *argv = NULL; 645 int argc = 0; 646 /* require nonrelative corefile path and be extra careful */ 647 bool need_suid_safe = false; 648 bool core_dumped = false; 649 static atomic_t core_dump_count = ATOMIC_INIT(0); 650 struct coredump_params cprm = { 651 .siginfo = siginfo, 652 .limit = rlimit(RLIMIT_CORE), 653 /* 654 * We must use the same mm->flags while dumping core to avoid 655 * inconsistency of bit flags, since this flag is not protected 656 * by any locks. 657 */ 658 .mm_flags = mm->flags, 659 .vma_meta = NULL, 660 .cpu = raw_smp_processor_id(), 661 }; 662 663 audit_core_dumps(siginfo->si_signo); 664 665 binfmt = mm->binfmt; 666 if (!binfmt || !binfmt->core_dump) 667 goto fail; 668 if (!__get_dumpable(cprm.mm_flags)) 669 goto fail; 670 671 cred = prepare_creds(); 672 if (!cred) 673 goto fail; 674 /* 675 * We cannot trust fsuid as being the "true" uid of the process 676 * nor do we know its entire history. We only know it was tainted 677 * so we dump it as root in mode 2, and only into a controlled 678 * environment (pipe handler or fully qualified path). 679 */ 680 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 681 /* Setuid core dump mode */ 682 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 683 need_suid_safe = true; 684 } 685 686 retval = coredump_wait(siginfo->si_signo, &core_state); 687 if (retval < 0) 688 goto fail_creds; 689 690 old_cred = override_creds(cred); 691 692 retval = format_corename(&cn, &cprm, &argv, &argc); 693 if (retval < 0) { 694 coredump_report_failure("format_corename failed, aborting core"); 695 goto fail_unlock; 696 } 697 698 switch (cn.core_type) { 699 case COREDUMP_FILE: { 700 struct mnt_idmap *idmap; 701 struct inode *inode; 702 int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | 703 O_LARGEFILE | O_EXCL; 704 705 if (cprm.limit < binfmt->min_coredump) 706 goto fail_unlock; 707 708 if (need_suid_safe && cn.corename[0] != '/') { 709 coredump_report_failure( 710 "this process can only dump core to a fully qualified path, skipping core dump"); 711 goto fail_unlock; 712 } 713 714 /* 715 * Unlink the file if it exists unless this is a SUID 716 * binary - in that case, we're running around with root 717 * privs and don't want to unlink another user's coredump. 718 */ 719 if (!need_suid_safe) { 720 /* 721 * If it doesn't exist, that's fine. If there's some 722 * other problem, we'll catch it at the filp_open(). 723 */ 724 do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); 725 } 726 727 /* 728 * There is a race between unlinking and creating the 729 * file, but if that causes an EEXIST here, that's 730 * fine - another process raced with us while creating 731 * the corefile, and the other process won. To userspace, 732 * what matters is that at least one of the two processes 733 * writes its coredump successfully, not which one. 734 */ 735 if (need_suid_safe) { 736 /* 737 * Using user namespaces, normal user tasks can change 738 * their current->fs->root to point to arbitrary 739 * directories. Since the intention of the "only dump 740 * with a fully qualified path" rule is to control where 741 * coredumps may be placed using root privileges, 742 * current->fs->root must not be used. Instead, use the 743 * root directory of init_task. 744 */ 745 struct path root; 746 747 task_lock(&init_task); 748 get_fs_root(init_task.fs, &root); 749 task_unlock(&init_task); 750 cprm.file = file_open_root(&root, cn.corename, 751 open_flags, 0600); 752 path_put(&root); 753 } else { 754 cprm.file = filp_open(cn.corename, open_flags, 0600); 755 } 756 if (IS_ERR(cprm.file)) 757 goto fail_unlock; 758 759 inode = file_inode(cprm.file); 760 if (inode->i_nlink > 1) 761 goto close_fail; 762 if (d_unhashed(cprm.file->f_path.dentry)) 763 goto close_fail; 764 /* 765 * AK: actually i see no reason to not allow this for named 766 * pipes etc, but keep the previous behaviour for now. 767 */ 768 if (!S_ISREG(inode->i_mode)) 769 goto close_fail; 770 /* 771 * Don't dump core if the filesystem changed owner or mode 772 * of the file during file creation. This is an issue when 773 * a process dumps core while its cwd is e.g. on a vfat 774 * filesystem. 775 */ 776 idmap = file_mnt_idmap(cprm.file); 777 if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), 778 current_fsuid())) { 779 coredump_report_failure("Core dump to %s aborted: " 780 "cannot preserve file owner", cn.corename); 781 goto close_fail; 782 } 783 if ((inode->i_mode & 0677) != 0600) { 784 coredump_report_failure("Core dump to %s aborted: " 785 "cannot preserve file permissions", cn.corename); 786 goto close_fail; 787 } 788 if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) 789 goto close_fail; 790 if (do_truncate(idmap, cprm.file->f_path.dentry, 791 0, 0, cprm.file)) 792 goto close_fail; 793 break; 794 } 795 case COREDUMP_PIPE: { 796 int argi; 797 int dump_count; 798 char **helper_argv; 799 struct subprocess_info *sub_info; 800 801 if (cprm.limit == 1) { 802 /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. 803 * 804 * Normally core limits are irrelevant to pipes, since 805 * we're not writing to the file system, but we use 806 * cprm.limit of 1 here as a special value, this is a 807 * consistent way to catch recursive crashes. 808 * We can still crash if the core_pattern binary sets 809 * RLIM_CORE = !1, but it runs as root, and can do 810 * lots of stupid things. 811 * 812 * Note that we use task_tgid_vnr here to grab the pid 813 * of the process group leader. That way we get the 814 * right pid if a thread in a multi-threaded 815 * core_pattern process dies. 816 */ 817 coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); 818 goto fail_unlock; 819 } 820 cprm.limit = RLIM_INFINITY; 821 822 dump_count = atomic_inc_return(&core_dump_count); 823 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 824 coredump_report_failure("over core_pipe_limit, skipping core dump"); 825 goto fail_dropcount; 826 } 827 828 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), 829 GFP_KERNEL); 830 if (!helper_argv) { 831 coredump_report_failure("%s failed to allocate memory", __func__); 832 goto fail_dropcount; 833 } 834 for (argi = 0; argi < argc; argi++) 835 helper_argv[argi] = cn.corename + argv[argi]; 836 helper_argv[argi] = NULL; 837 838 retval = -ENOMEM; 839 sub_info = call_usermodehelper_setup(helper_argv[0], 840 helper_argv, NULL, GFP_KERNEL, 841 umh_coredump_setup, NULL, &cprm); 842 if (sub_info) 843 retval = call_usermodehelper_exec(sub_info, 844 UMH_WAIT_EXEC); 845 846 kfree(helper_argv); 847 if (retval) { 848 coredump_report_failure("|%s pipe failed", cn.corename); 849 goto close_fail; 850 } 851 break; 852 } 853 case COREDUMP_SOCK: { 854 #ifdef CONFIG_UNIX 855 struct file *file __free(fput) = NULL; 856 struct sockaddr_un addr = { 857 .sun_family = AF_UNIX, 858 }; 859 ssize_t addr_len; 860 struct socket *socket; 861 862 addr_len = strscpy(addr.sun_path, cn.corename); 863 if (addr_len < 0) 864 goto close_fail; 865 addr_len += offsetof(struct sockaddr_un, sun_path) + 1; 866 867 /* 868 * It is possible that the userspace process which is 869 * supposed to handle the coredump and is listening on 870 * the AF_UNIX socket coredumps. Userspace should just 871 * mark itself non dumpable. 872 */ 873 874 retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket); 875 if (retval < 0) 876 goto close_fail; 877 878 file = sock_alloc_file(socket, 0, NULL); 879 if (IS_ERR(file)) 880 goto close_fail; 881 882 /* 883 * Set the thread-group leader pid which is used for the 884 * peer credentials during connect() below. Then 885 * immediately register it in pidfs... 886 */ 887 cprm.pid = task_tgid(current); 888 retval = pidfs_register_pid(cprm.pid); 889 if (retval) 890 goto close_fail; 891 892 /* 893 * ... and set the coredump information so userspace 894 * has it available after connect()... 895 */ 896 pidfs_coredump(&cprm); 897 898 retval = kernel_connect(socket, (struct sockaddr *)(&addr), 899 addr_len, O_NONBLOCK | SOCK_COREDUMP); 900 901 /* 902 * ... Make sure to only put our reference after connect() took 903 * its own reference keeping the pidfs entry alive ... 904 */ 905 pidfs_put_pid(cprm.pid); 906 907 if (retval) { 908 if (retval == -EAGAIN) 909 coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path); 910 else 911 coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval); 912 goto close_fail; 913 } 914 915 /* ... and validate that @sk_peer_pid matches @cprm.pid. */ 916 if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid)) 917 goto close_fail; 918 919 cprm.limit = RLIM_INFINITY; 920 cprm.file = no_free_ptr(file); 921 #else 922 coredump_report_failure("Core dump socket support %s disabled", cn.corename); 923 goto close_fail; 924 #endif 925 break; 926 } 927 default: 928 WARN_ON_ONCE(true); 929 goto close_fail; 930 } 931 932 /* get us an unshared descriptor table; almost always a no-op */ 933 /* The cell spufs coredump code reads the file descriptor tables */ 934 retval = unshare_files(); 935 if (retval) 936 goto close_fail; 937 if (!dump_interrupted()) { 938 /* 939 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would 940 * have this set to NULL. 941 */ 942 if (!cprm.file) { 943 coredump_report_failure("Core dump to |%s disabled", cn.corename); 944 goto close_fail; 945 } 946 if (!dump_vma_snapshot(&cprm)) 947 goto close_fail; 948 949 file_start_write(cprm.file); 950 core_dumped = binfmt->core_dump(&cprm); 951 /* 952 * Ensures that file size is big enough to contain the current 953 * file postion. This prevents gdb from complaining about 954 * a truncated file if the last "write" to the file was 955 * dump_skip. 956 */ 957 if (cprm.to_skip) { 958 cprm.to_skip--; 959 dump_emit(&cprm, "", 1); 960 } 961 file_end_write(cprm.file); 962 free_vma_snapshot(&cprm); 963 } 964 965 #ifdef CONFIG_UNIX 966 /* Let userspace know we're done processing the coredump. */ 967 if (sock_from_file(cprm.file)) 968 kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR); 969 #endif 970 971 /* 972 * When core_pipe_limit is set we wait for the coredump server 973 * or usermodehelper to finish before exiting so it can e.g., 974 * inspect /proc/<pid>. 975 */ 976 if (core_pipe_limit) { 977 switch (cn.core_type) { 978 case COREDUMP_PIPE: 979 wait_for_dump_helpers(cprm.file); 980 break; 981 #ifdef CONFIG_UNIX 982 case COREDUMP_SOCK: { 983 ssize_t n; 984 985 /* 986 * We use a simple read to wait for the coredump 987 * processing to finish. Either the socket is 988 * closed or we get sent unexpected data. In 989 * both cases, we're done. 990 */ 991 n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL); 992 if (n != 0) 993 coredump_report_failure("Unexpected data on coredump socket"); 994 break; 995 } 996 #endif 997 default: 998 break; 999 } 1000 } 1001 1002 close_fail: 1003 if (cprm.file) 1004 filp_close(cprm.file, NULL); 1005 fail_dropcount: 1006 if (cn.core_type == COREDUMP_PIPE) 1007 atomic_dec(&core_dump_count); 1008 fail_unlock: 1009 kfree(argv); 1010 kfree(cn.corename); 1011 coredump_finish(core_dumped); 1012 revert_creds(old_cred); 1013 fail_creds: 1014 put_cred(cred); 1015 fail: 1016 return; 1017 } 1018 1019 /* 1020 * Core dumping helper functions. These are the only things you should 1021 * do on a core-file: use only these functions to write out all the 1022 * necessary info. 1023 */ 1024 static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) 1025 { 1026 struct file *file = cprm->file; 1027 loff_t pos = file->f_pos; 1028 ssize_t n; 1029 1030 if (cprm->written + nr > cprm->limit) 1031 return 0; 1032 if (dump_interrupted()) 1033 return 0; 1034 n = __kernel_write(file, addr, nr, &pos); 1035 if (n != nr) 1036 return 0; 1037 file->f_pos = pos; 1038 cprm->written += n; 1039 cprm->pos += n; 1040 1041 return 1; 1042 } 1043 1044 static int __dump_skip(struct coredump_params *cprm, size_t nr) 1045 { 1046 static char zeroes[PAGE_SIZE]; 1047 struct file *file = cprm->file; 1048 1049 if (file->f_mode & FMODE_LSEEK) { 1050 if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0) 1051 return 0; 1052 cprm->pos += nr; 1053 return 1; 1054 } 1055 1056 while (nr > PAGE_SIZE) { 1057 if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) 1058 return 0; 1059 nr -= PAGE_SIZE; 1060 } 1061 1062 return __dump_emit(cprm, zeroes, nr); 1063 } 1064 1065 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 1066 { 1067 if (cprm->to_skip) { 1068 if (!__dump_skip(cprm, cprm->to_skip)) 1069 return 0; 1070 cprm->to_skip = 0; 1071 } 1072 return __dump_emit(cprm, addr, nr); 1073 } 1074 EXPORT_SYMBOL(dump_emit); 1075 1076 void dump_skip_to(struct coredump_params *cprm, unsigned long pos) 1077 { 1078 cprm->to_skip = pos - cprm->pos; 1079 } 1080 EXPORT_SYMBOL(dump_skip_to); 1081 1082 void dump_skip(struct coredump_params *cprm, size_t nr) 1083 { 1084 cprm->to_skip += nr; 1085 } 1086 EXPORT_SYMBOL(dump_skip); 1087 1088 #ifdef CONFIG_ELF_CORE 1089 static int dump_emit_page(struct coredump_params *cprm, struct page *page) 1090 { 1091 struct bio_vec bvec; 1092 struct iov_iter iter; 1093 struct file *file = cprm->file; 1094 loff_t pos; 1095 ssize_t n; 1096 1097 if (!page) 1098 return 0; 1099 1100 if (cprm->to_skip) { 1101 if (!__dump_skip(cprm, cprm->to_skip)) 1102 return 0; 1103 cprm->to_skip = 0; 1104 } 1105 if (cprm->written + PAGE_SIZE > cprm->limit) 1106 return 0; 1107 if (dump_interrupted()) 1108 return 0; 1109 pos = file->f_pos; 1110 bvec_set_page(&bvec, page, PAGE_SIZE, 0); 1111 iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); 1112 n = __kernel_write_iter(cprm->file, &iter, &pos); 1113 if (n != PAGE_SIZE) 1114 return 0; 1115 file->f_pos = pos; 1116 cprm->written += PAGE_SIZE; 1117 cprm->pos += PAGE_SIZE; 1118 1119 return 1; 1120 } 1121 1122 /* 1123 * If we might get machine checks from kernel accesses during the 1124 * core dump, let's get those errors early rather than during the 1125 * IO. This is not performance-critical enough to warrant having 1126 * all the machine check logic in the iovec paths. 1127 */ 1128 #ifdef copy_mc_to_kernel 1129 1130 #define dump_page_alloc() alloc_page(GFP_KERNEL) 1131 #define dump_page_free(x) __free_page(x) 1132 static struct page *dump_page_copy(struct page *src, struct page *dst) 1133 { 1134 void *buf = kmap_local_page(src); 1135 size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE); 1136 kunmap_local(buf); 1137 return left ? NULL : dst; 1138 } 1139 1140 #else 1141 1142 /* We just want to return non-NULL; it's never used. */ 1143 #define dump_page_alloc() ERR_PTR(-EINVAL) 1144 #define dump_page_free(x) ((void)(x)) 1145 static inline struct page *dump_page_copy(struct page *src, struct page *dst) 1146 { 1147 return src; 1148 } 1149 #endif 1150 1151 int dump_user_range(struct coredump_params *cprm, unsigned long start, 1152 unsigned long len) 1153 { 1154 unsigned long addr; 1155 struct page *dump_page; 1156 int locked, ret; 1157 1158 dump_page = dump_page_alloc(); 1159 if (!dump_page) 1160 return 0; 1161 1162 ret = 0; 1163 locked = 0; 1164 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 1165 struct page *page; 1166 1167 if (!locked) { 1168 if (mmap_read_lock_killable(current->mm)) 1169 goto out; 1170 locked = 1; 1171 } 1172 1173 /* 1174 * To avoid having to allocate page tables for virtual address 1175 * ranges that have never been used yet, and also to make it 1176 * easy to generate sparse core files, use a helper that returns 1177 * NULL when encountering an empty page table entry that would 1178 * otherwise have been filled with the zero page. 1179 */ 1180 page = get_dump_page(addr, &locked); 1181 if (page) { 1182 if (locked) { 1183 mmap_read_unlock(current->mm); 1184 locked = 0; 1185 } 1186 int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); 1187 put_page(page); 1188 if (stop) 1189 goto out; 1190 } else { 1191 dump_skip(cprm, PAGE_SIZE); 1192 } 1193 1194 if (dump_interrupted()) 1195 goto out; 1196 1197 if (!need_resched()) 1198 continue; 1199 if (locked) { 1200 mmap_read_unlock(current->mm); 1201 locked = 0; 1202 } 1203 cond_resched(); 1204 } 1205 ret = 1; 1206 out: 1207 if (locked) 1208 mmap_read_unlock(current->mm); 1209 1210 dump_page_free(dump_page); 1211 return ret; 1212 } 1213 #endif 1214 1215 int dump_align(struct coredump_params *cprm, int align) 1216 { 1217 unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1); 1218 if (align & (align - 1)) 1219 return 0; 1220 if (mod) 1221 cprm->to_skip += align - mod; 1222 return 1; 1223 } 1224 EXPORT_SYMBOL(dump_align); 1225 1226 #ifdef CONFIG_SYSCTL 1227 1228 void validate_coredump_safety(void) 1229 { 1230 if (suid_dumpable == SUID_DUMP_ROOT && 1231 core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') { 1232 1233 coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: " 1234 "pipe handler or fully qualified core dump path required. " 1235 "Set kernel.core_pattern before fs.suid_dumpable."); 1236 } 1237 } 1238 1239 static inline bool check_coredump_socket(void) 1240 { 1241 if (core_pattern[0] != '@') 1242 return true; 1243 1244 /* 1245 * Coredump socket must be located in the initial mount 1246 * namespace. Don't give the impression that anything else is 1247 * supported right now. 1248 */ 1249 if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) 1250 return false; 1251 1252 /* Must be an absolute path. */ 1253 if (*(core_pattern + 1) != '/') 1254 return false; 1255 1256 return true; 1257 } 1258 1259 static int proc_dostring_coredump(const struct ctl_table *table, int write, 1260 void *buffer, size_t *lenp, loff_t *ppos) 1261 { 1262 int error; 1263 ssize_t retval; 1264 char old_core_pattern[CORENAME_MAX_SIZE]; 1265 1266 retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); 1267 1268 error = proc_dostring(table, write, buffer, lenp, ppos); 1269 if (error) 1270 return error; 1271 if (!check_coredump_socket()) { 1272 strscpy(core_pattern, old_core_pattern, retval + 1); 1273 return -EINVAL; 1274 } 1275 1276 validate_coredump_safety(); 1277 return error; 1278 } 1279 1280 static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; 1281 static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; 1282 static char core_modes[] = { 1283 "file\npipe" 1284 #ifdef CONFIG_UNIX 1285 "\nsocket" 1286 #endif 1287 }; 1288 1289 static const struct ctl_table coredump_sysctls[] = { 1290 { 1291 .procname = "core_uses_pid", 1292 .data = &core_uses_pid, 1293 .maxlen = sizeof(int), 1294 .mode = 0644, 1295 .proc_handler = proc_dointvec, 1296 }, 1297 { 1298 .procname = "core_pattern", 1299 .data = core_pattern, 1300 .maxlen = CORENAME_MAX_SIZE, 1301 .mode = 0644, 1302 .proc_handler = proc_dostring_coredump, 1303 }, 1304 { 1305 .procname = "core_pipe_limit", 1306 .data = &core_pipe_limit, 1307 .maxlen = sizeof(unsigned int), 1308 .mode = 0644, 1309 .proc_handler = proc_dointvec_minmax, 1310 .extra1 = SYSCTL_ZERO, 1311 .extra2 = SYSCTL_INT_MAX, 1312 }, 1313 { 1314 .procname = "core_file_note_size_limit", 1315 .data = &core_file_note_size_limit, 1316 .maxlen = sizeof(unsigned int), 1317 .mode = 0644, 1318 .proc_handler = proc_douintvec_minmax, 1319 .extra1 = (unsigned int *)&core_file_note_size_min, 1320 .extra2 = (unsigned int *)&core_file_note_size_max, 1321 }, 1322 { 1323 .procname = "core_sort_vma", 1324 .data = &core_sort_vma, 1325 .maxlen = sizeof(int), 1326 .mode = 0644, 1327 .proc_handler = proc_douintvec_minmax, 1328 .extra1 = SYSCTL_ZERO, 1329 .extra2 = SYSCTL_ONE, 1330 }, 1331 { 1332 .procname = "core_modes", 1333 .data = core_modes, 1334 .maxlen = sizeof(core_modes) - 1, 1335 .mode = 0444, 1336 .proc_handler = proc_dostring, 1337 }, 1338 }; 1339 1340 static int __init init_fs_coredump_sysctls(void) 1341 { 1342 register_sysctl_init("kernel", coredump_sysctls); 1343 return 0; 1344 } 1345 fs_initcall(init_fs_coredump_sysctls); 1346 #endif /* CONFIG_SYSCTL */ 1347 1348 /* 1349 * The purpose of always_dump_vma() is to make sure that special kernel mappings 1350 * that are useful for post-mortem analysis are included in every core dump. 1351 * In that way we ensure that the core dump is fully interpretable later 1352 * without matching up the same kernel and hardware config to see what PC values 1353 * meant. These special mappings include - vDSO, vsyscall, and other 1354 * architecture specific mappings 1355 */ 1356 static bool always_dump_vma(struct vm_area_struct *vma) 1357 { 1358 /* Any vsyscall mappings? */ 1359 if (vma == get_gate_vma(vma->vm_mm)) 1360 return true; 1361 1362 /* 1363 * Assume that all vmas with a .name op should always be dumped. 1364 * If this changes, a new vm_ops field can easily be added. 1365 */ 1366 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 1367 return true; 1368 1369 /* 1370 * arch_vma_name() returns non-NULL for special architecture mappings, 1371 * such as vDSO sections. 1372 */ 1373 if (arch_vma_name(vma)) 1374 return true; 1375 1376 return false; 1377 } 1378 1379 #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 1380 1381 /* 1382 * Decide how much of @vma's contents should be included in a core dump. 1383 */ 1384 static unsigned long vma_dump_size(struct vm_area_struct *vma, 1385 unsigned long mm_flags) 1386 { 1387 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1388 1389 /* always dump the vdso and vsyscall sections */ 1390 if (always_dump_vma(vma)) 1391 goto whole; 1392 1393 if (vma->vm_flags & VM_DONTDUMP) 1394 return 0; 1395 1396 /* support for DAX */ 1397 if (vma_is_dax(vma)) { 1398 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) 1399 goto whole; 1400 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) 1401 goto whole; 1402 return 0; 1403 } 1404 1405 /* Hugetlb memory check */ 1406 if (is_vm_hugetlb_page(vma)) { 1407 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 1408 goto whole; 1409 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) 1410 goto whole; 1411 return 0; 1412 } 1413 1414 /* Do not dump I/O mapped devices or special mappings */ 1415 if (vma->vm_flags & VM_IO) 1416 return 0; 1417 1418 /* By default, dump shared memory if mapped from an anonymous file. */ 1419 if (vma->vm_flags & VM_SHARED) { 1420 if (file_inode(vma->vm_file)->i_nlink == 0 ? 1421 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1422 goto whole; 1423 return 0; 1424 } 1425 1426 /* Dump segments that have been written to. */ 1427 if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE)) 1428 goto whole; 1429 if (vma->vm_file == NULL) 1430 return 0; 1431 1432 if (FILTER(MAPPED_PRIVATE)) 1433 goto whole; 1434 1435 /* 1436 * If this is the beginning of an executable file mapping, 1437 * dump the first page to aid in determining what was mapped here. 1438 */ 1439 if (FILTER(ELF_HEADERS) && 1440 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { 1441 if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) 1442 return PAGE_SIZE; 1443 1444 /* 1445 * ELF libraries aren't always executable. 1446 * We'll want to check whether the mapping starts with the ELF 1447 * magic, but not now - we're holding the mmap lock, 1448 * so copy_from_user() doesn't work here. 1449 * Use a placeholder instead, and fix it up later in 1450 * dump_vma_snapshot(). 1451 */ 1452 return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; 1453 } 1454 1455 #undef FILTER 1456 1457 return 0; 1458 1459 whole: 1460 return vma->vm_end - vma->vm_start; 1461 } 1462 1463 /* 1464 * Helper function for iterating across a vma list. It ensures that the caller 1465 * will visit `gate_vma' prior to terminating the search. 1466 */ 1467 static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi, 1468 struct vm_area_struct *vma, 1469 struct vm_area_struct *gate_vma) 1470 { 1471 if (gate_vma && (vma == gate_vma)) 1472 return NULL; 1473 1474 vma = vma_next(vmi); 1475 if (vma) 1476 return vma; 1477 return gate_vma; 1478 } 1479 1480 static void free_vma_snapshot(struct coredump_params *cprm) 1481 { 1482 if (cprm->vma_meta) { 1483 int i; 1484 for (i = 0; i < cprm->vma_count; i++) { 1485 struct file *file = cprm->vma_meta[i].file; 1486 if (file) 1487 fput(file); 1488 } 1489 kvfree(cprm->vma_meta); 1490 cprm->vma_meta = NULL; 1491 } 1492 } 1493 1494 static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr) 1495 { 1496 const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr; 1497 const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr; 1498 1499 if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size) 1500 return -1; 1501 if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size) 1502 return 1; 1503 return 0; 1504 } 1505 1506 /* 1507 * Under the mmap_lock, take a snapshot of relevant information about the task's 1508 * VMAs. 1509 */ 1510 static bool dump_vma_snapshot(struct coredump_params *cprm) 1511 { 1512 struct vm_area_struct *gate_vma, *vma = NULL; 1513 struct mm_struct *mm = current->mm; 1514 VMA_ITERATOR(vmi, mm, 0); 1515 int i = 0; 1516 1517 /* 1518 * Once the stack expansion code is fixed to not change VMA bounds 1519 * under mmap_lock in read mode, this can be changed to take the 1520 * mmap_lock in read mode. 1521 */ 1522 if (mmap_write_lock_killable(mm)) 1523 return false; 1524 1525 cprm->vma_data_size = 0; 1526 gate_vma = get_gate_vma(mm); 1527 cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0); 1528 1529 cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL); 1530 if (!cprm->vma_meta) { 1531 mmap_write_unlock(mm); 1532 return false; 1533 } 1534 1535 while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) { 1536 struct core_vma_metadata *m = cprm->vma_meta + i; 1537 1538 m->start = vma->vm_start; 1539 m->end = vma->vm_end; 1540 m->flags = vma->vm_flags; 1541 m->dump_size = vma_dump_size(vma, cprm->mm_flags); 1542 m->pgoff = vma->vm_pgoff; 1543 m->file = vma->vm_file; 1544 if (m->file) 1545 get_file(m->file); 1546 i++; 1547 } 1548 1549 mmap_write_unlock(mm); 1550 1551 for (i = 0; i < cprm->vma_count; i++) { 1552 struct core_vma_metadata *m = cprm->vma_meta + i; 1553 1554 if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { 1555 char elfmag[SELFMAG]; 1556 1557 if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || 1558 memcmp(elfmag, ELFMAG, SELFMAG) != 0) { 1559 m->dump_size = 0; 1560 } else { 1561 m->dump_size = PAGE_SIZE; 1562 } 1563 } 1564 1565 cprm->vma_data_size += m->dump_size; 1566 } 1567 1568 if (core_sort_vma) 1569 sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), 1570 cmp_vma_size, NULL); 1571 1572 return true; 1573 } 1574