1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/slab.h> 3 #include <linux/file.h> 4 #include <linux/fdtable.h> 5 #include <linux/freezer.h> 6 #include <linux/mm.h> 7 #include <linux/stat.h> 8 #include <linux/fcntl.h> 9 #include <linux/swap.h> 10 #include <linux/ctype.h> 11 #include <linux/string.h> 12 #include <linux/init.h> 13 #include <linux/pagemap.h> 14 #include <linux/perf_event.h> 15 #include <linux/highmem.h> 16 #include <linux/spinlock.h> 17 #include <linux/key.h> 18 #include <linux/personality.h> 19 #include <linux/binfmts.h> 20 #include <linux/coredump.h> 21 #include <linux/sched/coredump.h> 22 #include <linux/sched/signal.h> 23 #include <linux/sched/task_stack.h> 24 #include <linux/utsname.h> 25 #include <linux/pid_namespace.h> 26 #include <linux/module.h> 27 #include <linux/namei.h> 28 #include <linux/mount.h> 29 #include <linux/security.h> 30 #include <linux/syscalls.h> 31 #include <linux/tsacct_kern.h> 32 #include <linux/cn_proc.h> 33 #include <linux/audit.h> 34 #include <linux/tracehook.h> 35 #include <linux/kmod.h> 36 #include <linux/fsnotify.h> 37 #include <linux/fs_struct.h> 38 #include <linux/pipe_fs_i.h> 39 #include <linux/oom.h> 40 #include <linux/compat.h> 41 #include <linux/fs.h> 42 #include <linux/path.h> 43 #include <linux/timekeeping.h> 44 45 #include <linux/uaccess.h> 46 #include <asm/mmu_context.h> 47 #include <asm/tlb.h> 48 #include <asm/exec.h> 49 50 #include <trace/events/task.h> 51 #include "internal.h" 52 53 #include <trace/events/sched.h> 54 55 int core_uses_pid; 56 unsigned int core_pipe_limit; 57 char core_pattern[CORENAME_MAX_SIZE] = "core"; 58 static int core_name_size = CORENAME_MAX_SIZE; 59 60 struct core_name { 61 char *corename; 62 int used, size; 63 }; 64 65 /* The maximal length of core_pattern is also specified in sysctl.c */ 66 67 static int expand_corename(struct core_name *cn, int size) 68 { 69 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 70 71 if (!corename) 72 return -ENOMEM; 73 74 if (size > core_name_size) /* racy but harmless */ 75 core_name_size = size; 76 77 cn->size = ksize(corename); 78 cn->corename = corename; 79 return 0; 80 } 81 82 static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt, 83 va_list arg) 84 { 85 int free, need; 86 va_list arg_copy; 87 88 again: 89 free = cn->size - cn->used; 90 91 va_copy(arg_copy, arg); 92 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 93 va_end(arg_copy); 94 95 if (need < free) { 96 cn->used += need; 97 return 0; 98 } 99 100 if (!expand_corename(cn, cn->size + need - free + 1)) 101 goto again; 102 103 return -ENOMEM; 104 } 105 106 static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...) 107 { 108 va_list arg; 109 int ret; 110 111 va_start(arg, fmt); 112 ret = cn_vprintf(cn, fmt, arg); 113 va_end(arg); 114 115 return ret; 116 } 117 118 static __printf(2, 3) 119 int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 120 { 121 int cur = cn->used; 122 va_list arg; 123 int ret; 124 125 va_start(arg, fmt); 126 ret = cn_vprintf(cn, fmt, arg); 127 va_end(arg); 128 129 if (ret == 0) { 130 /* 131 * Ensure that this coredump name component can't cause the 132 * resulting corefile path to consist of a ".." or ".". 133 */ 134 if ((cn->used - cur == 1 && cn->corename[cur] == '.') || 135 (cn->used - cur == 2 && cn->corename[cur] == '.' 136 && cn->corename[cur+1] == '.')) 137 cn->corename[cur] = '!'; 138 139 /* 140 * Empty names are fishy and could be used to create a "//" in a 141 * corefile name, causing the coredump to happen one directory 142 * level too high. Enforce that all components of the core 143 * pattern are at least one character long. 144 */ 145 if (cn->used == cur) 146 ret = cn_printf(cn, "!"); 147 } 148 149 for (; cur < cn->used; ++cur) { 150 if (cn->corename[cur] == '/') 151 cn->corename[cur] = '!'; 152 } 153 return ret; 154 } 155 156 static int cn_print_exe_file(struct core_name *cn, bool name_only) 157 { 158 struct file *exe_file; 159 char *pathbuf, *path, *ptr; 160 int ret; 161 162 exe_file = get_mm_exe_file(current->mm); 163 if (!exe_file) 164 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 165 166 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); 167 if (!pathbuf) { 168 ret = -ENOMEM; 169 goto put_exe_file; 170 } 171 172 path = file_path(exe_file, pathbuf, PATH_MAX); 173 if (IS_ERR(path)) { 174 ret = PTR_ERR(path); 175 goto free_buf; 176 } 177 178 if (name_only) { 179 ptr = strrchr(path, '/'); 180 if (ptr) 181 path = ptr + 1; 182 } 183 ret = cn_esc_printf(cn, "%s", path); 184 185 free_buf: 186 kfree(pathbuf); 187 put_exe_file: 188 fput(exe_file); 189 return ret; 190 } 191 192 /* format_corename will inspect the pattern parameter, and output a 193 * name into corename, which must have space for at least 194 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 195 */ 196 static int format_corename(struct core_name *cn, struct coredump_params *cprm, 197 size_t **argv, int *argc) 198 { 199 const struct cred *cred = current_cred(); 200 const char *pat_ptr = core_pattern; 201 int ispipe = (*pat_ptr == '|'); 202 bool was_space = false; 203 int pid_in_pattern = 0; 204 int err = 0; 205 206 cn->used = 0; 207 cn->corename = NULL; 208 if (expand_corename(cn, core_name_size)) 209 return -ENOMEM; 210 cn->corename[0] = '\0'; 211 212 if (ispipe) { 213 int argvs = sizeof(core_pattern) / 2; 214 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); 215 if (!(*argv)) 216 return -ENOMEM; 217 (*argv)[(*argc)++] = 0; 218 ++pat_ptr; 219 if (!(*pat_ptr)) 220 return -ENOMEM; 221 } 222 223 /* Repeat as long as we have more pattern to process and more output 224 space */ 225 while (*pat_ptr) { 226 /* 227 * Split on spaces before doing template expansion so that 228 * %e and %E don't get split if they have spaces in them 229 */ 230 if (ispipe) { 231 if (isspace(*pat_ptr)) { 232 if (cn->used != 0) 233 was_space = true; 234 pat_ptr++; 235 continue; 236 } else if (was_space) { 237 was_space = false; 238 err = cn_printf(cn, "%c", '\0'); 239 if (err) 240 return err; 241 (*argv)[(*argc)++] = cn->used; 242 } 243 } 244 if (*pat_ptr != '%') { 245 err = cn_printf(cn, "%c", *pat_ptr++); 246 } else { 247 switch (*++pat_ptr) { 248 /* single % at the end, drop that */ 249 case 0: 250 goto out; 251 /* Double percent, output one percent */ 252 case '%': 253 err = cn_printf(cn, "%c", '%'); 254 break; 255 /* pid */ 256 case 'p': 257 pid_in_pattern = 1; 258 err = cn_printf(cn, "%d", 259 task_tgid_vnr(current)); 260 break; 261 /* global pid */ 262 case 'P': 263 err = cn_printf(cn, "%d", 264 task_tgid_nr(current)); 265 break; 266 case 'i': 267 err = cn_printf(cn, "%d", 268 task_pid_vnr(current)); 269 break; 270 case 'I': 271 err = cn_printf(cn, "%d", 272 task_pid_nr(current)); 273 break; 274 /* uid */ 275 case 'u': 276 err = cn_printf(cn, "%u", 277 from_kuid(&init_user_ns, 278 cred->uid)); 279 break; 280 /* gid */ 281 case 'g': 282 err = cn_printf(cn, "%u", 283 from_kgid(&init_user_ns, 284 cred->gid)); 285 break; 286 case 'd': 287 err = cn_printf(cn, "%d", 288 __get_dumpable(cprm->mm_flags)); 289 break; 290 /* signal that caused the coredump */ 291 case 's': 292 err = cn_printf(cn, "%d", 293 cprm->siginfo->si_signo); 294 break; 295 /* UNIX time of coredump */ 296 case 't': { 297 time64_t time; 298 299 time = ktime_get_real_seconds(); 300 err = cn_printf(cn, "%lld", time); 301 break; 302 } 303 /* hostname */ 304 case 'h': 305 down_read(&uts_sem); 306 err = cn_esc_printf(cn, "%s", 307 utsname()->nodename); 308 up_read(&uts_sem); 309 break; 310 /* executable, could be changed by prctl PR_SET_NAME etc */ 311 case 'e': 312 err = cn_esc_printf(cn, "%s", current->comm); 313 break; 314 /* file name of executable */ 315 case 'f': 316 err = cn_print_exe_file(cn, true); 317 break; 318 case 'E': 319 err = cn_print_exe_file(cn, false); 320 break; 321 /* core limit size */ 322 case 'c': 323 err = cn_printf(cn, "%lu", 324 rlimit(RLIMIT_CORE)); 325 break; 326 default: 327 break; 328 } 329 ++pat_ptr; 330 } 331 332 if (err) 333 return err; 334 } 335 336 out: 337 /* Backward compatibility with core_uses_pid: 338 * 339 * If core_pattern does not include a %p (as is the default) 340 * and core_uses_pid is set, then .%pid will be appended to 341 * the filename. Do not do this for piped commands. */ 342 if (!ispipe && !pid_in_pattern && core_uses_pid) { 343 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 344 if (err) 345 return err; 346 } 347 return ispipe; 348 } 349 350 static int zap_process(struct task_struct *start, int exit_code, int flags) 351 { 352 struct task_struct *t; 353 int nr = 0; 354 355 /* ignore all signals except SIGKILL, see prepare_signal() */ 356 start->signal->flags = SIGNAL_GROUP_COREDUMP | flags; 357 start->signal->group_exit_code = exit_code; 358 start->signal->group_stop_count = 0; 359 360 for_each_thread(start, t) { 361 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 362 if (t != current && !(t->flags & PF_POSTCOREDUMP)) { 363 sigaddset(&t->pending.signal, SIGKILL); 364 signal_wake_up(t, 1); 365 nr++; 366 } 367 } 368 369 return nr; 370 } 371 372 static int zap_threads(struct task_struct *tsk, 373 struct core_state *core_state, int exit_code) 374 { 375 int nr = -EAGAIN; 376 377 spin_lock_irq(&tsk->sighand->siglock); 378 if (!signal_group_exit(tsk->signal)) { 379 tsk->signal->core_state = core_state; 380 tsk->signal->group_exit_task = tsk; 381 nr = zap_process(tsk, exit_code, 0); 382 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 383 tsk->flags |= PF_DUMPCORE; 384 atomic_set(&core_state->nr_threads, nr); 385 } 386 spin_unlock_irq(&tsk->sighand->siglock); 387 return nr; 388 } 389 390 static int coredump_wait(int exit_code, struct core_state *core_state) 391 { 392 struct task_struct *tsk = current; 393 int core_waiters = -EBUSY; 394 395 init_completion(&core_state->startup); 396 core_state->dumper.task = tsk; 397 core_state->dumper.next = NULL; 398 399 core_waiters = zap_threads(tsk, core_state, exit_code); 400 if (core_waiters > 0) { 401 struct core_thread *ptr; 402 403 freezer_do_not_count(); 404 wait_for_completion(&core_state->startup); 405 freezer_count(); 406 /* 407 * Wait for all the threads to become inactive, so that 408 * all the thread context (extended register state, like 409 * fpu etc) gets copied to the memory. 410 */ 411 ptr = core_state->dumper.next; 412 while (ptr != NULL) { 413 wait_task_inactive(ptr->task, 0); 414 ptr = ptr->next; 415 } 416 } 417 418 return core_waiters; 419 } 420 421 static void coredump_finish(bool core_dumped) 422 { 423 struct core_thread *curr, *next; 424 struct task_struct *task; 425 426 spin_lock_irq(¤t->sighand->siglock); 427 if (core_dumped && !__fatal_signal_pending(current)) 428 current->signal->group_exit_code |= 0x80; 429 current->signal->group_exit_task = NULL; 430 current->signal->flags = SIGNAL_GROUP_EXIT; 431 next = current->signal->core_state->dumper.next; 432 current->signal->core_state = NULL; 433 spin_unlock_irq(¤t->sighand->siglock); 434 435 while ((curr = next) != NULL) { 436 next = curr->next; 437 task = curr->task; 438 /* 439 * see coredump_task_exit(), curr->task must not see 440 * ->task == NULL before we read ->next. 441 */ 442 smp_mb(); 443 curr->task = NULL; 444 wake_up_process(task); 445 } 446 } 447 448 static bool dump_interrupted(void) 449 { 450 /* 451 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 452 * can do try_to_freeze() and check __fatal_signal_pending(), 453 * but then we need to teach dump_write() to restart and clear 454 * TIF_SIGPENDING. 455 */ 456 return fatal_signal_pending(current) || freezing(current); 457 } 458 459 static void wait_for_dump_helpers(struct file *file) 460 { 461 struct pipe_inode_info *pipe = file->private_data; 462 463 pipe_lock(pipe); 464 pipe->readers++; 465 pipe->writers--; 466 wake_up_interruptible_sync(&pipe->rd_wait); 467 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 468 pipe_unlock(pipe); 469 470 /* 471 * We actually want wait_event_freezable() but then we need 472 * to clear TIF_SIGPENDING and improve dump_interrupted(). 473 */ 474 wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); 475 476 pipe_lock(pipe); 477 pipe->readers--; 478 pipe->writers++; 479 pipe_unlock(pipe); 480 } 481 482 /* 483 * umh_pipe_setup 484 * helper function to customize the process used 485 * to collect the core in userspace. Specifically 486 * it sets up a pipe and installs it as fd 0 (stdin) 487 * for the process. Returns 0 on success, or 488 * PTR_ERR on failure. 489 * Note that it also sets the core limit to 1. This 490 * is a special value that we use to trap recursive 491 * core dumps 492 */ 493 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 494 { 495 struct file *files[2]; 496 struct coredump_params *cp = (struct coredump_params *)info->data; 497 int err = create_pipe_files(files, 0); 498 if (err) 499 return err; 500 501 cp->file = files[1]; 502 503 err = replace_fd(0, files[0], 0); 504 fput(files[0]); 505 /* and disallow core files too */ 506 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 507 508 return err; 509 } 510 511 void do_coredump(const kernel_siginfo_t *siginfo) 512 { 513 struct core_state core_state; 514 struct core_name cn; 515 struct mm_struct *mm = current->mm; 516 struct linux_binfmt * binfmt; 517 const struct cred *old_cred; 518 struct cred *cred; 519 int retval = 0; 520 int ispipe; 521 size_t *argv = NULL; 522 int argc = 0; 523 /* require nonrelative corefile path and be extra careful */ 524 bool need_suid_safe = false; 525 bool core_dumped = false; 526 static atomic_t core_dump_count = ATOMIC_INIT(0); 527 struct coredump_params cprm = { 528 .siginfo = siginfo, 529 .regs = signal_pt_regs(), 530 .limit = rlimit(RLIMIT_CORE), 531 /* 532 * We must use the same mm->flags while dumping core to avoid 533 * inconsistency of bit flags, since this flag is not protected 534 * by any locks. 535 */ 536 .mm_flags = mm->flags, 537 }; 538 539 audit_core_dumps(siginfo->si_signo); 540 541 binfmt = mm->binfmt; 542 if (!binfmt || !binfmt->core_dump) 543 goto fail; 544 if (!__get_dumpable(cprm.mm_flags)) 545 goto fail; 546 547 cred = prepare_creds(); 548 if (!cred) 549 goto fail; 550 /* 551 * We cannot trust fsuid as being the "true" uid of the process 552 * nor do we know its entire history. We only know it was tainted 553 * so we dump it as root in mode 2, and only into a controlled 554 * environment (pipe handler or fully qualified path). 555 */ 556 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 557 /* Setuid core dump mode */ 558 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 559 need_suid_safe = true; 560 } 561 562 retval = coredump_wait(siginfo->si_signo, &core_state); 563 if (retval < 0) 564 goto fail_creds; 565 566 old_cred = override_creds(cred); 567 568 ispipe = format_corename(&cn, &cprm, &argv, &argc); 569 570 if (ispipe) { 571 int argi; 572 int dump_count; 573 char **helper_argv; 574 struct subprocess_info *sub_info; 575 576 if (ispipe < 0) { 577 printk(KERN_WARNING "format_corename failed\n"); 578 printk(KERN_WARNING "Aborting core\n"); 579 goto fail_unlock; 580 } 581 582 if (cprm.limit == 1) { 583 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 584 * 585 * Normally core limits are irrelevant to pipes, since 586 * we're not writing to the file system, but we use 587 * cprm.limit of 1 here as a special value, this is a 588 * consistent way to catch recursive crashes. 589 * We can still crash if the core_pattern binary sets 590 * RLIM_CORE = !1, but it runs as root, and can do 591 * lots of stupid things. 592 * 593 * Note that we use task_tgid_vnr here to grab the pid 594 * of the process group leader. That way we get the 595 * right pid if a thread in a multi-threaded 596 * core_pattern process dies. 597 */ 598 printk(KERN_WARNING 599 "Process %d(%s) has RLIMIT_CORE set to 1\n", 600 task_tgid_vnr(current), current->comm); 601 printk(KERN_WARNING "Aborting core\n"); 602 goto fail_unlock; 603 } 604 cprm.limit = RLIM_INFINITY; 605 606 dump_count = atomic_inc_return(&core_dump_count); 607 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 608 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 609 task_tgid_vnr(current), current->comm); 610 printk(KERN_WARNING "Skipping core dump\n"); 611 goto fail_dropcount; 612 } 613 614 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), 615 GFP_KERNEL); 616 if (!helper_argv) { 617 printk(KERN_WARNING "%s failed to allocate memory\n", 618 __func__); 619 goto fail_dropcount; 620 } 621 for (argi = 0; argi < argc; argi++) 622 helper_argv[argi] = cn.corename + argv[argi]; 623 helper_argv[argi] = NULL; 624 625 retval = -ENOMEM; 626 sub_info = call_usermodehelper_setup(helper_argv[0], 627 helper_argv, NULL, GFP_KERNEL, 628 umh_pipe_setup, NULL, &cprm); 629 if (sub_info) 630 retval = call_usermodehelper_exec(sub_info, 631 UMH_WAIT_EXEC); 632 633 kfree(helper_argv); 634 if (retval) { 635 printk(KERN_INFO "Core dump to |%s pipe failed\n", 636 cn.corename); 637 goto close_fail; 638 } 639 } else { 640 struct user_namespace *mnt_userns; 641 struct inode *inode; 642 int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | 643 O_LARGEFILE | O_EXCL; 644 645 if (cprm.limit < binfmt->min_coredump) 646 goto fail_unlock; 647 648 if (need_suid_safe && cn.corename[0] != '/') { 649 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 650 "to fully qualified path!\n", 651 task_tgid_vnr(current), current->comm); 652 printk(KERN_WARNING "Skipping core dump\n"); 653 goto fail_unlock; 654 } 655 656 /* 657 * Unlink the file if it exists unless this is a SUID 658 * binary - in that case, we're running around with root 659 * privs and don't want to unlink another user's coredump. 660 */ 661 if (!need_suid_safe) { 662 /* 663 * If it doesn't exist, that's fine. If there's some 664 * other problem, we'll catch it at the filp_open(). 665 */ 666 do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); 667 } 668 669 /* 670 * There is a race between unlinking and creating the 671 * file, but if that causes an EEXIST here, that's 672 * fine - another process raced with us while creating 673 * the corefile, and the other process won. To userspace, 674 * what matters is that at least one of the two processes 675 * writes its coredump successfully, not which one. 676 */ 677 if (need_suid_safe) { 678 /* 679 * Using user namespaces, normal user tasks can change 680 * their current->fs->root to point to arbitrary 681 * directories. Since the intention of the "only dump 682 * with a fully qualified path" rule is to control where 683 * coredumps may be placed using root privileges, 684 * current->fs->root must not be used. Instead, use the 685 * root directory of init_task. 686 */ 687 struct path root; 688 689 task_lock(&init_task); 690 get_fs_root(init_task.fs, &root); 691 task_unlock(&init_task); 692 cprm.file = file_open_root(&root, cn.corename, 693 open_flags, 0600); 694 path_put(&root); 695 } else { 696 cprm.file = filp_open(cn.corename, open_flags, 0600); 697 } 698 if (IS_ERR(cprm.file)) 699 goto fail_unlock; 700 701 inode = file_inode(cprm.file); 702 if (inode->i_nlink > 1) 703 goto close_fail; 704 if (d_unhashed(cprm.file->f_path.dentry)) 705 goto close_fail; 706 /* 707 * AK: actually i see no reason to not allow this for named 708 * pipes etc, but keep the previous behaviour for now. 709 */ 710 if (!S_ISREG(inode->i_mode)) 711 goto close_fail; 712 /* 713 * Don't dump core if the filesystem changed owner or mode 714 * of the file during file creation. This is an issue when 715 * a process dumps core while its cwd is e.g. on a vfat 716 * filesystem. 717 */ 718 mnt_userns = file_mnt_user_ns(cprm.file); 719 if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), 720 current_fsuid())) { 721 pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n", 722 cn.corename); 723 goto close_fail; 724 } 725 if ((inode->i_mode & 0677) != 0600) { 726 pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n", 727 cn.corename); 728 goto close_fail; 729 } 730 if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) 731 goto close_fail; 732 if (do_truncate(mnt_userns, cprm.file->f_path.dentry, 733 0, 0, cprm.file)) 734 goto close_fail; 735 } 736 737 /* get us an unshared descriptor table; almost always a no-op */ 738 /* The cell spufs coredump code reads the file descriptor tables */ 739 retval = unshare_files(); 740 if (retval) 741 goto close_fail; 742 if (!dump_interrupted()) { 743 /* 744 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would 745 * have this set to NULL. 746 */ 747 if (!cprm.file) { 748 pr_info("Core dump to |%s disabled\n", cn.corename); 749 goto close_fail; 750 } 751 file_start_write(cprm.file); 752 core_dumped = binfmt->core_dump(&cprm); 753 /* 754 * Ensures that file size is big enough to contain the current 755 * file postion. This prevents gdb from complaining about 756 * a truncated file if the last "write" to the file was 757 * dump_skip. 758 */ 759 if (cprm.to_skip) { 760 cprm.to_skip--; 761 dump_emit(&cprm, "", 1); 762 } 763 file_end_write(cprm.file); 764 } 765 if (ispipe && core_pipe_limit) 766 wait_for_dump_helpers(cprm.file); 767 close_fail: 768 if (cprm.file) 769 filp_close(cprm.file, NULL); 770 fail_dropcount: 771 if (ispipe) 772 atomic_dec(&core_dump_count); 773 fail_unlock: 774 kfree(argv); 775 kfree(cn.corename); 776 coredump_finish(core_dumped); 777 revert_creds(old_cred); 778 fail_creds: 779 put_cred(cred); 780 fail: 781 return; 782 } 783 784 /* 785 * Core dumping helper functions. These are the only things you should 786 * do on a core-file: use only these functions to write out all the 787 * necessary info. 788 */ 789 static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) 790 { 791 struct file *file = cprm->file; 792 loff_t pos = file->f_pos; 793 ssize_t n; 794 if (cprm->written + nr > cprm->limit) 795 return 0; 796 797 798 if (dump_interrupted()) 799 return 0; 800 n = __kernel_write(file, addr, nr, &pos); 801 if (n != nr) 802 return 0; 803 file->f_pos = pos; 804 cprm->written += n; 805 cprm->pos += n; 806 807 return 1; 808 } 809 810 static int __dump_skip(struct coredump_params *cprm, size_t nr) 811 { 812 static char zeroes[PAGE_SIZE]; 813 struct file *file = cprm->file; 814 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 815 if (dump_interrupted() || 816 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 817 return 0; 818 cprm->pos += nr; 819 return 1; 820 } else { 821 while (nr > PAGE_SIZE) { 822 if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) 823 return 0; 824 nr -= PAGE_SIZE; 825 } 826 return __dump_emit(cprm, zeroes, nr); 827 } 828 } 829 830 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 831 { 832 if (cprm->to_skip) { 833 if (!__dump_skip(cprm, cprm->to_skip)) 834 return 0; 835 cprm->to_skip = 0; 836 } 837 return __dump_emit(cprm, addr, nr); 838 } 839 EXPORT_SYMBOL(dump_emit); 840 841 void dump_skip_to(struct coredump_params *cprm, unsigned long pos) 842 { 843 cprm->to_skip = pos - cprm->pos; 844 } 845 EXPORT_SYMBOL(dump_skip_to); 846 847 void dump_skip(struct coredump_params *cprm, size_t nr) 848 { 849 cprm->to_skip += nr; 850 } 851 EXPORT_SYMBOL(dump_skip); 852 853 #ifdef CONFIG_ELF_CORE 854 int dump_user_range(struct coredump_params *cprm, unsigned long start, 855 unsigned long len) 856 { 857 unsigned long addr; 858 859 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 860 struct page *page; 861 int stop; 862 863 /* 864 * To avoid having to allocate page tables for virtual address 865 * ranges that have never been used yet, and also to make it 866 * easy to generate sparse core files, use a helper that returns 867 * NULL when encountering an empty page table entry that would 868 * otherwise have been filled with the zero page. 869 */ 870 page = get_dump_page(addr); 871 if (page) { 872 void *kaddr = kmap_local_page(page); 873 874 stop = !dump_emit(cprm, kaddr, PAGE_SIZE); 875 kunmap_local(kaddr); 876 put_page(page); 877 if (stop) 878 return 0; 879 } else { 880 dump_skip(cprm, PAGE_SIZE); 881 } 882 } 883 return 1; 884 } 885 #endif 886 887 int dump_align(struct coredump_params *cprm, int align) 888 { 889 unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1); 890 if (align & (align - 1)) 891 return 0; 892 if (mod) 893 cprm->to_skip += align - mod; 894 return 1; 895 } 896 EXPORT_SYMBOL(dump_align); 897 898 /* 899 * The purpose of always_dump_vma() is to make sure that special kernel mappings 900 * that are useful for post-mortem analysis are included in every core dump. 901 * In that way we ensure that the core dump is fully interpretable later 902 * without matching up the same kernel and hardware config to see what PC values 903 * meant. These special mappings include - vDSO, vsyscall, and other 904 * architecture specific mappings 905 */ 906 static bool always_dump_vma(struct vm_area_struct *vma) 907 { 908 /* Any vsyscall mappings? */ 909 if (vma == get_gate_vma(vma->vm_mm)) 910 return true; 911 912 /* 913 * Assume that all vmas with a .name op should always be dumped. 914 * If this changes, a new vm_ops field can easily be added. 915 */ 916 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 917 return true; 918 919 /* 920 * arch_vma_name() returns non-NULL for special architecture mappings, 921 * such as vDSO sections. 922 */ 923 if (arch_vma_name(vma)) 924 return true; 925 926 return false; 927 } 928 929 /* 930 * Decide how much of @vma's contents should be included in a core dump. 931 */ 932 static unsigned long vma_dump_size(struct vm_area_struct *vma, 933 unsigned long mm_flags) 934 { 935 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 936 937 /* always dump the vdso and vsyscall sections */ 938 if (always_dump_vma(vma)) 939 goto whole; 940 941 if (vma->vm_flags & VM_DONTDUMP) 942 return 0; 943 944 /* support for DAX */ 945 if (vma_is_dax(vma)) { 946 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) 947 goto whole; 948 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) 949 goto whole; 950 return 0; 951 } 952 953 /* Hugetlb memory check */ 954 if (is_vm_hugetlb_page(vma)) { 955 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 956 goto whole; 957 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) 958 goto whole; 959 return 0; 960 } 961 962 /* Do not dump I/O mapped devices or special mappings */ 963 if (vma->vm_flags & VM_IO) 964 return 0; 965 966 /* By default, dump shared memory if mapped from an anonymous file. */ 967 if (vma->vm_flags & VM_SHARED) { 968 if (file_inode(vma->vm_file)->i_nlink == 0 ? 969 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 970 goto whole; 971 return 0; 972 } 973 974 /* Dump segments that have been written to. */ 975 if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE)) 976 goto whole; 977 if (vma->vm_file == NULL) 978 return 0; 979 980 if (FILTER(MAPPED_PRIVATE)) 981 goto whole; 982 983 /* 984 * If this is the beginning of an executable file mapping, 985 * dump the first page to aid in determining what was mapped here. 986 */ 987 if (FILTER(ELF_HEADERS) && 988 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) && 989 (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) 990 return PAGE_SIZE; 991 992 #undef FILTER 993 994 return 0; 995 996 whole: 997 return vma->vm_end - vma->vm_start; 998 } 999 1000 static struct vm_area_struct *first_vma(struct task_struct *tsk, 1001 struct vm_area_struct *gate_vma) 1002 { 1003 struct vm_area_struct *ret = tsk->mm->mmap; 1004 1005 if (ret) 1006 return ret; 1007 return gate_vma; 1008 } 1009 1010 /* 1011 * Helper function for iterating across a vma list. It ensures that the caller 1012 * will visit `gate_vma' prior to terminating the search. 1013 */ 1014 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, 1015 struct vm_area_struct *gate_vma) 1016 { 1017 struct vm_area_struct *ret; 1018 1019 ret = this_vma->vm_next; 1020 if (ret) 1021 return ret; 1022 if (this_vma == gate_vma) 1023 return NULL; 1024 return gate_vma; 1025 } 1026 1027 /* 1028 * Under the mmap_lock, take a snapshot of relevant information about the task's 1029 * VMAs. 1030 */ 1031 int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, 1032 struct core_vma_metadata **vma_meta, 1033 size_t *vma_data_size_ptr) 1034 { 1035 struct vm_area_struct *vma, *gate_vma; 1036 struct mm_struct *mm = current->mm; 1037 int i; 1038 size_t vma_data_size = 0; 1039 1040 /* 1041 * Once the stack expansion code is fixed to not change VMA bounds 1042 * under mmap_lock in read mode, this can be changed to take the 1043 * mmap_lock in read mode. 1044 */ 1045 if (mmap_write_lock_killable(mm)) 1046 return -EINTR; 1047 1048 gate_vma = get_gate_vma(mm); 1049 *vma_count = mm->map_count + (gate_vma ? 1 : 0); 1050 1051 *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL); 1052 if (!*vma_meta) { 1053 mmap_write_unlock(mm); 1054 return -ENOMEM; 1055 } 1056 1057 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; 1058 vma = next_vma(vma, gate_vma), i++) { 1059 struct core_vma_metadata *m = (*vma_meta) + i; 1060 1061 m->start = vma->vm_start; 1062 m->end = vma->vm_end; 1063 m->flags = vma->vm_flags; 1064 m->dump_size = vma_dump_size(vma, cprm->mm_flags); 1065 1066 vma_data_size += m->dump_size; 1067 } 1068 1069 mmap_write_unlock(mm); 1070 1071 if (WARN_ON(i != *vma_count)) { 1072 kvfree(*vma_meta); 1073 return -EFAULT; 1074 } 1075 1076 *vma_data_size_ptr = vma_data_size; 1077 return 0; 1078 } 1079