1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 44 #include <trace/events/sched.h> 45 46 int core_uses_pid; 47 unsigned int core_pipe_limit; 48 char core_pattern[CORENAME_MAX_SIZE] = "core"; 49 static int core_name_size = CORENAME_MAX_SIZE; 50 51 struct core_name { 52 char *corename; 53 int used, size; 54 }; 55 56 /* The maximal length of core_pattern is also specified in sysctl.c */ 57 58 static int expand_corename(struct core_name *cn, int size) 59 { 60 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 61 62 if (!corename) 63 return -ENOMEM; 64 65 if (size > core_name_size) /* racy but harmless */ 66 core_name_size = size; 67 68 cn->size = ksize(corename); 69 cn->corename = corename; 70 return 0; 71 } 72 73 static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt, 74 va_list arg) 75 { 76 int free, need; 77 va_list arg_copy; 78 79 again: 80 free = cn->size - cn->used; 81 82 va_copy(arg_copy, arg); 83 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 84 va_end(arg_copy); 85 86 if (need < free) { 87 cn->used += need; 88 return 0; 89 } 90 91 if (!expand_corename(cn, cn->size + need - free + 1)) 92 goto again; 93 94 return -ENOMEM; 95 } 96 97 static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...) 98 { 99 va_list arg; 100 int ret; 101 102 va_start(arg, fmt); 103 ret = cn_vprintf(cn, fmt, arg); 104 va_end(arg); 105 106 return ret; 107 } 108 109 static __printf(2, 3) 110 int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 111 { 112 int cur = cn->used; 113 va_list arg; 114 int ret; 115 116 va_start(arg, fmt); 117 ret = cn_vprintf(cn, fmt, arg); 118 va_end(arg); 119 120 for (; cur < cn->used; ++cur) { 121 if (cn->corename[cur] == '/') 122 cn->corename[cur] = '!'; 123 } 124 return ret; 125 } 126 127 static int cn_print_exe_file(struct core_name *cn) 128 { 129 struct file *exe_file; 130 char *pathbuf, *path; 131 int ret; 132 133 exe_file = get_mm_exe_file(current->mm); 134 if (!exe_file) 135 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 136 137 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 138 if (!pathbuf) { 139 ret = -ENOMEM; 140 goto put_exe_file; 141 } 142 143 path = file_path(exe_file, pathbuf, PATH_MAX); 144 if (IS_ERR(path)) { 145 ret = PTR_ERR(path); 146 goto free_buf; 147 } 148 149 ret = cn_esc_printf(cn, "%s", path); 150 151 free_buf: 152 kfree(pathbuf); 153 put_exe_file: 154 fput(exe_file); 155 return ret; 156 } 157 158 /* format_corename will inspect the pattern parameter, and output a 159 * name into corename, which must have space for at least 160 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 161 */ 162 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 163 { 164 const struct cred *cred = current_cred(); 165 const char *pat_ptr = core_pattern; 166 int ispipe = (*pat_ptr == '|'); 167 int pid_in_pattern = 0; 168 int err = 0; 169 170 cn->used = 0; 171 cn->corename = NULL; 172 if (expand_corename(cn, core_name_size)) 173 return -ENOMEM; 174 cn->corename[0] = '\0'; 175 176 if (ispipe) 177 ++pat_ptr; 178 179 /* Repeat as long as we have more pattern to process and more output 180 space */ 181 while (*pat_ptr) { 182 if (*pat_ptr != '%') { 183 err = cn_printf(cn, "%c", *pat_ptr++); 184 } else { 185 switch (*++pat_ptr) { 186 /* single % at the end, drop that */ 187 case 0: 188 goto out; 189 /* Double percent, output one percent */ 190 case '%': 191 err = cn_printf(cn, "%c", '%'); 192 break; 193 /* pid */ 194 case 'p': 195 pid_in_pattern = 1; 196 err = cn_printf(cn, "%d", 197 task_tgid_vnr(current)); 198 break; 199 /* global pid */ 200 case 'P': 201 err = cn_printf(cn, "%d", 202 task_tgid_nr(current)); 203 break; 204 case 'i': 205 err = cn_printf(cn, "%d", 206 task_pid_vnr(current)); 207 break; 208 case 'I': 209 err = cn_printf(cn, "%d", 210 task_pid_nr(current)); 211 break; 212 /* uid */ 213 case 'u': 214 err = cn_printf(cn, "%u", 215 from_kuid(&init_user_ns, 216 cred->uid)); 217 break; 218 /* gid */ 219 case 'g': 220 err = cn_printf(cn, "%u", 221 from_kgid(&init_user_ns, 222 cred->gid)); 223 break; 224 case 'd': 225 err = cn_printf(cn, "%d", 226 __get_dumpable(cprm->mm_flags)); 227 break; 228 /* signal that caused the coredump */ 229 case 's': 230 err = cn_printf(cn, "%d", 231 cprm->siginfo->si_signo); 232 break; 233 /* UNIX time of coredump */ 234 case 't': { 235 struct timeval tv; 236 do_gettimeofday(&tv); 237 err = cn_printf(cn, "%lu", tv.tv_sec); 238 break; 239 } 240 /* hostname */ 241 case 'h': 242 down_read(&uts_sem); 243 err = cn_esc_printf(cn, "%s", 244 utsname()->nodename); 245 up_read(&uts_sem); 246 break; 247 /* executable */ 248 case 'e': 249 err = cn_esc_printf(cn, "%s", current->comm); 250 break; 251 case 'E': 252 err = cn_print_exe_file(cn); 253 break; 254 /* core limit size */ 255 case 'c': 256 err = cn_printf(cn, "%lu", 257 rlimit(RLIMIT_CORE)); 258 break; 259 default: 260 break; 261 } 262 ++pat_ptr; 263 } 264 265 if (err) 266 return err; 267 } 268 269 out: 270 /* Backward compatibility with core_uses_pid: 271 * 272 * If core_pattern does not include a %p (as is the default) 273 * and core_uses_pid is set, then .%pid will be appended to 274 * the filename. Do not do this for piped commands. */ 275 if (!ispipe && !pid_in_pattern && core_uses_pid) { 276 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 277 if (err) 278 return err; 279 } 280 return ispipe; 281 } 282 283 static int zap_process(struct task_struct *start, int exit_code) 284 { 285 struct task_struct *t; 286 int nr = 0; 287 288 start->signal->group_exit_code = exit_code; 289 start->signal->group_stop_count = 0; 290 291 t = start; 292 do { 293 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 294 if (t != current && t->mm) { 295 sigaddset(&t->pending.signal, SIGKILL); 296 signal_wake_up(t, 1); 297 nr++; 298 } 299 } while_each_thread(start, t); 300 301 return nr; 302 } 303 304 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 305 struct core_state *core_state, int exit_code) 306 { 307 struct task_struct *g, *p; 308 unsigned long flags; 309 int nr = -EAGAIN; 310 311 spin_lock_irq(&tsk->sighand->siglock); 312 if (!signal_group_exit(tsk->signal)) { 313 mm->core_state = core_state; 314 nr = zap_process(tsk, exit_code); 315 tsk->signal->group_exit_task = tsk; 316 /* ignore all signals except SIGKILL, see prepare_signal() */ 317 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 318 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 319 } 320 spin_unlock_irq(&tsk->sighand->siglock); 321 if (unlikely(nr < 0)) 322 return nr; 323 324 tsk->flags |= PF_DUMPCORE; 325 if (atomic_read(&mm->mm_users) == nr + 1) 326 goto done; 327 /* 328 * We should find and kill all tasks which use this mm, and we should 329 * count them correctly into ->nr_threads. We don't take tasklist 330 * lock, but this is safe wrt: 331 * 332 * fork: 333 * None of sub-threads can fork after zap_process(leader). All 334 * processes which were created before this point should be 335 * visible to zap_threads() because copy_process() adds the new 336 * process to the tail of init_task.tasks list, and lock/unlock 337 * of ->siglock provides a memory barrier. 338 * 339 * do_exit: 340 * The caller holds mm->mmap_sem. This means that the task which 341 * uses this mm can't pass exit_mm(), so it can't exit or clear 342 * its ->mm. 343 * 344 * de_thread: 345 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 346 * we must see either old or new leader, this does not matter. 347 * However, it can change p->sighand, so lock_task_sighand(p) 348 * must be used. Since p->mm != NULL and we hold ->mmap_sem 349 * it can't fail. 350 * 351 * Note also that "g" can be the old leader with ->mm == NULL 352 * and already unhashed and thus removed from ->thread_group. 353 * This is OK, __unhash_process()->list_del_rcu() does not 354 * clear the ->next pointer, we will find the new leader via 355 * next_thread(). 356 */ 357 rcu_read_lock(); 358 for_each_process(g) { 359 if (g == tsk->group_leader) 360 continue; 361 if (g->flags & PF_KTHREAD) 362 continue; 363 p = g; 364 do { 365 if (p->mm) { 366 if (unlikely(p->mm == mm)) { 367 lock_task_sighand(p, &flags); 368 nr += zap_process(p, exit_code); 369 p->signal->flags = SIGNAL_GROUP_EXIT; 370 unlock_task_sighand(p, &flags); 371 } 372 break; 373 } 374 } while_each_thread(g, p); 375 } 376 rcu_read_unlock(); 377 done: 378 atomic_set(&core_state->nr_threads, nr); 379 return nr; 380 } 381 382 static int coredump_wait(int exit_code, struct core_state *core_state) 383 { 384 struct task_struct *tsk = current; 385 struct mm_struct *mm = tsk->mm; 386 int core_waiters = -EBUSY; 387 388 init_completion(&core_state->startup); 389 core_state->dumper.task = tsk; 390 core_state->dumper.next = NULL; 391 392 down_write(&mm->mmap_sem); 393 if (!mm->core_state) 394 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 395 up_write(&mm->mmap_sem); 396 397 if (core_waiters > 0) { 398 struct core_thread *ptr; 399 400 wait_for_completion(&core_state->startup); 401 /* 402 * Wait for all the threads to become inactive, so that 403 * all the thread context (extended register state, like 404 * fpu etc) gets copied to the memory. 405 */ 406 ptr = core_state->dumper.next; 407 while (ptr != NULL) { 408 wait_task_inactive(ptr->task, 0); 409 ptr = ptr->next; 410 } 411 } 412 413 return core_waiters; 414 } 415 416 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 417 { 418 struct core_thread *curr, *next; 419 struct task_struct *task; 420 421 spin_lock_irq(¤t->sighand->siglock); 422 if (core_dumped && !__fatal_signal_pending(current)) 423 current->signal->group_exit_code |= 0x80; 424 current->signal->group_exit_task = NULL; 425 current->signal->flags = SIGNAL_GROUP_EXIT; 426 spin_unlock_irq(¤t->sighand->siglock); 427 428 next = mm->core_state->dumper.next; 429 while ((curr = next) != NULL) { 430 next = curr->next; 431 task = curr->task; 432 /* 433 * see exit_mm(), curr->task must not see 434 * ->task == NULL before we read ->next. 435 */ 436 smp_mb(); 437 curr->task = NULL; 438 wake_up_process(task); 439 } 440 441 mm->core_state = NULL; 442 } 443 444 static bool dump_interrupted(void) 445 { 446 /* 447 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 448 * can do try_to_freeze() and check __fatal_signal_pending(), 449 * but then we need to teach dump_write() to restart and clear 450 * TIF_SIGPENDING. 451 */ 452 return signal_pending(current); 453 } 454 455 static void wait_for_dump_helpers(struct file *file) 456 { 457 struct pipe_inode_info *pipe = file->private_data; 458 459 pipe_lock(pipe); 460 pipe->readers++; 461 pipe->writers--; 462 wake_up_interruptible_sync(&pipe->wait); 463 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 464 pipe_unlock(pipe); 465 466 /* 467 * We actually want wait_event_freezable() but then we need 468 * to clear TIF_SIGPENDING and improve dump_interrupted(). 469 */ 470 wait_event_interruptible(pipe->wait, pipe->readers == 1); 471 472 pipe_lock(pipe); 473 pipe->readers--; 474 pipe->writers++; 475 pipe_unlock(pipe); 476 } 477 478 /* 479 * umh_pipe_setup 480 * helper function to customize the process used 481 * to collect the core in userspace. Specifically 482 * it sets up a pipe and installs it as fd 0 (stdin) 483 * for the process. Returns 0 on success, or 484 * PTR_ERR on failure. 485 * Note that it also sets the core limit to 1. This 486 * is a special value that we use to trap recursive 487 * core dumps 488 */ 489 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 490 { 491 struct file *files[2]; 492 struct coredump_params *cp = (struct coredump_params *)info->data; 493 int err = create_pipe_files(files, 0); 494 if (err) 495 return err; 496 497 cp->file = files[1]; 498 499 err = replace_fd(0, files[0], 0); 500 fput(files[0]); 501 /* and disallow core files too */ 502 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 503 504 return err; 505 } 506 507 void do_coredump(const siginfo_t *siginfo) 508 { 509 struct core_state core_state; 510 struct core_name cn; 511 struct mm_struct *mm = current->mm; 512 struct linux_binfmt * binfmt; 513 const struct cred *old_cred; 514 struct cred *cred; 515 int retval = 0; 516 int flag = 0; 517 int ispipe; 518 struct files_struct *displaced; 519 bool need_nonrelative = false; 520 bool core_dumped = false; 521 static atomic_t core_dump_count = ATOMIC_INIT(0); 522 struct coredump_params cprm = { 523 .siginfo = siginfo, 524 .regs = signal_pt_regs(), 525 .limit = rlimit(RLIMIT_CORE), 526 /* 527 * We must use the same mm->flags while dumping core to avoid 528 * inconsistency of bit flags, since this flag is not protected 529 * by any locks. 530 */ 531 .mm_flags = mm->flags, 532 }; 533 534 audit_core_dumps(siginfo->si_signo); 535 536 binfmt = mm->binfmt; 537 if (!binfmt || !binfmt->core_dump) 538 goto fail; 539 if (!__get_dumpable(cprm.mm_flags)) 540 goto fail; 541 542 cred = prepare_creds(); 543 if (!cred) 544 goto fail; 545 /* 546 * We cannot trust fsuid as being the "true" uid of the process 547 * nor do we know its entire history. We only know it was tainted 548 * so we dump it as root in mode 2, and only into a controlled 549 * environment (pipe handler or fully qualified path). 550 */ 551 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 552 /* Setuid core dump mode */ 553 flag = O_EXCL; /* Stop rewrite attacks */ 554 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 555 need_nonrelative = true; 556 } 557 558 retval = coredump_wait(siginfo->si_signo, &core_state); 559 if (retval < 0) 560 goto fail_creds; 561 562 old_cred = override_creds(cred); 563 564 ispipe = format_corename(&cn, &cprm); 565 566 if (ispipe) { 567 int dump_count; 568 char **helper_argv; 569 struct subprocess_info *sub_info; 570 571 if (ispipe < 0) { 572 printk(KERN_WARNING "format_corename failed\n"); 573 printk(KERN_WARNING "Aborting core\n"); 574 goto fail_unlock; 575 } 576 577 if (cprm.limit == 1) { 578 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 579 * 580 * Normally core limits are irrelevant to pipes, since 581 * we're not writing to the file system, but we use 582 * cprm.limit of 1 here as a special value, this is a 583 * consistent way to catch recursive crashes. 584 * We can still crash if the core_pattern binary sets 585 * RLIM_CORE = !1, but it runs as root, and can do 586 * lots of stupid things. 587 * 588 * Note that we use task_tgid_vnr here to grab the pid 589 * of the process group leader. That way we get the 590 * right pid if a thread in a multi-threaded 591 * core_pattern process dies. 592 */ 593 printk(KERN_WARNING 594 "Process %d(%s) has RLIMIT_CORE set to 1\n", 595 task_tgid_vnr(current), current->comm); 596 printk(KERN_WARNING "Aborting core\n"); 597 goto fail_unlock; 598 } 599 cprm.limit = RLIM_INFINITY; 600 601 dump_count = atomic_inc_return(&core_dump_count); 602 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 603 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 604 task_tgid_vnr(current), current->comm); 605 printk(KERN_WARNING "Skipping core dump\n"); 606 goto fail_dropcount; 607 } 608 609 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 610 if (!helper_argv) { 611 printk(KERN_WARNING "%s failed to allocate memory\n", 612 __func__); 613 goto fail_dropcount; 614 } 615 616 retval = -ENOMEM; 617 sub_info = call_usermodehelper_setup(helper_argv[0], 618 helper_argv, NULL, GFP_KERNEL, 619 umh_pipe_setup, NULL, &cprm); 620 if (sub_info) 621 retval = call_usermodehelper_exec(sub_info, 622 UMH_WAIT_EXEC); 623 624 argv_free(helper_argv); 625 if (retval) { 626 printk(KERN_INFO "Core dump to |%s pipe failed\n", 627 cn.corename); 628 goto close_fail; 629 } 630 } else { 631 struct inode *inode; 632 633 if (cprm.limit < binfmt->min_coredump) 634 goto fail_unlock; 635 636 if (need_nonrelative && cn.corename[0] != '/') { 637 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 638 "to fully qualified path!\n", 639 task_tgid_vnr(current), current->comm); 640 printk(KERN_WARNING "Skipping core dump\n"); 641 goto fail_unlock; 642 } 643 644 cprm.file = filp_open(cn.corename, 645 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 646 0600); 647 if (IS_ERR(cprm.file)) 648 goto fail_unlock; 649 650 inode = file_inode(cprm.file); 651 if (inode->i_nlink > 1) 652 goto close_fail; 653 if (d_unhashed(cprm.file->f_path.dentry)) 654 goto close_fail; 655 /* 656 * AK: actually i see no reason to not allow this for named 657 * pipes etc, but keep the previous behaviour for now. 658 */ 659 if (!S_ISREG(inode->i_mode)) 660 goto close_fail; 661 /* 662 * Dont allow local users get cute and trick others to coredump 663 * into their pre-created files. 664 */ 665 if (!uid_eq(inode->i_uid, current_fsuid())) 666 goto close_fail; 667 if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) 668 goto close_fail; 669 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 670 goto close_fail; 671 } 672 673 /* get us an unshared descriptor table; almost always a no-op */ 674 retval = unshare_files(&displaced); 675 if (retval) 676 goto close_fail; 677 if (displaced) 678 put_files_struct(displaced); 679 if (!dump_interrupted()) { 680 file_start_write(cprm.file); 681 core_dumped = binfmt->core_dump(&cprm); 682 file_end_write(cprm.file); 683 } 684 if (ispipe && core_pipe_limit) 685 wait_for_dump_helpers(cprm.file); 686 close_fail: 687 if (cprm.file) 688 filp_close(cprm.file, NULL); 689 fail_dropcount: 690 if (ispipe) 691 atomic_dec(&core_dump_count); 692 fail_unlock: 693 kfree(cn.corename); 694 coredump_finish(mm, core_dumped); 695 revert_creds(old_cred); 696 fail_creds: 697 put_cred(cred); 698 fail: 699 return; 700 } 701 702 /* 703 * Core dumping helper functions. These are the only things you should 704 * do on a core-file: use only these functions to write out all the 705 * necessary info. 706 */ 707 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 708 { 709 struct file *file = cprm->file; 710 loff_t pos = file->f_pos; 711 ssize_t n; 712 if (cprm->written + nr > cprm->limit) 713 return 0; 714 while (nr) { 715 if (dump_interrupted()) 716 return 0; 717 n = __kernel_write(file, addr, nr, &pos); 718 if (n <= 0) 719 return 0; 720 file->f_pos = pos; 721 cprm->written += n; 722 nr -= n; 723 } 724 return 1; 725 } 726 EXPORT_SYMBOL(dump_emit); 727 728 int dump_skip(struct coredump_params *cprm, size_t nr) 729 { 730 static char zeroes[PAGE_SIZE]; 731 struct file *file = cprm->file; 732 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 733 if (cprm->written + nr > cprm->limit) 734 return 0; 735 if (dump_interrupted() || 736 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 737 return 0; 738 cprm->written += nr; 739 return 1; 740 } else { 741 while (nr > PAGE_SIZE) { 742 if (!dump_emit(cprm, zeroes, PAGE_SIZE)) 743 return 0; 744 nr -= PAGE_SIZE; 745 } 746 return dump_emit(cprm, zeroes, nr); 747 } 748 } 749 EXPORT_SYMBOL(dump_skip); 750 751 int dump_align(struct coredump_params *cprm, int align) 752 { 753 unsigned mod = cprm->written & (align - 1); 754 if (align & (align - 1)) 755 return 0; 756 return mod ? dump_skip(cprm, align - mod) : 1; 757 } 758 EXPORT_SYMBOL(dump_align); 759