1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 #include "coredump.h" 44 45 #include <trace/events/sched.h> 46 47 int core_uses_pid; 48 unsigned int core_pipe_limit; 49 char core_pattern[CORENAME_MAX_SIZE] = "core"; 50 static int core_name_size = CORENAME_MAX_SIZE; 51 52 struct core_name { 53 char *corename; 54 int used, size; 55 }; 56 57 /* The maximal length of core_pattern is also specified in sysctl.c */ 58 59 static int expand_corename(struct core_name *cn, int size) 60 { 61 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 62 63 if (!corename) 64 return -ENOMEM; 65 66 if (size > core_name_size) /* racy but harmless */ 67 core_name_size = size; 68 69 cn->size = ksize(corename); 70 cn->corename = corename; 71 return 0; 72 } 73 74 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) 75 { 76 int free, need; 77 78 again: 79 free = cn->size - cn->used; 80 need = vsnprintf(cn->corename + cn->used, free, fmt, arg); 81 if (need < free) { 82 cn->used += need; 83 return 0; 84 } 85 86 if (!expand_corename(cn, cn->size + need - free + 1)) 87 goto again; 88 89 return -ENOMEM; 90 } 91 92 static int cn_printf(struct core_name *cn, const char *fmt, ...) 93 { 94 va_list arg; 95 int ret; 96 97 va_start(arg, fmt); 98 ret = cn_vprintf(cn, fmt, arg); 99 va_end(arg); 100 101 return ret; 102 } 103 104 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 105 { 106 int cur = cn->used; 107 va_list arg; 108 int ret; 109 110 va_start(arg, fmt); 111 ret = cn_vprintf(cn, fmt, arg); 112 va_end(arg); 113 114 for (; cur < cn->used; ++cur) { 115 if (cn->corename[cur] == '/') 116 cn->corename[cur] = '!'; 117 } 118 return ret; 119 } 120 121 static int cn_print_exe_file(struct core_name *cn) 122 { 123 struct file *exe_file; 124 char *pathbuf, *path; 125 int ret; 126 127 exe_file = get_mm_exe_file(current->mm); 128 if (!exe_file) 129 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 130 131 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 132 if (!pathbuf) { 133 ret = -ENOMEM; 134 goto put_exe_file; 135 } 136 137 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 138 if (IS_ERR(path)) { 139 ret = PTR_ERR(path); 140 goto free_buf; 141 } 142 143 ret = cn_esc_printf(cn, "%s", path); 144 145 free_buf: 146 kfree(pathbuf); 147 put_exe_file: 148 fput(exe_file); 149 return ret; 150 } 151 152 /* format_corename will inspect the pattern parameter, and output a 153 * name into corename, which must have space for at least 154 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 155 */ 156 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 157 { 158 const struct cred *cred = current_cred(); 159 const char *pat_ptr = core_pattern; 160 int ispipe = (*pat_ptr == '|'); 161 int pid_in_pattern = 0; 162 int err = 0; 163 164 cn->used = 0; 165 cn->corename = NULL; 166 if (expand_corename(cn, core_name_size)) 167 return -ENOMEM; 168 cn->corename[0] = '\0'; 169 170 if (ispipe) 171 ++pat_ptr; 172 173 /* Repeat as long as we have more pattern to process and more output 174 space */ 175 while (*pat_ptr) { 176 if (*pat_ptr != '%') { 177 err = cn_printf(cn, "%c", *pat_ptr++); 178 } else { 179 switch (*++pat_ptr) { 180 /* single % at the end, drop that */ 181 case 0: 182 goto out; 183 /* Double percent, output one percent */ 184 case '%': 185 err = cn_printf(cn, "%c", '%'); 186 break; 187 /* pid */ 188 case 'p': 189 pid_in_pattern = 1; 190 err = cn_printf(cn, "%d", 191 task_tgid_vnr(current)); 192 break; 193 /* uid */ 194 case 'u': 195 err = cn_printf(cn, "%d", cred->uid); 196 break; 197 /* gid */ 198 case 'g': 199 err = cn_printf(cn, "%d", cred->gid); 200 break; 201 case 'd': 202 err = cn_printf(cn, "%d", 203 __get_dumpable(cprm->mm_flags)); 204 break; 205 /* signal that caused the coredump */ 206 case 's': 207 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); 208 break; 209 /* UNIX time of coredump */ 210 case 't': { 211 struct timeval tv; 212 do_gettimeofday(&tv); 213 err = cn_printf(cn, "%lu", tv.tv_sec); 214 break; 215 } 216 /* hostname */ 217 case 'h': 218 down_read(&uts_sem); 219 err = cn_esc_printf(cn, "%s", 220 utsname()->nodename); 221 up_read(&uts_sem); 222 break; 223 /* executable */ 224 case 'e': 225 err = cn_esc_printf(cn, "%s", current->comm); 226 break; 227 case 'E': 228 err = cn_print_exe_file(cn); 229 break; 230 /* core limit size */ 231 case 'c': 232 err = cn_printf(cn, "%lu", 233 rlimit(RLIMIT_CORE)); 234 break; 235 default: 236 break; 237 } 238 ++pat_ptr; 239 } 240 241 if (err) 242 return err; 243 } 244 245 out: 246 /* Backward compatibility with core_uses_pid: 247 * 248 * If core_pattern does not include a %p (as is the default) 249 * and core_uses_pid is set, then .%pid will be appended to 250 * the filename. Do not do this for piped commands. */ 251 if (!ispipe && !pid_in_pattern && core_uses_pid) { 252 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 253 if (err) 254 return err; 255 } 256 return ispipe; 257 } 258 259 static int zap_process(struct task_struct *start, int exit_code) 260 { 261 struct task_struct *t; 262 int nr = 0; 263 264 start->signal->group_exit_code = exit_code; 265 start->signal->group_stop_count = 0; 266 267 t = start; 268 do { 269 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 270 if (t != current && t->mm) { 271 sigaddset(&t->pending.signal, SIGKILL); 272 signal_wake_up(t, 1); 273 nr++; 274 } 275 } while_each_thread(start, t); 276 277 return nr; 278 } 279 280 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 281 struct core_state *core_state, int exit_code) 282 { 283 struct task_struct *g, *p; 284 unsigned long flags; 285 int nr = -EAGAIN; 286 287 spin_lock_irq(&tsk->sighand->siglock); 288 if (!signal_group_exit(tsk->signal)) { 289 mm->core_state = core_state; 290 nr = zap_process(tsk, exit_code); 291 tsk->signal->group_exit_task = tsk; 292 /* ignore all signals except SIGKILL, see prepare_signal() */ 293 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 294 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 295 } 296 spin_unlock_irq(&tsk->sighand->siglock); 297 if (unlikely(nr < 0)) 298 return nr; 299 300 tsk->flags = PF_DUMPCORE; 301 if (atomic_read(&mm->mm_users) == nr + 1) 302 goto done; 303 /* 304 * We should find and kill all tasks which use this mm, and we should 305 * count them correctly into ->nr_threads. We don't take tasklist 306 * lock, but this is safe wrt: 307 * 308 * fork: 309 * None of sub-threads can fork after zap_process(leader). All 310 * processes which were created before this point should be 311 * visible to zap_threads() because copy_process() adds the new 312 * process to the tail of init_task.tasks list, and lock/unlock 313 * of ->siglock provides a memory barrier. 314 * 315 * do_exit: 316 * The caller holds mm->mmap_sem. This means that the task which 317 * uses this mm can't pass exit_mm(), so it can't exit or clear 318 * its ->mm. 319 * 320 * de_thread: 321 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 322 * we must see either old or new leader, this does not matter. 323 * However, it can change p->sighand, so lock_task_sighand(p) 324 * must be used. Since p->mm != NULL and we hold ->mmap_sem 325 * it can't fail. 326 * 327 * Note also that "g" can be the old leader with ->mm == NULL 328 * and already unhashed and thus removed from ->thread_group. 329 * This is OK, __unhash_process()->list_del_rcu() does not 330 * clear the ->next pointer, we will find the new leader via 331 * next_thread(). 332 */ 333 rcu_read_lock(); 334 for_each_process(g) { 335 if (g == tsk->group_leader) 336 continue; 337 if (g->flags & PF_KTHREAD) 338 continue; 339 p = g; 340 do { 341 if (p->mm) { 342 if (unlikely(p->mm == mm)) { 343 lock_task_sighand(p, &flags); 344 nr += zap_process(p, exit_code); 345 p->signal->flags = SIGNAL_GROUP_EXIT; 346 unlock_task_sighand(p, &flags); 347 } 348 break; 349 } 350 } while_each_thread(g, p); 351 } 352 rcu_read_unlock(); 353 done: 354 atomic_set(&core_state->nr_threads, nr); 355 return nr; 356 } 357 358 static int coredump_wait(int exit_code, struct core_state *core_state) 359 { 360 struct task_struct *tsk = current; 361 struct mm_struct *mm = tsk->mm; 362 int core_waiters = -EBUSY; 363 364 init_completion(&core_state->startup); 365 core_state->dumper.task = tsk; 366 core_state->dumper.next = NULL; 367 368 down_write(&mm->mmap_sem); 369 if (!mm->core_state) 370 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 371 up_write(&mm->mmap_sem); 372 373 if (core_waiters > 0) { 374 struct core_thread *ptr; 375 376 wait_for_completion(&core_state->startup); 377 /* 378 * Wait for all the threads to become inactive, so that 379 * all the thread context (extended register state, like 380 * fpu etc) gets copied to the memory. 381 */ 382 ptr = core_state->dumper.next; 383 while (ptr != NULL) { 384 wait_task_inactive(ptr->task, 0); 385 ptr = ptr->next; 386 } 387 } 388 389 return core_waiters; 390 } 391 392 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 393 { 394 struct core_thread *curr, *next; 395 struct task_struct *task; 396 397 spin_lock_irq(¤t->sighand->siglock); 398 if (core_dumped && !__fatal_signal_pending(current)) 399 current->signal->group_exit_code |= 0x80; 400 current->signal->group_exit_task = NULL; 401 current->signal->flags = SIGNAL_GROUP_EXIT; 402 spin_unlock_irq(¤t->sighand->siglock); 403 404 next = mm->core_state->dumper.next; 405 while ((curr = next) != NULL) { 406 next = curr->next; 407 task = curr->task; 408 /* 409 * see exit_mm(), curr->task must not see 410 * ->task == NULL before we read ->next. 411 */ 412 smp_mb(); 413 curr->task = NULL; 414 wake_up_process(task); 415 } 416 417 mm->core_state = NULL; 418 } 419 420 static bool dump_interrupted(void) 421 { 422 /* 423 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 424 * can do try_to_freeze() and check __fatal_signal_pending(), 425 * but then we need to teach dump_write() to restart and clear 426 * TIF_SIGPENDING. 427 */ 428 return signal_pending(current); 429 } 430 431 static void wait_for_dump_helpers(struct file *file) 432 { 433 struct pipe_inode_info *pipe = file->private_data; 434 435 pipe_lock(pipe); 436 pipe->readers++; 437 pipe->writers--; 438 wake_up_interruptible_sync(&pipe->wait); 439 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 440 pipe_unlock(pipe); 441 442 /* 443 * We actually want wait_event_freezable() but then we need 444 * to clear TIF_SIGPENDING and improve dump_interrupted(). 445 */ 446 wait_event_interruptible(pipe->wait, pipe->readers == 1); 447 448 pipe_lock(pipe); 449 pipe->readers--; 450 pipe->writers++; 451 pipe_unlock(pipe); 452 } 453 454 /* 455 * umh_pipe_setup 456 * helper function to customize the process used 457 * to collect the core in userspace. Specifically 458 * it sets up a pipe and installs it as fd 0 (stdin) 459 * for the process. Returns 0 on success, or 460 * PTR_ERR on failure. 461 * Note that it also sets the core limit to 1. This 462 * is a special value that we use to trap recursive 463 * core dumps 464 */ 465 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 466 { 467 struct file *files[2]; 468 struct coredump_params *cp = (struct coredump_params *)info->data; 469 int err = create_pipe_files(files, 0); 470 if (err) 471 return err; 472 473 cp->file = files[1]; 474 475 err = replace_fd(0, files[0], 0); 476 fput(files[0]); 477 /* and disallow core files too */ 478 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 479 480 return err; 481 } 482 483 void do_coredump(siginfo_t *siginfo) 484 { 485 struct core_state core_state; 486 struct core_name cn; 487 struct mm_struct *mm = current->mm; 488 struct linux_binfmt * binfmt; 489 const struct cred *old_cred; 490 struct cred *cred; 491 int retval = 0; 492 int flag = 0; 493 int ispipe; 494 struct files_struct *displaced; 495 bool need_nonrelative = false; 496 bool core_dumped = false; 497 static atomic_t core_dump_count = ATOMIC_INIT(0); 498 struct coredump_params cprm = { 499 .siginfo = siginfo, 500 .regs = signal_pt_regs(), 501 .limit = rlimit(RLIMIT_CORE), 502 /* 503 * We must use the same mm->flags while dumping core to avoid 504 * inconsistency of bit flags, since this flag is not protected 505 * by any locks. 506 */ 507 .mm_flags = mm->flags, 508 }; 509 510 audit_core_dumps(siginfo->si_signo); 511 512 binfmt = mm->binfmt; 513 if (!binfmt || !binfmt->core_dump) 514 goto fail; 515 if (!__get_dumpable(cprm.mm_flags)) 516 goto fail; 517 518 cred = prepare_creds(); 519 if (!cred) 520 goto fail; 521 /* 522 * We cannot trust fsuid as being the "true" uid of the process 523 * nor do we know its entire history. We only know it was tainted 524 * so we dump it as root in mode 2, and only into a controlled 525 * environment (pipe handler or fully qualified path). 526 */ 527 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 528 /* Setuid core dump mode */ 529 flag = O_EXCL; /* Stop rewrite attacks */ 530 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 531 need_nonrelative = true; 532 } 533 534 retval = coredump_wait(siginfo->si_signo, &core_state); 535 if (retval < 0) 536 goto fail_creds; 537 538 old_cred = override_creds(cred); 539 540 ispipe = format_corename(&cn, &cprm); 541 542 if (ispipe) { 543 int dump_count; 544 char **helper_argv; 545 struct subprocess_info *sub_info; 546 547 if (ispipe < 0) { 548 printk(KERN_WARNING "format_corename failed\n"); 549 printk(KERN_WARNING "Aborting core\n"); 550 goto fail_unlock; 551 } 552 553 if (cprm.limit == 1) { 554 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 555 * 556 * Normally core limits are irrelevant to pipes, since 557 * we're not writing to the file system, but we use 558 * cprm.limit of 1 here as a speacial value, this is a 559 * consistent way to catch recursive crashes. 560 * We can still crash if the core_pattern binary sets 561 * RLIM_CORE = !1, but it runs as root, and can do 562 * lots of stupid things. 563 * 564 * Note that we use task_tgid_vnr here to grab the pid 565 * of the process group leader. That way we get the 566 * right pid if a thread in a multi-threaded 567 * core_pattern process dies. 568 */ 569 printk(KERN_WARNING 570 "Process %d(%s) has RLIMIT_CORE set to 1\n", 571 task_tgid_vnr(current), current->comm); 572 printk(KERN_WARNING "Aborting core\n"); 573 goto fail_unlock; 574 } 575 cprm.limit = RLIM_INFINITY; 576 577 dump_count = atomic_inc_return(&core_dump_count); 578 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 579 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 580 task_tgid_vnr(current), current->comm); 581 printk(KERN_WARNING "Skipping core dump\n"); 582 goto fail_dropcount; 583 } 584 585 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 586 if (!helper_argv) { 587 printk(KERN_WARNING "%s failed to allocate memory\n", 588 __func__); 589 goto fail_dropcount; 590 } 591 592 retval = -ENOMEM; 593 sub_info = call_usermodehelper_setup(helper_argv[0], 594 helper_argv, NULL, GFP_KERNEL, 595 umh_pipe_setup, NULL, &cprm); 596 if (sub_info) 597 retval = call_usermodehelper_exec(sub_info, 598 UMH_WAIT_EXEC); 599 600 argv_free(helper_argv); 601 if (retval) { 602 printk(KERN_INFO "Core dump to |%s pipe failed\n", 603 cn.corename); 604 goto close_fail; 605 } 606 } else { 607 struct inode *inode; 608 609 if (cprm.limit < binfmt->min_coredump) 610 goto fail_unlock; 611 612 if (need_nonrelative && cn.corename[0] != '/') { 613 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 614 "to fully qualified path!\n", 615 task_tgid_vnr(current), current->comm); 616 printk(KERN_WARNING "Skipping core dump\n"); 617 goto fail_unlock; 618 } 619 620 cprm.file = filp_open(cn.corename, 621 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 622 0600); 623 if (IS_ERR(cprm.file)) 624 goto fail_unlock; 625 626 inode = file_inode(cprm.file); 627 if (inode->i_nlink > 1) 628 goto close_fail; 629 if (d_unhashed(cprm.file->f_path.dentry)) 630 goto close_fail; 631 /* 632 * AK: actually i see no reason to not allow this for named 633 * pipes etc, but keep the previous behaviour for now. 634 */ 635 if (!S_ISREG(inode->i_mode)) 636 goto close_fail; 637 /* 638 * Dont allow local users get cute and trick others to coredump 639 * into their pre-created files. 640 */ 641 if (!uid_eq(inode->i_uid, current_fsuid())) 642 goto close_fail; 643 if (!cprm.file->f_op || !cprm.file->f_op->write) 644 goto close_fail; 645 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 646 goto close_fail; 647 } 648 649 /* get us an unshared descriptor table; almost always a no-op */ 650 retval = unshare_files(&displaced); 651 if (retval) 652 goto close_fail; 653 if (displaced) 654 put_files_struct(displaced); 655 if (!dump_interrupted()) { 656 file_start_write(cprm.file); 657 core_dumped = binfmt->core_dump(&cprm); 658 file_end_write(cprm.file); 659 } 660 if (ispipe && core_pipe_limit) 661 wait_for_dump_helpers(cprm.file); 662 close_fail: 663 if (cprm.file) 664 filp_close(cprm.file, NULL); 665 fail_dropcount: 666 if (ispipe) 667 atomic_dec(&core_dump_count); 668 fail_unlock: 669 kfree(cn.corename); 670 coredump_finish(mm, core_dumped); 671 revert_creds(old_cred); 672 fail_creds: 673 put_cred(cred); 674 fail: 675 return; 676 } 677 678 /* 679 * Core dumping helper functions. These are the only things you should 680 * do on a core-file: use only these functions to write out all the 681 * necessary info. 682 */ 683 int dump_write(struct file *file, const void *addr, int nr) 684 { 685 return !dump_interrupted() && 686 access_ok(VERIFY_READ, addr, nr) && 687 file->f_op->write(file, addr, nr, &file->f_pos) == nr; 688 } 689 EXPORT_SYMBOL(dump_write); 690 691 int dump_seek(struct file *file, loff_t off) 692 { 693 int ret = 1; 694 695 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 696 if (dump_interrupted() || 697 file->f_op->llseek(file, off, SEEK_CUR) < 0) 698 return 0; 699 } else { 700 char *buf = (char *)get_zeroed_page(GFP_KERNEL); 701 702 if (!buf) 703 return 0; 704 while (off > 0) { 705 unsigned long n = off; 706 707 if (n > PAGE_SIZE) 708 n = PAGE_SIZE; 709 if (!dump_write(file, buf, n)) { 710 ret = 0; 711 break; 712 } 713 off -= n; 714 } 715 free_page((unsigned long)buf); 716 } 717 return ret; 718 } 719 EXPORT_SYMBOL(dump_seek); 720